# Production Technology

The dataset contains `N = 441` firms observed over `T = 12` years, 1968-1979. The variables are:
* `lcap`: Log of capital stock, $k_{it}$ 
* `lemp`: log of employment, $\ell_{it}$ 
* `ldsa`: log of deflated sales, $y_{it}$
* `year`: the calendar year of the observation, `year` $ = 1968, ..., 1979$, 
* `firmid`: anonymized indicator variable for the firm, $i = 1, ..., N$, with $N=441$. 

In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import os
print(os.listdir())
import project_1 as est
from numpy import linalg as la
from scipy.stats import chi2
from scipy import stats
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

['reg_table.tex', '2025-09_Project_1.pdf', 'project_1.py', 'firms.csv', '__pycache__', 'read (1).ipynb']


In [2]:
# Load the firm panel from firms.csv (path is relative to the working directory).
dat = pd.read_csv('firms.csv')

# Descriptives

In [3]:
# Summary statistics of the loaded columns (count, mean, std, quantiles).
dat.describe()

Unnamed: 0,firmid,year,lcap,lemp,ldsa
count,5292.0,5292.0,5292.0,5292.0,5292.0
mean,221.0,1973.5,-7.125472e-09,-1.252834e-08,4.695767e-09
std,127.317437,3.452379,1.310973,1.180122,1.232499
min,1.0,1968.0,-3.86495,-3.38278,-3.55154
25%,111.0,1970.75,-0.9083267,-0.785527,-0.927972
50%,221.0,1973.5,-0.1180615,-0.1137295,-0.102971
75%,331.0,1976.25,0.906334,0.793006,0.8562296
max,441.0,1979.0,4.103687,3.371332,3.913391


In [None]:
# Descriptive statistics for the log variables: mean and excess kurtosis (normal = 0).
cols = ['lcap', 'lemp', 'ldsa']

# Aggregate column-wise, then transpose so each variable becomes a row.
stats_by_variable = dat[cols].agg(['mean', 'kurt']).T
tab = stats_by_variable.round(3).rename(columns={'mean': 'Mean', 'kurt': 'Kurtosis'})

# Render as LaTeX; escape=False leaves the cell text untouched.
latex_options = dict(
    index=True,
    caption='Beskrivende statistik (samlet): middelværdi og kurtosis',
    label='tab:desc_overall',
    float_format="%.3f",
    escape=False,
)
latex = tab.to_latex(**latex_options)

print(latex)

In [None]:
# Back-transform the log variables to levels and store them as new columns of `dat`.
for level_name in ('cap', 'emp', 'dsa'):
    dat[level_name] = np.exp(dat['l' + level_name])

cols = ['cap', 'emp', 'dsa']

# Mean and excess kurtosis (normal = 0) per variable, one variable per row.
tab = (
    dat[cols]
      .agg(['mean', 'kurt'])
      .T
      .round(3)
      .rename(columns={'mean': 'Mean', 'kurt': 'Kurtosis'})
)

# Caption and label differ from the log-variable table: two tables sharing
# the label 'tab:desc_overall' give a multiply-defined LaTeX label.
latex = tab.to_latex(
    index=True,
    caption='Beskrivende statistik (samlet, niveauer): middelværdi og kurtosis',
    label='tab:desc_overall_levels',
    float_format="%.3f",
    escape=False
)

print(latex)

In [None]:
# Histograms of the three log variables: sales across the top row,
# employment and capital side by side below, all sharing one y-axis.
blue = "#5aa9e6"  # light blue

fig = plt.figure(figsize=(10, 6), constrained_layout=True)
gs = fig.add_gridspec(nrows=2, ncols=2, height_ratios=[1.2, 1.0])

ax_top = fig.add_subplot(gs[0, :])                 # top panel spans both columns
ax_bl  = fig.add_subplot(gs[1, 0], sharey=ax_top)  # bottom left, shares y with the top panel
ax_br  = fig.add_subplot(gs[1, 1], sharey=ax_top)  # bottom right, shares y with the top panel

# (axis, column, title) for each panel; one loop replaces three copy-pasted blocks.
panels = [
    (ax_top, 'ldsa', "Deflated Sales (log)"),
    (ax_bl,  'lemp', "Employment (log)"),
    (ax_br,  'lcap', "Adjusted Capital Stock (log)"),
]
for ax, column, title in panels:
    ax.hist(dat[column].dropna(), bins=30, color=blue, edgecolor="white", linewidth=0.5)
    ax.set_title(title, loc="center")
    ax.grid(True, alpha=0.25)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_ylabel("")              # no per-panel y-label; the figure-level label is used instead

fig.supylabel("Frequency")          # single y-label shared by all panels

# Hide the y tick labels on the bottom-right panel to reduce clutter.
ax_br.tick_params(labelleft=False)

# No suptitle is ever set on this figure, so there is nothing to remove; the
# former try/except around the private `fig._suptitle` only swallowed an
# AttributeError and has been dropped.
plt.show()

In [None]:
# Scatter plots of log sales against log employment and log capital,
# coloured by year, each with a fitted linear regression line.
years = np.sort(dat['year'].unique())

# Blue palette: a narrow slice of 'Blues' so the yearly shades stay harmonious.
cmap = plt.cm.Blues
colors = cmap(np.linspace(0.45, 0.90, len(years)))  # widen/narrow the interval to taste
palette = dict(zip(years, colors))

fig, axes = plt.subplots(1, 2, figsize=(11, 5), sharey=True)

# (axis, x column, x label, draw legend?) -- only the right panel carries the legend.
panels = [
    (axes[0], 'lemp', "Employment (log)", False),
    (axes[1], 'lcap', "Adjusted Capital Stock (log)", True),
]
for ax, x_column, x_label, show_legend in panels:
    sns.scatterplot(
        x=x_column, y='ldsa', data=dat, ax=ax,
        hue='year', hue_order=years, palette=palette,
        edgecolor="white", s=28, alpha=0.85, legend=show_legend
    )
    sns.regplot(
        x=x_column, y='ldsa', data=dat, ax=ax, scatter=False,
        color="#3a3a3a", ci=None, line_kws={'linewidth': 2}
    )
    ax.set_xlabel(x_label)
    ax.set_ylabel("")
    ax.grid(True, alpha=0.25)
    sns.despine(ax=ax)   # hides the top and right spines

fig.supylabel("Adjusted Sales (log)")
axes[1].legend(title="Year", bbox_to_anchor=(1.02, 1), loc="upper left", borderaxespad=0)

# No suptitle is set on this figure, so the former try/except that poked at
# the private `fig._suptitle` (and silently swallowed the error) is removed.
plt.tight_layout()
plt.show()


# Converting data to numpy format 

In [4]:
# Shape of the log-sales column as a flat NumPy array.
dat.ldsa.values.shape

(5292,)

In [5]:
# Panel dimensions: N distinct firms and T distinct years.
N = dat.firmid.unique().size
T = dat.year.unique().size

# Balanced panel: N*T rows in total AND every firm observed in all T years.
# The row count alone would not catch a missing firm-year offset by a duplicate.
assert dat.shape[0] == N*T, 'Error: data is not a balanced panel'
assert (dat.groupby('firmid')['year'].nunique() == T).all(), 'Error: data is not a balanced panel'

# The stacked (N*T, 1) reshapes and the per-firm transformations used later
# presumably rely on rows being ordered by firm and then by year
# (NOTE(review): confirm against est.perm), so check the ordering explicitly.
_row_order = np.lexsort((dat['year'].to_numpy(), dat['firmid'].to_numpy()))
assert (_row_order == np.arange(len(dat))).all(), 'Error: data must be sorted by firmid and year'

print(f'Data has N={N} and T={T}')

Data has N=441 and T=12


In [6]:
# Dependent variable: log deflated sales as an (N*T, 1) column vector.
y = dat['ldsa'].to_numpy().reshape((N*T, 1))

# Regressors: a constant term (representing the technology level A),
# log employment and log capital, stacked column-wise into X.
ones = np.ones((N*T, 1))
l = dat['lemp'].to_numpy().reshape((N*T, 1))
k = dat['lcap'].to_numpy().reshape((N*T, 1))
X = np.column_stack([ones, l, k])

# Labels used when printing regression tables.
label_y = 'log_omsætning'
label_x = ['const', 'log_ansatte', 'log_kapital']

# Rename the DataFrame columns for easier reading. This happens after the
# arrays above were extracted, so they are unaffected.
rename_dict = {'year': 'aar', 'ldsa':'log_omsætning', 'lemp': 'log_ansatte', 'lcap': 'log_kapital'}
dat = dat.rename(columns=rename_dict)

# OLS

In [7]:
# Estimate pooled OLS on the untransformed data as a benchmark, then print the result table.
pooled_result = est.estimate(y, X, T=T)
est.print_table((label_y, label_x), pooled_result, title='Pooled OLS regression', floatfmt='.4f')

Pooled OLS regression
Dependent variable: log_omsætning

               Beta      Se    t-values
-----------  ------  ------  ----------
const        0.0000  0.0050      0.0000
log_ansatte  0.6748  0.0102     66.4625
log_kapital  0.3100  0.0091     33.9237
R² = 0.914
σ² = 0.131


In [8]:
# Wald test of constant returns to scale for pooled OLS,
# H0: beta_ansatte + beta_kapital = 1.
b_hat = pooled_result['b_hat']
cov   = pooled_result['cov']

# Column order of the pooled design matrix X = [const, l, k]. It is defined
# locally because the global `label_x` is overwritten by later cells, which
# would make this cell pick the wrong coefficients when re-run.
pooled_labels = ['const', 'log_ansatte', 'log_kapital']
ansatte_index = pooled_labels.index('log_ansatte')
kapital_index = pooled_labels.index('log_kapital')

# Flatten the estimates and validate the shapes
b = np.asarray(b_hat).reshape(-1)   # (P,)
V = np.asarray(cov)                 # (P,P)
P = b.size
if V.shape != (P, P):
    raise ValueError(f"Kovarians har forkert form: {V.shape}, forventede {(P,P)}")
if len(pooled_labels) != P:
    raise ValueError(f"Expected {len(pooled_labels)} coefficients, got {P}")

# Restriction row R (1 x P) with ones on the two elasticities
R = np.zeros((1, P))
R[0, [ansatte_index, kapital_index]] = 1.0

# W = (R b - 1)^2 / (R V R'). `.item()` extracts the scalar from the
# one-element arrays; float() on an array with ndim > 0 is deprecated in NumPy.
num = (R @ b - 1.0).item()         # R b - q, q = 1
den = (R @ V @ R.T).item()         # R V R'
if not np.isfinite(den) or den <= 0:
    raise ValueError("Variansen af restriktionen er ikke positiv/finitsk; tjek kovarians og indeks.")

wald = (num ** 2) / den
p_value = stats.chi2.sf(wald, df=1)   # one restriction -> chi2(1)

print(f"Wald test statistic: {wald:.4f}")
print(f"p-value: {p_value:.4g}")

Wald test statistic: 12.9800
p-value: 0.0003148


  num = float(R @ b - 1.0)           # c'β - q, q=1
  den = float(R @ V @ R.T)           # c' V c


# FE

In [9]:
# Within (demeaning) matrix: the identity minus the T x T matrix whose entries are all 1/T.
Q_T = np.eye(T) - np.full((T, T), 1 / T)
print(f'Dimension of Q_T: {Q_T.shape}')

# Apply Q_T to y and X through est.perm.
y_dot = est.perm(Q_T, y.reshape(-1, 1))
# Drop the first column (the constant) right after transforming X,
# leaving log_ansatte and log_kapital.
x_dot = est.perm(Q_T, X)[:, 1:]
label_x = ['log_ansatte', 'log_kapital']  # labels now match the two remaining columns

# Fixed-effects estimate with robust standard errors, followed by the result table.
fe_result = est.estimate(y_dot, x_dot, robust_se=True, transform='fe', T=T)
est.print_table((label_y, label_x), fe_result, title='FE regression', floatfmt='.4f')

Dimension of Q_T: (12, 12)
FE regression
Dependent variable: log_omsætning

               Beta      Se    t-values
-----------  ------  ------  ----------
log_ansatte  0.6942  0.0417     16.6674
log_kapital  0.1546  0.0299      5.1630
R² = 0.477
σ² = 0.018


In [10]:
# Wald test of constant returns to scale for the FE estimates,
# H0: beta_ansatte + beta_kapital = 1.
b_hat = fe_result['b_hat']
cov   = fe_result['cov']

# Column order of the FE regressors (the constant was dropped from x_dot).
# Defined locally so the cell does not depend on the mutable global `label_x`.
fe_labels = ['log_ansatte', 'log_kapital']
ansatte_index = fe_labels.index('log_ansatte')
kapital_index = fe_labels.index('log_kapital')

# Flatten the estimates and validate the shapes
b = np.asarray(b_hat).reshape(-1)   # (P,)
V = np.asarray(cov)                 # (P,P)
P = b.size
if V.shape != (P, P):
    raise ValueError(f"Kovarians har forkert form: {V.shape}, forventede {(P,P)}")
if len(fe_labels) != P:
    raise ValueError(f"Expected {len(fe_labels)} coefficients, got {P}")

# Restriction row R (1 x P) with ones on the two elasticities
R = np.zeros((1, P))
R[0, [ansatte_index, kapital_index]] = 1.0

# W = (R b - 1)^2 / (R V R'). `.item()` extracts the scalar from the
# one-element arrays; float() on an array with ndim > 0 is deprecated in NumPy.
num = (R @ b - 1.0).item()         # R b - q, q = 1
den = (R @ V @ R.T).item()         # R V R'
if not np.isfinite(den) or den <= 0:
    raise ValueError("Variansen af restriktionen er ikke positiv/finitsk; tjek kovarians og indeks.")

wald = (num ** 2) / den
p_value = stats.chi2.sf(wald, df=1)   # one restriction -> chi2(1)

print(f"Wald test statistic: {wald:.4f}")
print(f"p-value: {p_value:.4g}")

Wald test statistic: 19.4029
p-value: 1.058e-05


  num = float(R @ b - 1.0)           # c'β - q, q=1
  den = float(R @ V @ R.T)           # c' V c


# RE

In [None]:
############################### The between estimator ###############################
# Averaging row vector: a 1 x T row with every entry equal to 1/T.
P_T = np.full((1, T), 1 / T)
print(f'Dimension of P_T: {P_T.shape}')

# Apply P_T to y and X through est.perm.
# NOTE(review): with a 1 x T matrix this presumably yields one row per firm
# (N rows rather than N*T) -- confirm against est.perm.
y_mean = est.perm(P_T, y.reshape(-1, 1))
# Drop the first column (the constant) from the transformed regressors.
x_mean = est.perm(P_T, X)[:, 1:]
label_x = ['log_ansatte', 'log_kapital']  # labels for the two remaining columns

# Between estimate and its result table.
be_result = est.estimate(y_mean, x_mean, robust_se=True, transform='be', T=T)
est.print_table((label_y, label_x), be_result, title="Between Estimator", floatfmt='.4f')

In [None]:
############################## Calculate lambda ########################
# Idiosyncratic error variance, taken from the FE regression.
sigma2_u = fe_result['sigma2']

# Residual variance of the between regression: u'u / (rows - regressors).
y_bar = np.asarray(y_mean).reshape(-1, 1)
x_bar = np.asarray(x_mean)
u = y_bar - x_bar @ np.asarray(be_result['b_hat']).reshape(-1, 1)
n_rows, n_regressors = x_bar.shape
# `.item()` pulls the scalar out of the (1, 1) product; float() on an array
# with ndim > 0 is deprecated in NumPy.
sigma2_w = ((u.T @ u) / (n_rows - n_regressors)).item()

# Variance of the firm effect and the resulting quasi-demeaning weight.
sigma2_c = sigma2_w - (sigma2_u / T)
_lambda = 1 - np.sqrt(sigma2_u / (T * sigma2_c + sigma2_u))

# Print lambda
print(f'Lambda is approximately equal to {_lambda.item():.4f}.')

In [None]:
############################### Estimate RE model ###############################
# Identity matrix of dimension T
I_T = np.eye(T)

# Quasi-demeaning matrix C_T = I_T - lambda * P_T. P_T is a 1 x T row of
# 1/T, so the subtraction broadcasts it over every row of I_T. Lambda is
# reduced to a plain float first: `_lambda @ P_T` only works while `_lambda`
# is a 2-D array and raises if it is a NumPy scalar.
lambda_value = np.asarray(_lambda).item()
C_T = I_T - lambda_value * P_T

# Transform y and X using est.perm; X keeps its constant column here.
y_re = est.perm(C_T, y.reshape(-1, 1))
x_re = est.perm(C_T, X)

# x_re has three columns (const, l, k), whereas the global `label_x` holds
# only the two slope names at this point; use labels that match the columns.
label_x_re = ['const', 'log_ansatte', 'log_kapital']

# Estimate and print
re_result = est.estimate(y_re, x_re, robust_se=True, transform='re', T=T)
est.print_table((label_y, label_x_re), re_result, title="Random Effects", floatfmt='.4f')

## FD

In [11]:
#Transform data
def fd_matrix(T):
    """Build the (T-1) x T first-difference matrix.

    Row t has -1 in column t and +1 in column t+1, so multiplying a length-T
    series by the matrix gives the T-1 consecutive differences z[t+1] - z[t].
    """
    # Ones on the first superdiagonal minus ones on the main diagonal.
    return np.eye(T - 1, T, k=1) - np.eye(T - 1, T)

# First-difference matrix for a panel with T periods.
D = fd_matrix(T)  # (T-1, T)
print(f'Dimension of D: {D.shape}')

# Difference y and X through est.perm.
# NOTE(review): the result presumably has N*(T-1) rows -- est.perm is not shown here.
y_diff = est.perm(D, y.reshape(-1, 1))
# Drop the first column (the constant) from the differenced regressors.
x_diff = est.perm(D, X)[:, 1:]
label_x = ['log_ansatte', 'log_kapital']  # labels for the two remaining columns

# OLS on the first-differenced variables.
fd_result = est.estimate(y_diff, x_diff, transform='fd', T=T)

# Result table
est.print_table((label_y, label_x), fd_result, title='FD regression', floatfmt='.4f')

Dimension of D: (11, 12)
FD regression
Dependent variable: log_omsætning

               Beta      Se    t-values
-----------  ------  ------  ----------
log_ansatte  0.5487  0.0183     29.9635
log_kapital  0.0630  0.0191      3.3043
R² = 0.165
σ² = 0.014


In [12]:
# Wald test of constant returns to scale for the FD estimates,
# H0: beta_ansatte + beta_kapital = 1.
b_hat = fd_result['b_hat']
cov   = fd_result['cov']

# Column order of the FD regressors (the constant was dropped from x_diff).
# Defined locally so the cell does not depend on the mutable global `label_x`.
fd_labels = ['log_ansatte', 'log_kapital']
ansatte_index = fd_labels.index('log_ansatte')
kapital_index = fd_labels.index('log_kapital')

# Flatten the estimates and validate the shapes
b = np.asarray(b_hat).reshape(-1)   # (P,)
V = np.asarray(cov)                 # (P,P)
P = b.size
if V.shape != (P, P):
    raise ValueError(f"Kovarians har forkert form: {V.shape}, forventede {(P,P)}")
if len(fd_labels) != P:
    raise ValueError(f"Expected {len(fd_labels)} coefficients, got {P}")

# Restriction row R (1 x P) with ones on the two elasticities
R = np.zeros((1, P))
R[0, [ansatte_index, kapital_index]] = 1.0

# W = (R b - 1)^2 / (R V R'). `.item()` extracts the scalar from the
# one-element arrays; float() on an array with ndim > 0 is deprecated in NumPy.
num = (R @ b - 1.0).item()         # R b - q, q = 1
den = (R @ V @ R.T).item()         # R V R'
if not np.isfinite(den) or den <= 0:
    raise ValueError("Variansen af restriktionen er ikke positiv/finitsk; tjek kovarians og indeks.")

wald = (num ** 2) / den
p_value = stats.chi2.sf(wald, df=1)   # one restriction -> chi2(1)

print(f"Wald test statistic: {wald:.4f}")
print(f"p-value: {p_value:.4g}")

Wald test statistic: 251.7295
p-value: 1.09e-56


  num = float(R @ b - 1.0)           # c'β - q, q=1
  den = float(R @ V @ R.T)           # c' V c


In [None]:
#####Resultattabel######
# ===================== Robust LaTeX table (handles nested arrays/lists) =====================
import math
import numpy as np
from collections import OrderedDict

# ---------- squeeze helpers ----------
def _squeeze_scalar(x):
    """
    Returnér float af x selv om x er [[v]], np.array([[v]]), [v], eller skalar.
    """
    # numpy vej
    try:
        arr = np.asarray(x)
        if arr.size == 1:
            return float(arr.reshape(-1)[0])
    except Exception:
        pass
    # liste/tuple vej
    if isinstance(x, (list, tuple)) and len(x) == 1:
        return _squeeze_scalar(x[0])
    return float(x)

def _squeeze_vector(x):
    """
    Return a list of floats, even when x is (k,1), a list of lists, a pandas Series, etc.

    None is passed through as None.
    """
    if x is None:
        return None
    try:
        values = np.asarray(x)
        if values.ndim == 0:
            return [_squeeze_scalar(values)]
        if values.ndim > 1:
            # Collapse the trailing axes so each leading-axis entry becomes one row.
            values = values.reshape(values.shape[0], -1)
        return [_squeeze_scalar(entry) for entry in values]
    except Exception:
        pass
    # Fallback: iterate over x as a plain Python iterable.
    try:
        return [_squeeze_scalar(item) for item in x]
    except Exception:
        # Last resort: treat x itself as a scalar.
        return [_squeeze_scalar(x)]

def _squeeze_optional_scalar(x):
    """Return x as a float via _squeeze_scalar, or None if x is None or cannot be converted."""
    result = None
    if x is not None:
        try:
            result = _squeeze_scalar(x)
        except Exception:
            result = None
    return result

# ---------- name normalization ----------
def _normalize_const_name(name):
    return "const" if name in ("const", "_cons", "Intercept", "intercept") else name

def _normalize_const_in_dict(d):
    if d is None:
        return {}
    if "const" in d:
        return d
    for k in list(d.keys()):
        if k in ("_cons", "Intercept", "intercept"):
            d["const"] = d.pop(k)
            break
    return d

def _as_named_dict(values, expected_order=None):
    """
    Map værdier (liste af floats) til navne givet expected_order.
    """
    if values is None:
        return {}
    if expected_order is None:
        # sidste udvej: nummerér (sjældent brugbart til pæn tabel)
        return {str(i): float(v) for i, v in enumerate(values)}
    if len(values) != len(expected_order):
        raise ValueError(f"Længden af værdier ({len(values)}) passer ikke til expected_order ({len(expected_order)}).")
    return { _normalize_const_name(expected_order[i]): float(values[i]) for i in range(len(values)) }

# ---------- p-værdier / stjerner ----------
def _p_two_sided_from_t(t, df=None):
    # Normal-approksimation (god ved moderat/stort n)
    try:
        t = float(t)
    except Exception:
        return None
    return 2.0 * (1.0 - 0.5 * (1.0 + math.erf(abs(t) / math.sqrt(2.0))))

def _star(p):
    if p is None:
        return ""
    try:
        p = float(p)
    except Exception:
        return ""
    if p < 0.01:
        return r"^{***}"
    if p < 0.05:
        return r"^{**}"
    if p < 0.10:
        return r"^{*}"
    return ""

# ---------- extractor til din estimate-struct ----------
def extract_from_estimate_dict(res_dict, expected_order):
    """
    Turn an estimate result dict into name-keyed coefficient dictionaries.

    res_dict: {'b_hat','se','sigma2','t_values','R2','cov'}
    expected_order: order of the variables in X (including 'const' if present)

    Returns dict(coef=..., se=..., t=..., r2=...), where coef/se/t map
    variable names to floats and r2 is a float or None.
    """
    # Flatten every vector first, in the order b_hat, se, t_values.
    vectors = {key: _squeeze_vector(res_dict.get(key)) for key in ("b_hat", "se", "t_values")}
    r_squared = _squeeze_optional_scalar(res_dict.get("R2"))

    def _named(values):
        # Attach names, then make sure any intercept alias is keyed as "const".
        return _normalize_const_in_dict(_as_named_dict(values, expected_order))

    coefficients = _named(vectors["b_hat"])
    std_errors = {} if vectors["se"] is None else _named(vectors["se"])
    t_stats = {} if vectors["t_values"] is None else _named(vectors["t_values"])

    return dict(coef=coefficients, se=std_errors, t=t_stats, r2=r_squared)

# ---------- hovedfunktion ----------
def make_latex_table(results, model_names,
                     expected_orders,         # expected variable order for each model
                     nobs_list,               # number of observations per model
                     dfs_list,                # degrees of freedom per model (reported only)
                     var_order=None,          # final display order of the variables
                     var_labels=None,
                     include_const=True,
                     decimals=3,
                     caption="Regression results",
                     label="tab:reg",
                     note=None):
    """Build a LaTeX regression table with one column per model.

    Parameters
    ----------
    results : sized sequence of estimate result dicts (see extract_from_estimate_dict).
    model_names : column headers, one per model.
    expected_orders : for each model, the variable names in coefficient order.
    nobs_list, dfs_list : observations and degrees of freedom per model
        (entries may be None to leave the cell empty). The degrees of freedom
        are only printed; the p-values behind the stars use a normal approximation.
    var_order : display order of the rows; by default "const" first (when
        included) followed by the remaining names sorted alphabetically.
    var_labels : optional mapping from variable name to row label.
    include_const : whether the "const" row is shown.
    decimals : number of decimals for coefficients, standard errors and R^2.
    caption, label, note : caption, LaTeX label and table footnote.

    Returns
    -------
    str
        The table source. It uses \\toprule/\\midrule/\\bottomrule, which are
        booktabs commands.

    Raises
    ------
    ValueError
        If the per-model lists do not all have one entry per model.
    """
    model_names = list(model_names)
    expected_orders = list(expected_orders)
    nobs_list = list(nobs_list)
    dfs_list = list(dfs_list)

    cols = len(results)
    # Every per-model list must line up with `results`; otherwise rows would
    # silently get the wrong number of cells.
    for arg_name, arg in (("model_names", model_names),
                          ("expected_orders", expected_orders),
                          ("nobs_list", nobs_list),
                          ("dfs_list", dfs_list)):
        if len(arg) != cols:
            raise ValueError(f"{arg_name} has {len(arg)} entries, expected {cols} (one per model).")

    extracted = [extract_from_estimate_dict(res, exp)
                 for res, exp in zip(results, expected_orders)]

    if var_order is None:
        all_vars = set()
        for e in extracted:
            all_vars |= set(e["coef"].keys())
        var_order = (["const"] if include_const and "const" in all_vars else []) + \
                    sorted(v for v in all_vars if v != "const")
    elif not include_const:
        # Honour include_const=False for an explicitly supplied order as well.
        var_order = [v for v in var_order if v != "const"]

    var_labels = var_labels or {}

    def lab(v):
        return var_labels.get(v, v)

    # p-values from the t-statistics (normal approximation)
    pvals_list = []
    for j, e in enumerate(extracted):
        tmap = e["t"]
        pmap = {k: _p_two_sided_from_t(tv, df=dfs_list[j]) for k, tv in tmap.items()} if tmap else {}
        pvals_list.append(pmap)

    if note is None:
        note = "Robuste SE i parentes. * p<0.10, ** p<0.05, *** p<0.01"

    lines = []
    lines.append(r"\begin{table}[!htbp]")
    lines.append(r"\centering")
    lines.append(r"\begin{tabular}{l" + "c"*cols + r"}")
    lines.append(r"\toprule")
    lines.append(" & ".join([""] + model_names) + r" \\")
    lines.append(r"\midrule")

    # Two rows per variable: coefficients (with stars), then standard errors.
    for v in var_order:
        row_coef = [lab(v)]
        row_se   = [""]
        for j, e in enumerate(extracted):
            b = e["coef"].get(v, None)
            s = e["se"].get(v, None) if e["se"] else None
            p = pvals_list[j].get(v, None) if pvals_list[j] else None
            if b is None:
                row_coef.append("")
                row_se.append("")
            else:
                coef_text = f"{float(b):.{decimals}f}"
                stars = _star(p)
                # The markers are superscripts (^{...}) and only compile in
                # math mode, so a starred coefficient is wrapped in $...$.
                row_coef.append(f"${coef_text}{stars}$" if stars else coef_text)
                row_se.append(f"({float(s):.{decimals}f})" if s is not None else "")
        lines.append(" & ".join(row_coef) + r" \\")
        lines.append(" & ".join(row_se)   + r" \\")
    lines.append(r"\midrule")

    # Observations
    n_row = ["Observations"]
    for n in nobs_list:
        n_row.append(f"{int(n)}" if n is not None else "")
    lines.append(" & ".join(n_row) + r" \\")

    # Degrees of freedom (remove this row if it should not be shown)
    df_row = ["DoF"]
    for d in dfs_list:
        df_row.append(f"{int(d)}" if d is not None else "")
    lines.append(" & ".join(df_row) + r" \\")

    # R^2
    r2_row = [r"$R^2$"]
    for e in extracted:
        r = e["r2"]
        r2_row.append(f"{float(r):.{decimals}f}" if r is not None else "")
    lines.append(" & ".join(r2_row) + r" \\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(rf"\caption{{{caption}}}")
    lines.append(rf"\label{{{label}}}")
    lines.append(r"\begin{flushleft}\footnotesize " + note + r"\end{flushleft}")
    lines.append(r"\end{table}")

    return "\n".join(lines)

# ============================== USAGE ==============================
# Requires N, T, pooled_result, fe_result and fd_result from the cells above.

# 1) Coefficient order of each model's design matrix
expected_orders = [
    ['const', 'log_ansatte', 'log_kapital'],  # OLS (with constant)
    ['log_ansatte', 'log_kapital'],           # FE (no constant)
    ['log_ansatte', 'log_kapital'],           # FD (no constant)
]

# 2) Display order and row labels
var_order  = ['const', 'log_ansatte', 'log_kapital']
var_labels = {'const': 'Konstant', 'log_ansatte': 'Log ansatte', 'log_kapital': 'Log kapital'}

# 3) Number of observations and degrees of freedom per model
n_ols = N * T
n_fe  = N * T
n_fd  = N * (T - 1)

k_ols = 3  # const + log_ansatte + log_kapital
k_fe  = 2
k_fd  = 2

df_ols = n_ols - k_ols
# The within transformation removes one firm-specific mean per firm, so the
# FE regression loses N further degrees of freedom: N*T - N - K.
df_fe  = n_fe - N - k_fe
df_fd  = n_fd - k_fd

nobs_list = [n_ols, n_fe, n_fd]
dfs_list  = [df_ols, df_fe, df_fd]

# 4) Build the LaTeX table, print it and save it
# NOTE(review): the note below says the standard errors are robust, but in
# this notebook only the FE model is estimated with robust_se=True -- confirm
# est.estimate's default and align either the estimations or the note.
latex_table = make_latex_table(
    results=[pooled_result, fe_result, fd_result],
    model_names=["Pooled OLS", "FE (within)", "FD"],
    expected_orders=expected_orders,
    nobs_list=nobs_list,
    dfs_list=dfs_list,
    var_order=var_order,
    var_labels=var_labels,
    include_const=True,
    decimals=3,
    caption="Pooled OLS, FE og FD",
    label="tab:ols_fe_fd",
    note="Robuste SE i parentes. Stjerner baseret på normal-approksimation fra t-stat. * p<0.10, ** p<0.05, *** p<0.01"
)

print(latex_table)

with open("reg_table.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)

print("\nLaTeX-tabellen er også gemt som 'reg_table.tex'.")



## Hausman

In [None]:
# Hausman test of RE against FE, H0: both estimators are consistent (RE efficient).

# Unpack coefficients and covariance matrices as column vectors / 2-D arrays
b_fe = np.asarray(fe_result['b_hat']).reshape(-1, 1)
b_re = np.asarray(re_result['b_hat']).reshape(-1, 1)
cov_fe = np.asarray(fe_result['cov'])
cov_re = np.asarray(re_result['cov'])

# The RE regression keeps the constant as the first column of X = [const, l, k],
# while FE has only the two slopes. Drop the leading RE coefficient(s) so that
# both estimators refer to the same parameters; without this the subtraction
# below fails on mismatched shapes.
n_extra = b_re.shape[0] - b_fe.shape[0]
if n_extra < 0:
    raise ValueError("RE has fewer coefficients than FE; cannot align them for the Hausman test.")
b_re = b_re[n_extra:]
cov_re = cov_re[n_extra:, n_extra:]

# NOTE(review): both models were estimated with robust_se=True; the classical
# Hausman statistic assumes RE is efficient, and cov_fe - cov_re need not be
# positive definite with robust covariances -- confirm this is intended.
b_diff = b_fe - b_re
cov_diff = cov_fe - cov_re
H = b_diff.T @ la.inv(cov_diff) @ b_diff

# Critical value at the 5% level and p-value of chi^2 with M degrees of freedom
M = len(b_fe)
crit_val = chi2.ppf(0.95, df=M)
p_val = chi2.sf(H, df=M)   # survival function: accurate in the far tail, unlike 1 - cdf


# Print the results
print(f'The test statistic is {H.item():.2f}.')
print(f'The critical value at a 5% significance level is {crit_val:.2f}.')
print(f"The p-value is {float(np.asarray(p_val).reshape(-1)[0]):.8f}.")

Vi afviser, at RE og FE er ens, hvilket indikerer korrelation mellem A og regressorerne (kapital og ansatte).

## Test for strict exo 

## Hypothesis test