In [None]:
import numpy as np
import pandas as pd

# Create Synthetic Dataset

In [25]:
n_samples = 1000
n_features = 3

X = np.random.randn(n_samples, n_features)  # independent standard normal

# True coefficients
true_coef = np.array([2.5, -1.5, 3.0])  
true_intercept = 1.0

# Generate target
noise = np.random.normal(0, 1.5, n_samples) # mean 0, std 1.5
y = true_intercept + X @ true_coef + noise

# Create DataFrame
feature_names = [f'X{i+1}' for i in range(n_features)]
df = pd.DataFrame(X, columns=feature_names)
df['y'] = y
df.head(2)

Unnamed: 0,X1,X2,X3,y
0,-0.360113,-0.518994,-2.276932,-6.232628
1,-0.837049,-0.405215,0.444886,-2.016484


# Running OLS

## 1. Run OLS with statsmodels

In [24]:
import statsmodels.api as sm
# OLS with statsmodels
model = sm.OLS(y,sm.add_constant(X)).fit()
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.885
Model:,OLS,Adj. R-squared:,0.885
Method:,Least Squares,F-statistic:,2566.0
Date:,"Fri, 20 Feb 2026",Prob (F-statistic):,0.0
Time:,13:59:15,Log-Likelihood:,-1827.8
No. Observations:,1000,AIC:,3664.0
Df Residuals:,996,BIC:,3683.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.9943,0.048,20.825,0.000,0.901,1.088
x1,2.4978,0.046,54.005,0.000,2.407,2.589
x2,-1.4944,0.047,-31.979,0.000,-1.586,-1.403
x3,3.0318,0.048,63.413,0.000,2.938,3.126

0,1,2,3
Omnibus:,2.517,Durbin-Watson:,2.0
Prob(Omnibus):,0.284,Jarque-Bera (JB):,2.485
Skew:,0.067,Prob(JB):,0.289
Kurtosis:,3.204,Cond. No.,1.09


## 2. Run OLS with numpy

In [26]:
import numpy as np

In [37]:
#(X^TX)^-1 X^Ty
X_ = np.column_stack([np.ones(X.shape[0]), X])
beta_hat = np.linalg.inv(X_.T@X_)@X_.T@y # normal equation

intercept = beta_hat[0]
coefficients = beta_hat[1:]

print(f"Intercept: {intercept:.4f}")
print(f"Coefficients: {coefficients}")

y_pred_manual = X_ @ beta_hat

Intercept: 1.0282
Coefficients: [ 2.53838269 -1.59908112  2.95981423]
