In [8]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [11]:
# Read the student-performance CSV into a DataFrame; the relative path is
# resolved against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='student_performance.csv')

In [119]:
# Copy the two columns used by the regression into NumPy arrays:
# x is the single predictor and y is the response being fitted.
x, y = (np.array(df[column]) for column in ("PreviousMarks", "PerformanceIndex"))

El modelo de regresión lineal tiene la siguiente forma matricial:

$$
\mathbf{y} = \mathbf{X}\boldsymbol{\beta} 
$$

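Al minimizar el error cuadrático $\lVert \mathbf{y} - \mathbf{X}\boldsymbol{\beta} \rVert^2$ se obtienen las ecuaciones normales:

$$
\mathbf{X}^\top\mathbf{X}\hat{\boldsymbol{\beta}} =\mathbf{X}^\top \mathbf{y}
$$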

$$
\hat{\boldsymbol{\beta}} = (\mathbf{X}^\top \mathbf{X})^{-1}\mathbf{X}^\top \mathbf{y}
$$

Usando la descomposición QR:

$$
\mathbf{X} = \mathbf{Q}\mathbf{R}
$$

Derivación paso a paso (usando que $\mathbf{Q}^\top\mathbf{Q} = \mathbf{I}$ y que $\mathbf{R}$ es invertible):

$$
\begin{aligned}
\hat{\boldsymbol{\beta}} 
&= (\mathbf{R}^\top \mathbf{Q}^\top \mathbf{Q}\mathbf{R})^{-1}\mathbf{R}^\top \mathbf{Q}^\top \mathbf{y} \\[2mm]
&= (\mathbf{R}^\top \mathbf{R})^{-1}\mathbf{R}^\top \mathbf{Q}^\top \mathbf{y} \\[2mm]
&= \mathbf{R}^{-1}\mathbf{Q}^\top \mathbf{y}
\end{aligned}
$$

Finalmente, basta con resolver el sistema triangular superior

$$
\mathbf{R}\hat{\boldsymbol{\beta}} = \mathbf{Q}^\top \mathbf{y}
$$



En forma expandida, el sistema $\mathbf{X}\boldsymbol{\beta} = \mathbf{y}$ es:
$$
\mathbf{X} \boldsymbol{\beta} =
\begin{bmatrix}
1 & x_{11} & x_{12} & \cdots & x_{1p} \\
1 & x_{21} & x_{22} & \cdots & x_{2p} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & x_{n1} & x_{n2} & \cdots & x_{np} \\
\end{bmatrix}
\begin{bmatrix}
\beta_0 \\
\beta_1 \\
\vdots \\
\beta_p
\end{bmatrix}
=
\begin{bmatrix}
y_1 \\
y_2 \\
\vdots \\
y_n
\end{bmatrix}
$$



In [125]:
# Least-squares fit via the QR factorisation X = QR: the coefficients are the
# solution of the small system R @ beta = Q.T @ y.
x = np.reshape(x, (-1, 1))  # single feature as a column, shape (n, 1)

# Design matrix: a leading column of ones (intercept) followed by the feature.
X = np.column_stack((np.ones(len(x)), x))

Q, R = np.linalg.qr(X)
Qt_y = np.matmul(Q.T, y)

beta = np.linalg.solve(R, Qt_y)
print("\nbeta (intercepto, pendiente):\n", beta)


beta (intercepto, pendiente):
 [-15.18179943   1.0138367 ]


In [121]:
# Closed-form simple linear regression, as a cross-check of the QR solution:
#   beta_1 = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))**2)
#   beta_0 = mean(y) - beta_1 * mean(x)
#
# Work on flattened 1-D views instead of rebinding the shared `x`. With a
# column-shaped x of shape (n, 1), (x - x_mean) * (y - y_mean) would broadcast
# against the 1-D y into an (n, n) matrix and silently give a wrong slope, so
# the result must not depend on which cell last reshaped `x`.
x_flat = np.ravel(x)
y_flat = np.ravel(y)

x_mean = np.mean(x_flat)
y_mean = np.mean(y_flat)

x_dev = x_flat - x_mean  # deviations of the predictor from its mean
beta_1 = np.sum(x_dev * (y_flat - y_mean)) / np.sum(x_dev**2)
beta_0 = y_mean - beta_1 * x_mean
print(f"beta0 = {beta_0:.4f}")
print(f"beta1 = {beta_1:.4f}")

beta0 = -15.1818
beta1 = 1.0138


In [None]:
# Reference fit with scikit-learn, to compare against the manual solutions.
# LinearRegression is imported in the notebook's top import cell so that all
# dependencies are visible in one place.
x = x.reshape(-1, 1)  # fit() expects a 2-D feature matrix: (n_samples, n_features)

# Initialize and fit the model
model = LinearRegression()
model.fit(x, y)
print(model.intercept_)
print(model.coef_)


-15.181799427126236
[1.0138367]
