
### Cory Melendez
### Assignment: Linear Regression
### CS5262-50

### group stuff together
randn method

In [12]:
import pandas as pd
from typing import Callable
import numpy as np

In [13]:
food_truck_txt = 'food_truck_data.txt'
housing_txt = 'housing_price_data.txt'

In [14]:
def get_food_truck_data() -> pd.DataFrame:
    df = pd.read_csv(food_truck_txt, header=None)
    df.columns = ['population', 'profit']

    return df

In [15]:
food_truck_df = get_food_truck_data()
food_truck_df

Unnamed: 0,population,profit
0,6.1101,17.59200
1,5.5277,9.13020
2,8.5186,13.66200
3,7.0032,11.85400
4,5.8598,6.82330
...,...,...
92,5.8707,7.20290
93,5.3054,1.98690
94,8.2934,0.14454
95,13.3940,9.05510


## Define a linear model to the data

#### Define our prediction function as

$$
h_\theta(X) = \begin{pmatrix} 1 \\ x_1 \\ ... \\ x_n \end{pmatrix} \cdot \begin{pmatrix} \theta_0 \\ \theta_1 \\ ... \\ \theta_n \end{pmatrix}
$$

$$
= X \cdot \theta
$$

$$
= \theta_0 + \theta_1 x_1 + ... + \theta_n x_n
$$


#### We can then calculate our cost function as the following using least squares:

$$
J(\theta) = (1/2m) \sum_{i=1}^m(h_\theta(X_i) - y_i)^2
$$

#### s.t. $X_i$ is the vector of variables of a row $i$

#### We can then define our gradient descent as:

$$
\theta_j = \theta_j - \alpha (\partial/\partial \theta_j) J(\theta)
$$

####

$$
(\partial/\partial \theta_j) J(\theta) = (\partial/\partial \theta_j) (1/2m) \sum_{i=1}^m(h_\theta(X_i) - y_i)^2
$$

$$
=(1/2m)((\partial/\partial \theta_0)(h_\theta(X_0)-y_0)^2 +
$$

$$
(\partial/\partial \theta_1)(h_\theta(X_1)-y_1)^2 + ... +
$$

$$
(\partial/\partial \theta_m)(h_\theta(X_m)-y_m)^2)
$$

#### Find differential and dont forget chain rule

$$
=(1/2m)(2 (h_\theta(X_0)-y_0) (\partial/\partial \theta_0)(h_\theta(X_0)-y_0) +
$$

$$
2 (h_\theta(X_1)-y_1)(\partial/\partial \theta_1)(h_\theta(X_1)-y_1) + ... +
$$

$$
2 (h_\theta(X_m)-y_m) (\partial/\partial \theta_m)(h_\theta(X_m)-y_m))
$$


#### Remember that:

$$
h_\theta(X) = \begin{pmatrix} 1 \\ x_1 \\ ... \\ x_n \end{pmatrix} \cdot \begin{pmatrix} \theta_0 \\ \theta_1 \\ ... \\ \theta_n \end{pmatrix} = \theta_0 + \theta_1 x_1 + ... + \theta_n x_n
$$

#### So,

$$
= (1/2m)(2 (h_\theta(X_0)-y_0) (\partial/\partial \theta_0)(\theta_0 + \theta_1 x_1 + ... + \theta_n x_n - y_0) +
$$

$$
2 (h_\theta(X_0)- y_1)(\partial/\partial \theta_1)(\theta_0 + \theta_1 x_1 + ... + \theta_n x_n - y_1) + ... +
$$

$$
2 (h_\theta(X_m)-y_m) (\partial/\partial \theta_m)(\theta_0 + \theta_1 x_1 + ... + \theta_m x_m - y_m))
$$


#### Therefore,
$$
(\partial/\partial \theta)J_\theta= (1/m)( (h_\theta(X_0)-y_0) (\partial/\partial \theta_0)(\theta_0 + \theta_1 x_1 + ... + \theta_n x_n - y_0) +
$$

$$
(h_\theta(X_0)- y_1)(\partial/\partial \theta_1)(\theta_0 + \theta_1 x_1 + ... + \theta_n x_n - y_1) + ... +
$$

$$
(h_\theta(X_m)-y_m) (\partial/\partial \theta_m)(\theta_0 + \theta_1 x_1 + ... + \theta_n x_n - y_m))
$$


#### Which can be simplified

$$
(\partial/\partial \theta)J_\theta = (1/m)((h_\theta(X_0) - y_0)(1) +
$$

$$
(h_\theta(X_1) - y_0)(x_1) + ... +
$$

$$
(h_\theta(X_m) - y_m)(x_n))
$$


#### Thus,

$$
(\partial/\partial \theta)J_\theta = (1/m) \sum_{i=0}^m (h_\theta(X_i) - y_i)x_{i,j}
$$


#### So our gradient descent is:
$$
\theta_j := \theta_j - (\alpha/m) \sum_{i=0}^m(h_\theta(X_i) - y_i)x_{i,j}
$$




#### We can re-arrange:

$$
\theta_j := \theta_j + (\alpha/m) \sum_{i=1}^m(y^{(i)} - h_\theta(x^{(i)}))x_{j}^{(i)}
$$


In [16]:
def get_X_j(_X: pd.DataFrame, jth_col: int) -> pd.Series:
    if jth_col == 0:
        return pd.Series([1] * get_m(_X))

    return _X.iloc[:, jth_col-1]

def get_X_i(_X: pd.DataFrame, ith_row: int) -> pd.Series:
    if ith_row == 0:
        return pd.Series([1] * (_X.shape[1] + 1))

    s: pd.Series = _X.iloc[ith_row-1]
    s = pd.Series([1]).append(s, ignore_index=True)
    return s

def get_m(_X: pd.DataFrame) -> int:
    return _X.shape[0]

def get_X_i_j(_X: pd.DataFrame, jth_col: int, ith_row: int) -> float:
    s = get_X_j(_X, jth_col)
    return s.iloc[ith_row-1]

def get_prediction_series(_X: pd.DataFrame, _thetas: pd.Series) -> pd.Series:
    s = []
    for i in range(1, get_m(_X)+1):
        x_i = get_X_i(_X, i)
        s.append(x_i.dot(thetas))

    return pd.Series(data=s)


In [17]:
Y = food_truck_df['profit']

In [18]:
X = food_truck_df[['population']]

In [19]:
print(f'rows: {X.shape[0]}, columns: {X.shape[1]}')

rows: 97, columns: 1


In [20]:
# thetas = pd.Series(data=[2,-1])
# alpha = .1
# #number of rows
# m = get_m(X)
# X_0 = get_X_j(X,0)
# P = get_prediction_series(X, thetas, 0)
# y_minus_p = Y - P

df = pd.DataFrame(data={'living_area': [2104, 1600, 2400, 1416,3000], 'price': [400,330,369,232,540]})
# df = pd.DataFrame(data={'living_area': [2104], 'price': [400]})
X = df[['living_area']]
Y = df['price']
thetas = pd.Series(data=[2,-1])


In [21]:
# previous_thetas = thetas.copy()
# for it in range(0,1):
#     for j in range(0, X.shape[1] + 1):
#         print(f'it: {it}, j: {j}')
#         X_j = get_X_j(X, j)
#         P = get_prediction_series(X, previous_thetas)
#         Y_minus_P = Y-P
#         X_j_dot_Y_minus_P = X_j.dot(Y_minus_P)
#         thetas[j] = previous_thetas[j] + (.1 * X_j_dot_Y_minus_P)
#     previous_thetas = thetas

# X_j = get_X_j(X, 0)
# P = get_prediction_series(X, thetas)
# Y_minus_P = Y-P
# X_j_dot_Y_minus_P = X_j.dot(Y_minus_P)
# theta_0 = thetas[0] + (.0001 * X_j_dot_Y_minus_P)
#
# X_j = get_X_j(X, 1)
# P = get_prediction_series(X, thetas)
# Y_minus_P = Y-P
# X_j_dot_Y_minus_P = X_j.dot(Y_minus_P)
# theta_1 = thetas[1] + (.0001 * X_j_dot_Y_minus_P)




In [22]:

previous_thetas = thetas.copy()
new_thetas = thetas.copy()
for it in range(0, 1000):
    previous_thetas = new_thetas.copy()
    for j in range(0, X.shape[1]+1):
        X_j = get_X_j(X, j)
        P = get_prediction_series(X, previous_thetas)
        Y_minus_P = Y-P
        X_j_dot_Y_minus_P = X_j.dot(Y_minus_P)
        new_thetas[j] = previous_thetas[j] + (.001 * X_j_dot_Y_minus_P)


  s = pd.Series([1]).append(s, ignore_index=True)
  s = pd.Series([1]).append(s, ignore_index=True)


KeyboardInterrupt: 