# Linear regression

In [17]:
import numpy as np
import random
import os

os.chdir('/Users/microwave/AIO_2024/Module_4/Project')
random.seed(42)

In [3]:
class CustomLinearRegression:
    def __init__(self, X_data, y_target, learning_rate = 0.01, epochs = 10000):
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[ np.ones((self.num_samples ,1)),X_data]
        self.y_target = y_target
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    def predict(self, X_data):
        return np.dot(X_data, self.theta)

    def compute_loss(self,y_pred,y_target):
        return np.mean((y_pred - y_target)**2)

    def fit(self):
        for epoch in range(self.epochs):
            y_pred = self.predict(self.X_data)

            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            loss_gradient = 2 * (y_pred - self.y_target)/self.num_samples
            gradients = self.X_data.T.dot(loss_gradient)

            self.theta = self.theta - self.learning_rate * gradients
            if epoch % 50 == 0:
                print(f'Epoch: {epoch}, Loss: {loss}')

        return {
            'loss': sum(self.losses)/len(self.losses),
            'weight':self.theta
        }


In [4]:
def r2score(y_pred,y):
    rss = np.sum((y_pred - y)**2)
    tss = np.sum((y - np.mean(y))**2)
    r2 = 1 - (rss/tss)
    return r2

y_pred = np.array([1,2,3,4,5])
y = np.array([1,2,3,4,5])

print(r2score(y_pred,y))

y_pred = np.array([1,2,3,4,5])
y = np.array([3,5,5,2,4])

print(r2score(y_pred,y))

1.0
-2.235294117647059


# Polynomial Regression

In [5]:
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

In [6]:
def create_polynomial_features(X, degree=2):
    X_new = X
    for d in range(1,degree+1):
        X_new = np.c_[X_new, np.power(X,d)]
    return X_new

X = np.array([[1], [2], [3]])
X_new = create_polynomial_features(X, degree=2)
print(X_new)

[[1 1 1]
 [2 2 4]
 [3 3 9]]


In [13]:
def create_polynomial_features(X,degree=2):
    X_mem = []
    for X_sub in X.T:
        X_sub = X_sub.T
        X_new = X_sub
        for d in range(2,degree+1):
            X_new = np.c_[X_new, np.power(X_sub,d)]
        X_mem.extend(X_new.T)
    return np.c_[X_mem].T 

X = np.array([[1,2],
              [2,3],
              [3,4]])
X_new = create_polynomial_features(X, degree=2)
print(X_new)

[[ 1  1  2  4]
 [ 2  4  3  9]
 [ 3  9  4 16]]


# Sales preditction

In [18]:
import pandas as pd
df = pd.read_csv('Sales Prediction.csv')

In [21]:
df = pd.get_dummies(df)
df.head(5)

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [22]:
df = df.fillna(df.mean())

X = df[['TV','Radio','Social Media', 'Influencer_Macro','Influencer_Mega','Influencer_Micro','Influencer_Nano']]
y = df['Sales']


from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [23]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
scaler.mean_[0]

54.173577723283785

In [24]:
from sklearn.preprocessing import PolynomialFeatures

poly_features = PolynomialFeatures(degree=2)

X_train_poly = poly_features.fit_transform(X_train)
X_test_poly = poly_features.fit_transform(X_test)

In [25]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


lr = LinearRegression()
lr.fit(X_train_poly, y_train)
y_pred = lr.predict(X_test_poly)
r2score(y_pred,y_test)

-4.787949687661898e+25