# 1.Linear regression

In [48]:
import numpy as np

class CustomeLinearRegression:
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    A column of ones is prepended to the features, so ``theta[0]`` is the
    intercept and ``theta[1:]`` are the per-feature weights.

    Args:
        X_data: Feature matrix of shape (num_samples, num_features). A 1-D
            input is treated as a single feature column.
        y_target: One target per sample. Flat sequences and column vectors are
            both accepted; they are stored as a (num_samples, 1) float array.
        learning_rate: Step size of each gradient-descent update.
        num_epochs: Number of full-batch updates performed by ``fit``.
        initial_theta: Optional starting weights with num_features + 1 entries
            (intercept first). When omitted, the fixed starting point
            [10, 3, 2] is used for two-feature data and zeros are used for any
            other feature count.

    Raises:
        ValueError: If there are no samples, if the number of targets differs
            from the number of samples, or if ``initial_theta`` has the wrong
            number of entries.
    """

    # Deterministic default starting point for two-feature data.
    _DEFAULT_THETA = ((10.0,), (3.0,), (2.0,))

    def __init__(self, X_data, y_target, learning_rate = 0.001, num_epochs = 1000, initial_theta=None):
        features = np.asarray(X_data, dtype=float)
        if features.ndim == 1:
            features = features.reshape(-1, 1)

        self.num_samples = features.shape[0]
        if self.num_samples == 0:
            raise ValueError("X_data must contain at least one sample")

        # Bias column first, so theta[0] is the intercept.
        self.X_data = np.c_[np.ones(self.num_samples), features]

        # Force a column vector: a flat target would otherwise broadcast
        # against the (num_samples, 1) predictions into an (n, n) matrix.
        self.y_target = np.asarray(y_target, dtype=float).reshape(-1, 1)
        if self.y_target.shape[0] != self.num_samples:
            raise ValueError(
                f"y_target has {self.y_target.shape[0]} values but X_data has "
                f"{self.num_samples} samples"
            )

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        #initial weights
        num_weights = self.X_data.shape[1]
        if initial_theta is not None:
            # np.array copies, so the caller's array is never modified.
            theta = np.array(initial_theta, dtype=float).reshape(-1, 1)
            if theta.shape[0] != num_weights:
                raise ValueError(
                    f"initial_theta must have {num_weights} entries, got {theta.shape[0]}"
                )
        elif num_weights == len(self._DEFAULT_THETA):
            theta = np.array(self._DEFAULT_THETA, dtype=float)
        else:
            theta = np.zeros((num_weights, 1))
        self.theta = theta
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the mean squared error between predictions and targets."""
        #MSE loss
        residuals = np.asarray(y_pred, dtype=float) - np.asarray(y_target, dtype=float)
        loss = np.mean(np.square(residuals))
        return loss

    def predict(self, X_data):
        """Return ``X_data @ theta``.

        ``X_data`` may already contain the leading bias column (as the stored
        training matrix does) or hold raw features only; in the latter case
        the column of ones is added here.
        """
        X_data = np.asarray(X_data, dtype=float)
        if X_data.ndim == 2 and X_data.shape[1] == len(self.theta) - 1:
            X_data = np.c_[np.ones(X_data.shape[0]), X_data]
        pred = X_data.dot(self.theta)
        return pred

    def fit(self):
        """Run ``num_epochs`` gradient-descent updates on the stored data.

        The loss of every epoch is appended to ``self.losses`` and printed
        every 100 epochs.

        Returns:
            dict: ``'loss'`` is the mean of all recorded epoch losses (NaN if
            none were recorded) and ``'weight'`` is the fitted ``theta``.
        """
        for epoch in range(self.num_epochs):
            #predict
            y_pred = self.predict(self.X_data)
            #loss
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)
            #gradient of the MSE with respect to theta
            loss_grd = 2*(y_pred - self.y_target)/self.num_samples
            gradients = self.X_data.T.dot(loss_grd)
            #update weights (rebinding keeps previously returned arrays intact)
            self.theta = self.theta - self.learning_rate*gradients

            if epoch%100 == 0:
                print(f"Epoch {epoch} - Loss: {loss}")

        if self.losses:
            mean_loss = sum(self.losses)/len(self.losses)
        else:
            # No epoch has run yet, so there is nothing to average.
            mean_loss = float('nan')

        return {
            'loss': mean_loss,
            'weight': self.theta
        }


# Toy dataset: four samples with two features each.
X_data = np.array([
    [3, 12],
    [4, 13],
    [5, 14],
    [6, 15],
])
# One target per sample, written as a column.
y_target = [[60], [55], [66], [93]]

custom_linear_regression = CustomeLinearRegression(
    X_data,
    y_target,
    learning_rate=0.001,
    num_epochs=10000,
)
# Last expression of the cell: the returned summary dict is displayed.
custom_linear_regression.fit()



Epoch 0 - Loss: 433.0
Epoch 100 - Loss: 83.61494924133054
Epoch 200 - Loss: 80.02642137934852
Epoch 300 - Loss: 77.09440973149393
Epoch 400 - Loss: 74.69880556349219
Epoch 500 - Loss: 72.74147386805129
Epoch 600 - Loss: 71.14223330187171
Epoch 700 - Loss: 69.83557158762478
Epoch 800 - Loss: 68.76796182860195
Epoch 900 - Loss: 67.89566979988979
Epoch 1000 - Loss: 67.18296239259618
Epoch 1100 - Loss: 66.60064382072825
Epoch 1200 - Loss: 66.12485962700168
Epoch 1300 - Loss: 65.73611949413248
Epoch 1400 - Loss: 65.41849883145463
Epoch 1500 - Loss: 65.1589864301613
Epoch 1600 - Loss: 64.94695146411968
Epoch 1700 - Loss: 64.7737080021495
Epoch 1800 - Loss: 64.6321591921785
Epoch 1900 - Loss: 64.51650654141551
Epoch 2000 - Loss: 64.42201238331396
Epoch 2100 - Loss: 64.34480580087568
Epoch 2200 - Loss: 64.28172405601912
Epoch 2300 - Loss: 64.23018302922489
Epoch 2400 - Loss: 64.18807136206907
Epoch 2500 - Loss: 64.15366396623433
Epoch 2600 - Loss: 64.12555135592726
Epoch 2700 - Loss: 64.102581

{'loss': 65.24217896505151,
 'weight': array([[9.13256935],
        [9.90334861],
        [1.09647272]])}

In [52]:
def r2score(y_pred, y):
    """Compute the coefficient of determination (R^2) of a set of predictions.

    R^2 = 1 - RSS / TSS, where RSS is the sum of squared residuals and TSS is
    the sum of squared deviations of ``y`` from its mean.

    Args:
        y_pred: Predicted values (array-like).
        y: Ground-truth values (array-like).

    Returns:
        float: 1.0 for a perfect fit; negative when the predictions are worse
        than always predicting the mean of ``y``. When ``y`` is constant
        (TSS == 0) the ratio is undefined, so 1.0 is returned for a perfect
        fit and 0.0 otherwise.

    Raises:
        ValueError: If ``y`` is empty.
    """
    # Flatten both inputs so a column vector and a flat vector line up
    # element-wise instead of broadcasting into a matrix.
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if y.size == 0:
        raise ValueError("y must contain at least one value")

    rss = np.sum((y_pred - y)**2)
    tss = np.sum((y - y.mean())** 2)
    if tss == 0:
        return 1.0 if rss == 0 else 0.0
    r2 = 1 - (rss/tss)
    return float(r2)

# Case 1: predictions identical to the targets
y_pred = np.array([1, 2, 3, 4, 5])
y = np.array([1, 2, 3, 4, 5])
print('Case 1:', r2score(y_pred, y))

# Case 2: same predictions as Case 1, scored against different targets
y = np.array([3, 5, 5, 2, 4])
print('Case 2:', r2score(y_pred, y))

Case 1: 1.0
Case 2: -2.235294117647059


# 2.Polynomial Regression

In [58]:
def create_polynomial_feature(X, degree=2):
    """Append element-wise powers of X as extra columns.

    Args:
        X: An array holding the data.
        degree: An integer, the highest power to generate.

    Returns:
        X followed column-wise by X**2, ..., X**degree. For degree < 2 no
        column is added and X itself is returned.
    """
    features = X
    power = 2
    while power <= degree:
        # Each pass adds one more block of columns: X raised to `power`.
        features = np.c_[features, np.power(X, power)]
        power += 1
    return features

# Single-feature column used to demonstrate the degree-2 expansion.
X = np.array([
    [1],
    [2],
    [3],
])
print(create_polynomial_feature(X, degree=2))

[[1 1]
 [2 4]
 [3 9]]


In [59]:
def create_polynomial_features(X, degree=2):
    """Expand every feature column into its powers 1..degree.

    Columns are grouped per input feature: for features x1, x2 and degree 2
    the output columns are x1, x1^2, x2, x2^2. No cross terms are generated.

    Args:
        X: Array-like of shape (num_samples, num_features). A 1-D input is
            treated as a single feature column.
        degree: An integer >= 1, the highest power to generate.

    Returns:
        Array of shape (num_samples, num_features * degree).

    Raises:
        ValueError: If degree is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")

    X = np.asarray(X)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    if X.shape[1] == 0:
        # Nothing to expand; column_stack rejects an empty list.
        return X.copy()

    # Exactly one column per (feature, power) pair, so no power is emitted
    # twice regardless of the degree.
    columns = [
        np.power(feature, power)
        for feature in X.T
        for power in range(1, degree + 1)
    ]
    return np.column_stack(columns)

# Two-feature sample used to demonstrate the degree-2 expansion.
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
])
X_new = create_polynomial_features(X, degree=2)
print(X_new)

[[ 1  1  2  4]
 [ 2  4  3  9]
 [ 3  9  4 16]]


# 3.Sales Prediction

## 3.1.Load and analyze data

In [83]:
! gdown https://drive.google.com/uc?id=1A8kK0IEsT3w8htzU18ihFr5UV-euhquC

Downloading...
From: https://drive.google.com/uc?id=1A8kK0IEsT3w8htzU18ihFr5UV-euhquC
To: /content/SalesPrediction.csv
  0% 0.00/206k [00:00<?, ?B/s]100% 206k/206k [00:00<00:00, 87.1MB/s]


In [84]:
import pandas as pd

# Load the downloaded sales dataset and preview its first rows
df = pd.read_csv('SalesPrediction.csv')
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [85]:
# Column dtypes and non-null counts; columns with fewer non-null entries than rows contain missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4572 entries, 0 to 4571
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TV            4562 non-null   float64
 1   Radio         4568 non-null   float64
 2   Social Media  4566 non-null   float64
 3   Influencer    4572 non-null   object 
 4   Sales         4566 non-null   float64
dtypes: float64(4), object(1)
memory usage: 178.7+ KB


In [86]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns
df.describe()

Unnamed: 0,TV,Radio,Social Media,Sales
count,4562.0,4568.0,4566.0,4566.0
mean,54.066857,18.160356,3.323956,192.466602
std,26.125054,9.676958,2.21267,93.133092
min,10.0,0.000684,3.1e-05,31.199409
25%,32.0,10.525957,1.527849,112.322882
50%,53.0,17.859513,3.055565,189.231172
75%,77.0,25.64973,4.807558,272.507922
max,100.0,48.871161,13.981662,364.079751


## 3.2.Preprocessing data

In [98]:
#One hot encoding
# Replaces the categorical 'Influencer' column with one boolean indicator column per category
df = pd.get_dummies(df)
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [99]:
#handling null values
# Replace every missing value with the mean of its column
# NOTE(review): the means come from the full frame, before the train/test split done in a later cell,
# and the 'Sales' target is imputed as well - confirm this is intended
df = df.fillna(df.mean())
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4572 entries, 0 to 4571
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   TV                4572 non-null   float64
 1   Radio             4572 non-null   float64
 2   Social Media      4572 non-null   float64
 3   Sales             4572 non-null   float64
 4   Influencer_Macro  4572 non-null   bool   
 5   Influencer_Mega   4572 non-null   bool   
 6   Influencer_Micro  4572 non-null   bool   
 7   Influencer_Nano   4572 non-null   bool   
dtypes: bool(4), float64(4)
memory usage: 160.9 KB


In [100]:
#get features and target
# Every column except 'Sales' is a feature; the printed preview shows them in alphabetical order
X = df[df.columns.difference(['Sales'])]
# Double brackets keep the target as a one-column DataFrame
y = df[['Sales']]

print(X.head())
print(y.head())

#train test split
import sklearn
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed random_state keeps the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)



   Influencer_Macro  Influencer_Mega  Influencer_Micro  Influencer_Nano  \
0             False             True             False            False   
1             False             True             False            False   
2             False             True             False            False   
3             False             True             False            False   
4             False            False              True            False   

       Radio  Social Media    TV  
0   6.566231      2.907983  16.0  
1   9.237765      2.409567  13.0  
2  15.886446      2.913410  41.0  
3  30.020028      6.922304  83.0  
4   8.437408      1.405998  15.0  
        Sales
0   54.732757
1   46.677897
2  150.177829
3  298.246340
4   56.594181


## 3.3.Feature Scaling

In [101]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply the same fitted transform to the test split
scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
X_test_processed = scaler.transform(X_test)
# Display the fitted mean of the first feature column as a quick check
scaler.mean_[0]

0.24779627815866798

## 3.4.Polynomial Features

In [102]:
from sklearn.preprocessing import PolynomialFeatures

# Degree-2 polynomial expansion of the scaled features
poly_features = PolynomialFeatures(degree =2)

# Fit the expansion on the training features, then reuse it unchanged for the test features
X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)



## 3.5.Training & Evaluation

In [103]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Fit a linear model on the polynomial-expanded training features
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Score the held-out test split; r2_score takes the true values first, then the predictions
preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)

0.9951743050661145