In [2]:
import numpy as np

In [4]:
class CustomLinearRegression:
    def __init__ ( self , X_data , y_target , learning_rate =0.01 , num_epochs =10000) :
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones(self.num_samples, 1), X_data]
        self.y_target = y_target
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initialize the weights
        self.theta = np.random.rand(self.X_data.shape[1], 1)
        self.losses = []

    def predict(self, X_data):
        y_pred = X_data.dot(self.theta)
        return y_pred

    def compute_loss(self, y_pred, y_target):
        loss = np.mean((y_pred - y_target) ** 2)
        return loss

    def fit(self):
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.X_data)
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            loss_gradient = 2 * (y_pred - self.y_target) / self.num_samples
            gradient = self.X_data.T.dot(loss_gradient)

            self.theta = self.theta - self.learning_rate * gradient

            if (epoch % 50) == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

        return {
            'loss' : sum(self.losses) / len(self.losses),
            'weight' : self.theta
        }

def r2score(y_pred, y):
    rss = np.sum((y_pred - y) ** 2)
    tss = np.sum((y - y.mean()) ** 2)
    r2 = 1 - (rss / tss)
    return r2

# case 1
y_pred = np.array([1 , 2 , 3 , 4 , 5])
y = np.array([1 , 2 , 3 , 4 , 5])
print(r2score(y_pred, y))

# case 2
y_pred = np.array([1 , 2 , 3 , 4 , 5])
y = np.array([3 , 5 , 5 , 2 , 4])
print(r2score(y_pred, y))

1.0
-2.235294117647059


In [5]:
def create_polynomial_features(X , degree =2):
    X_new = X
    for d in range(2 , degree +1):
        X_new = np.c_[X_new , np.power(X , d)]

    return X_new

X = np . array ([[1] , [2] , [3]])
X_new = create_polynomial_features(X , degree =2)
print(X_new)

[[1 1]
 [2 4]
 [3 9]]


In [6]:
X = np . array ([[1 , 2] ,
[2 , 3] ,
[3 , 4]])

degree = 2

def create_polynomial_features(X , degree =2) :
    X_mem = []
    for X_sub in X.T:
        X_sub = X_sub.T
        X_new = X_sub
        for d in range(2 , degree +1):
            X_new = np.c_[X_new , np.power(X_sub , d)]
        X_mem.append(X_new.T)
    return np.c_[X_mem].T

X_new = create_polynomial_features(X , degree =2)
print(X_new)

[[[ 1  2]
  [ 1  4]]

 [[ 2  3]
  [ 4  9]]

 [[ 3  4]
  [ 9 16]]]


In [7]:
import pandas as pd

df = pd.read_csv('./SalesPrediction.csv')
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4572 entries, 0 to 4571
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   TV            4562 non-null   float64
 1   Radio         4568 non-null   float64
 2   Social Media  4566 non-null   float64
 3   Influencer    4572 non-null   object 
 4   Sales         4566 non-null   float64
dtypes: float64(4), object(1)
memory usage: 178.7+ KB


In [9]:
df.describe()

Unnamed: 0,TV,Radio,Social Media,Sales
count,4562.0,4568.0,4566.0,4566.0
mean,54.066857,18.160356,3.323956,192.466602
std,26.125054,9.676958,2.21267,93.133092
min,10.0,0.000684,3.1e-05,31.199409
25%,32.0,10.525957,1.527849,112.322882
50%,53.0,17.859513,3.055565,189.231172
75%,77.0,25.64973,4.807558,272.507922
max,100.0,48.871161,13.981662,364.079751


In [10]:
df = pd.get_dummies(df)
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [11]:
df.isnull().sum()

TV                  10
Radio                4
Social Media         6
Sales                6
Influencer_Macro     0
Influencer_Mega      0
Influencer_Micro     0
Influencer_Nano      0
dtype: int64

In [12]:
df = df.fillna(0)
df.isnull().sum()

TV                  0
Radio               0
Social Media        0
Sales               0
Influencer_Macro    0
Influencer_Mega     0
Influencer_Micro    0
Influencer_Nano     0
dtype: int64

In [13]:
df = df.fillna(df.mean())
df.isnull().sum()

TV                  0
Radio               0
Social Media        0
Sales               0
Influencer_Macro    0
Influencer_Mega     0
Influencer_Micro    0
Influencer_Nano     0
dtype: int64

In [16]:
df[['TV', 'Radio', 'Social Media', 'Sales']].corr()

Unnamed: 0,TV,Radio,Social Media,Sales
TV,1.0,0.860518,0.522565,0.98857
Radio,0.860518,1.0,0.60445,0.86379
Social Media,0.522565,0.60445,1.0,0.526777
Sales,0.98857,0.86379,0.526777,1.0


In [22]:
X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro' ,
        'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano']]
y = df['Sales']

In [23]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0   
)

X_train.shape, X_test.shape

((3063, 7), (1509, 7))

In [24]:
y_train.shape, y_test.shape

((3063,), (1509,))

In [25]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
scaler.mean_[0]

53.99706170421156

In [None]:
from sklearn . preprocessing import PolynomialFeatures

poly_features = PolynomialFeatures(degree =2)
X_train_poly = poly_features.fit_transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed) 