In [3]:
import numpy as np

In [4]:
class CustomLinearRegression:
    """Linear regression fitted with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    X_data : array-like of shape (n_samples, n_features)
        Raw features *without* a bias column; a column of ones is prepended
        internally so that ``theta[0]`` plays the role of the intercept.
    y_target : array-like of shape (n_samples,) or (n_samples, 1)
        Regression targets. They are always stored as a float column vector.
    learning_rate : float, default=0.01
        Step size of each gradient-descent update.
    num_epochs : int, default=10000
        Number of full passes over the data performed by ``fit``.

    Attributes
    ----------
    theta : ndarray of shape (n_features + 1, 1)
        Parameters, randomly initialised with ``np.random.randn``.
    losses : list
        MSE recorded at every epoch (before that epoch's update).
    """

    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        # Convert up-front: a DataFrame mixing float and bool columns would
        # otherwise become an object-dtype array.
        X_data = np.asarray(X_data, dtype=float)
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]
        # Force a column vector. With a 1-D target, `y_pred - y_target` would
        # broadcast (n, 1) - (n,) into an (n, n) matrix and break the update.
        self.y_target = np.asarray(y_target, dtype=float).reshape(self.num_samples, 1)
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    @property
    def leaning_rate(self):
        """Misspelled alias of ``learning_rate``, kept for existing callers."""
        return self.learning_rate

    @leaning_rate.setter
    def leaning_rate(self, value):
        self.learning_rate = value

    def compute_loss(self, y_pred, y_target):
        """Return the mean squared error between predictions and targets."""
        return np.mean((y_pred - y_target) ** 2)

    def predict(self, X_data):
        """Return ``X_data @ theta``.

        ``X_data`` may already contain the leading bias column (as the stored
        training matrix does). If it is 2-D with exactly one column fewer
        than ``theta`` has rows, the bias column is added automatically.
        """
        if np.ndim(X_data) == 2 and np.shape(X_data)[1] == self.theta.shape[0] - 1:
            X_data = np.asarray(X_data, dtype=float)
            X_data = np.c_[np.ones((X_data.shape[0], 1)), X_data]
        return X_data.dot(self.theta)

    def fit(self):
        """Run ``num_epochs`` gradient-descent steps, updating ``theta`` in place.

        The loss is printed every 50 epochs and appended to ``self.losses``
        at every epoch.

        Returns
        -------
        dict
            ``'loss'``: mean of all losses recorded so far (``nan`` when no
            epoch has been run); ``'weight'``: the fitted ``theta`` array.
        """
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.X_data)
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)
            # Gradient of the MSE w.r.t. the predictions, then chain rule to theta.
            loss_grd = 2 * (y_pred - self.y_target) / self.num_samples
            gradients = self.X_data.T.dot(loss_grd)
            self.theta -= self.learning_rate * gradients

            if (epoch % 50) == 0:
                print(f'Epoch {epoch}, loss {loss}')

        # Guard against num_epochs == 0, which would otherwise divide by zero.
        mean_loss = sum(self.losses) / len(self.losses) if self.losses else float('nan')
        return {
            'loss': mean_loss,
            'weight': self.theta
        }

In [5]:
def r2score(y_pred, y):
    """Return the coefficient of determination R^2 of ``y_pred`` against ``y``.

    Note the argument order: predictions first, true values second.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y : array-like
        True values, with the same number of elements as ``y_pred``.

    Returns
    -------
    float
        ``1 - RSS / TSS``. When the true values are constant (TSS == 0) the
        ratio is undefined; 1.0 is returned for a perfect prediction and 0.0
        otherwise, instead of nan / -inf.

    Raises
    ------
    ValueError
        If the inputs are empty or contain a different number of elements.
    """
    # Flatten both inputs so that a column vector paired with a 1-D array is
    # compared element-wise rather than broadcast into an (n, n) matrix.
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if y.size == 0:
        raise ValueError("r2score requires at least one sample")
    if y_pred.size != y.size:
        raise ValueError(
            f"y_pred and y must have the same number of elements, "
            f"got {y_pred.size} and {y.size}"
        )

    rss = np.sum((y_pred - y) ** 2)
    tss = np.sum((y - np.mean(y)) ** 2)
    if tss == 0:
        return 1.0 if rss == 0 else 0.0
    return float(1 - rss / tss)

# Sanity-check r2score on two cases: targets identical to the predictions,
# and targets that the predictions fit worse than the target mean would.
y_pred = np.arange(1, 6)
for y in (y_pred.copy(), np.array([3, 5, 5, 2, 4])):
    print(r2score(y_pred, y))

1.0
-2.235294117647059


In [6]:
def create_polynomial_features(X, degree=2):
    """Append element-wise powers of ``X`` as extra columns.

    The result holds ``X`` followed by ``X ** 2``, ..., ``X ** degree``,
    stacked column-wise. No cross terms and no constant column are produced.
    For ``degree`` below 2 a plain copy of ``X`` is returned.
    """
    if degree < 2:
        return X.copy()
    higher_powers = [X ** exponent for exponent in range(2, degree + 1)]
    return np.c_[(X, *higher_powers)]

# Demo on a single-column input: each row becomes [x, x**2].
X = np.arange(1, 4).reshape(-1, 1)
X_poly = create_polynomial_features(X, degree=2)
print(X_poly)

[[1 1]
 [2 4]
 [3 9]]


In [7]:
import pandas as pd

In [8]:
# Load the sales dataset from a CSV located relative to the working directory.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# saved row index — consider index_col=0; confirm against the file.
df = pd.read_csv("SalesPrediction.csv")
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [10]:
# One-hot encode the frame: in the preview, the text column "Influencer" is
# replaced by the Influencer_Macro/Mega/Micro/Nano indicator columns.
# `df` is rebound here, so the raw frame is no longer available afterwards.
df = pd.get_dummies(df)
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [11]:
# Fill missing values with the per-column mean.
# NOTE(review): the means are computed over the whole frame, before the
# train/test split done in a later cell, so test rows influence the imputed
# values — confirm this is acceptable.
df = df.fillna(df.mean())

# Features: TV, Radio, Social Media plus the four Influencer_* indicator
# columns ("Unnamed: 0" is left out).
X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro', 'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano']]
# Double brackets keep the target as a one-column DataFrame rather than a Series.
y = df[['Sales']]

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows as a test set; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)

In [13]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and standardize it.
scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
# Show the fitted mean of the first feature column ('TV').
scaler.mean_[0]

54.173577723283785

In [14]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the standardized training features with degree-2 polynomial terms.
# NOTE(review): PolynomialFeatures adds a constant column by default
# (include_bias=True) and LinearRegression fits its own intercept by default,
# so that column looks redundant — consider include_bias=False.
poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train_processed)
# Reuse the already-fitted scaler and expansion on the test split (no refit).
X_test_poly = poly_features.transform(scaler.transform(X_test))

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Fit a linear regression on the polynomial training features.
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

# Score on the held-out split. sklearn's r2_score takes (y_true, y_pred),
# the reverse of the r2score(y_pred, y) helper defined earlier in this notebook.
preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)

0.9951466975708932