In [None]:
# 1.linear regression
import numpy as np

class CustomLinearRegression:
    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        self.num_samples = X_data.shape[0]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]  # Add bias term
        self.y_target = y_target
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initialize weights randomly
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        # Compute Mean Squared Error (MSE)
        error = y_pred - y_target
        loss = (1 / (2 * self.num_samples)) * np.sum(np.square(error))  # MSE
        return loss

    def predict(self, X_data):
        # Add bias term to the input data (if not already present)
        X_data_with_bias = np.c_[np.ones((X_data.shape[0], 1)), X_data]
        # Calculate predictions using the current weights (theta)
        y_pred = X_data_with_bias.dot(self.theta)
        return y_pred

    def fit(self):
        for epoch in range(self.num_epochs):
            # Make predictions for the current epoch
            y_pred = self.predict(self.X_data)

            # Compute the loss
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            # Gradient descent step
            gradient = (1 / self.num_samples) * self.X_data.T.dot(y_pred - self.y_target)
            self.theta -= self.learning_rate * gradient

            # Print loss every 50 epochs
            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        return {
            'loss': sum(self.losses) / len(self.losses),
            'weight': self.theta
        }

In [None]:
import numpy as np

def r2score(y_pred, y):
    rss = np.sum((y_pred - y) ** 2)  # Residual sum of squares (RSS)
    tss = np.sum((y - y.mean()) ** 2)  # Total sum of squares (TSS) based on the mean of actual values
    r2 = 1 - (rss / tss)
    return r2

# Case 1
y_pred_case1 = np.array([1, 2, 3, 4, 5])
y_case1 = np.array([1, 2, 3, 4, 5])
r2_case1 = r2score(y_pred_case1, y_case1)

# Case 2
y_pred_case2 = np.array([1, 2, 3, 4, 5])
y_case2 = np.array([3, 5, 5, 2, 4])
r2_case2 = r2score(y_pred_case2, y_case2)

r2_case1, r2_case2

(1.0, -2.235294117647059)

In [None]:
# 2.polynomial regression
import numpy as np

def create_polynomial_features(X, degree=2):
    X_new = np.hstack([X ** i for i in range(1, degree + 1)])  # Stack columns of X raised to powers from 1 to degree
    return X_new

X = np.array([[1], [2], [3]])
X_poly = create_polynomial_features(X, degree=2)
print(X_poly)

[[1 1]
 [2 4]
 [3 9]]


In [6]:
import numpy as np

def create_polynomial_features(X, degree=2):
    X_mem = []
    for X_sub in X.T:
        X_new = X_sub
        for d in range(2, degree + 1):
            X_new = np.c_[X_new, np.power(X_sub, d)]
        X_mem.append(X_new.T)
    return np.c_[X_mem].T

X = np.array([[1, 2], [2, 3], [3, 4]])
X_poly = create_polynomial_features(X, degree=2)
print(X_poly)

[[[ 1  2]
  [ 1  4]]

 [[ 2  3]
  [ 4  9]]

 [[ 3  4]
  [ 9 16]]]


In [8]:
# sales prediction
import pandas as pd

df = pd.read_csv('SalesPrediction.csv')
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [10]:
df = pd.get_dummies(df)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4572 entries, 0 to 4571
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   TV                4562 non-null   float64
 1   Radio             4568 non-null   float64
 2   Social Media      4566 non-null   float64
 3   Sales             4566 non-null   float64
 4   Influencer_Macro  4572 non-null   bool   
 5   Influencer_Mega   4572 non-null   bool   
 6   Influencer_Micro  4572 non-null   bool   
 7   Influencer_Nano   4572 non-null   bool   
dtypes: bool(4), float64(4)
memory usage: 160.9 KB


In [11]:
# Handle Null values
df = df.fillna(df.mean())

# Get features
X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro', 'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano']]
y = df[['Sales']]

# Train Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0
)

In [12]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_processed = scaler.fit_transform(X_train)
scaler.mean_[0]

54.173577723283785

In [14]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler  # Make sure StandardScaler is imported

poly_features = PolynomialFeatures(degree=2)
X_train_poly = poly_features.fit_transform(X_train_processed)

# Apply StandardScaler to X_test before PolynomialFeatures
scaler = StandardScaler()  # Instantiate StandardScaler
X_test_processed = scaler.fit_transform(X_test) #Scale test data

X_test_poly = poly_features.transform(X_test_processed)

from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X_train_poly, y_train)

# And then make predictions on X_test_poly:
y_pred = model.predict(X_test_poly)

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)
preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)

-2.2119776223197106e+21