In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)

def preprocess_data(df):
    """Encode, impute, and split a raw sales frame into features and target.

    Steps:
      1. One-hot encode the frame with ``pd.get_dummies`` (no ``columns``
         argument, so every categorical column is expanded, not just
         ``Influencer``).
      2. Replace missing values with the mean of their column. This runs on
         the whole encoded frame, so ``Sales`` is imputed as well.
      3. Select the fixed feature columns and the ``Sales`` target.

    Returns:
        (X, y): ``X`` is a DataFrame with the three spend columns and the four
        ``Influencer_*`` indicator columns; ``y`` is a single-column DataFrame
        holding ``Sales``.

    Raises:
        KeyError: if any of the expected columns is absent after encoding.
    """
    encoded = pd.get_dummies(df)
    column_means = encoded.mean()
    filled = encoded.fillna(column_means)

    feature_columns = [
        'TV',
        'Radio',
        'Social Media',
        'Influencer_Macro',
        'Influencer_Mega',
        'Influencer_Micro',
        'Influencer_Nano',
    ]
    target_columns = ['Sales']

    X = filled[feature_columns]
    y = filled[target_columns]
    return X, y

def scale_features(X_train, X_test):
    """Standardize both splits using statistics learned from the training split.

    The ``StandardScaler`` is fitted on ``X_train`` only and then applied to
    ``X_train`` and ``X_test``, so the test split does not influence the
    scaling parameters.

    Returns:
        (X_train_scaled, X_test_scaled)
    """
    standardizer = StandardScaler().fit(X_train)
    train_scaled = standardizer.transform(X_train)
    test_scaled = standardizer.transform(X_test)
    return train_scaled, test_scaled

def create_polynomial_features(X_train_scaled, X_test_scaled, degree=2):
    """Expand both splits with ``PolynomialFeatures`` of the given degree.

    The expander is fitted on the training split and the same fitted
    transformer is applied to the test split.

    Returns:
        (X_train_poly, X_test_poly)
    """
    expander = PolynomialFeatures(degree=degree).fit(X_train_scaled)
    train_expanded = expander.transform(X_train_scaled)
    test_expanded = expander.transform(X_test_scaled)
    return train_expanded, test_expanded

def train_and_evaluate(X_train_poly, X_test_poly, y_train, y_test):
    """Fit a ``LinearRegression`` on the training split and score it on the test split.

    Returns:
        (model, predictions, r2): the fitted estimator, its predictions for
        ``X_test_poly``, and the R² score of those predictions against
        ``y_test``.
    """
    # ``fit`` returns the estimator itself, so construction and training chain.
    regressor = LinearRegression().fit(X_train_poly, y_train)

    test_predictions = regressor.predict(X_test_poly)
    score = r2_score(y_test, test_predictions)

    return regressor, test_predictions, score

def main(file_path='SalesPrediction.csv', test_size=0.33, random_state=0):
    """Run the full pipeline: load, preprocess, split, scale, expand, train, report.

    Args:
        file_path: CSV file to load. Defaults to ``'SalesPrediction.csv'``.
        test_size: Fraction of rows held out for evaluation, forwarded to
            ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible.

    The defaults reproduce the previously hard-coded configuration, so calling
    ``main()`` with no arguments behaves as before. The R² score on the
    held-out split is printed; nothing is returned.
    """
    df = load_data(file_path)

    X, y = preprocess_data(df)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    X_train_scaled, X_test_scaled = scale_features(X_train, X_test)

    X_train_poly, X_test_poly = create_polynomial_features(X_train_scaled, X_test_scaled)

    print("Training and evaluating model")
    # Only the score is reported here; the fitted model and predictions are unused.
    _, _, r2_score_val = train_and_evaluate(
        X_train_poly, X_test_poly, y_train, y_test
    )

    print("\nModel Performance:")
    print(f"R² Score: {r2_score_val:.4f}")

# Run the pipeline when this code is executed directly (as a script or as a
# notebook cell), but not when it is imported as a module.
if __name__ == "__main__":
    main()

Training and evaluating model

Model Performance:
R² Score: 0.9952


In [1]:
class CustomLinearRegression:
    """Linear regression trained with full-batch gradient descent on mean squared error.

    A column of ones is prepended to the training features, so ``theta[0]``
    acts as the bias term and ``theta[1:]`` are the feature weights.

    Args:
        X_data: 2-D array-like of training features, one row per sample.
        y_target: Training targets, one value per sample. Stored internally as
            an ``(n_samples, 1)`` column vector.
        learning_rate: Step size applied to every gradient update.
        num_epochs: Number of gradient-descent iterations performed by ``fit``.
    """

    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        self.num_samples = X_data.shape[0]
        # Feature count *before* the bias column is added; ``predict`` compares
        # against it to decide whether the bias column is still missing.
        self.num_features = X_data.shape[1]
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]
        # Force a column vector so ``y_pred - y_target`` stays (n, 1) instead of
        # broadcasting to (n, n) when a 1-D target is supplied.
        self.y_target = np.asarray(y_target).reshape(-1, 1)
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        # Random initial parameters: one row per column of the bias-augmented X.
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the mean squared error between ``y_pred`` and ``y_target``."""
        loss = (y_pred - y_target) ** 2
        loss = np.mean(loss)  # mean of the squared errors
        return loss

    def predict(self, X_data):
        """Return predictions ``X_data @ theta`` as an ``(n_samples, 1)`` array.

        ``X_data`` may be given with or without the leading bias column: when
        its column count equals the raw feature count, a column of ones is
        prepended before the dot product.
        """
        if X_data.shape[1] == self.num_features:
            X_data = np.c_[np.ones((X_data.shape[0], 1)), X_data]  # add the missing bias column
        y_pred = X_data.dot(self.theta)
        return y_pred

    def fit(self):
        """Run ``num_epochs`` gradient-descent steps on the stored training data.

        The loss of every epoch is appended to ``self.losses`` (the list is not
        reset between calls), and the loss is printed every 50 epochs.

        Returns:
            dict with
              * ``'loss'``: mean of all recorded losses, or NaN when no loss
                has been recorded (e.g. ``num_epochs == 0`` on a fresh model);
              * ``'weight'``: the parameter vector ``theta`` (same array object
                that is stored on the instance).
        """
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.X_data)
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            # Gradient step: X^T (y_pred - y) / n, scaled by the learning rate.
            gradients = (1 / self.num_samples) * self.X_data.T.dot(y_pred - self.y_target)
            self.theta -= self.learning_rate * gradients

            if epoch % 50 == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        # Guard the average so a zero-epoch run does not divide by zero.
        mean_loss = sum(self.losses) / len(self.losses) if self.losses else float('nan')
        return {
            'loss': mean_loss,
            'weight': self.theta
        }
