# Linear Regression


In [3]:
import numpy as np
class CustomLinearRegression:
    """Linear regression trained with full-batch gradient descent.

    A column of ones is prepended to the training features, so ``theta[0]``
    is the intercept and the remaining rows are the per-feature weights.

    Args:
        X_data: Training features of shape (num_samples, num_features). A 1-D
            array is treated as a single feature column.
        y_target: Training targets of shape (num_samples,) or (num_samples, 1).
        learning_rate: Step size applied to every gradient update.
        num_epochs: Number of gradient updates performed by ``fit``.

    Raises:
        ValueError: If ``y_target`` does not hold exactly one value per sample.
    """

    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        X_data = np.asarray(X_data, dtype=float)
        self.num_samples = X_data.shape[0]
        # Design matrix: the leading column of ones models the intercept.
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]

        # Force a column vector. A flat (n,) target would otherwise broadcast
        # against the (n, 1) predictions into an (n, n) residual matrix.
        self.y_target = np.asarray(y_target, dtype=float).reshape(-1, 1)
        if self.y_target.shape[0] != self.num_samples:
            raise ValueError(
                f"y_target has {self.y_target.shape[0]} values but X_data has "
                f"{self.num_samples} samples"
            )

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initial weights
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return half of the mean squared error between predictions and targets."""
        loss = np.mean((y_pred - y_target) ** 2) / 2
        return loss

    def predict(self, X_data):
        """Predict targets for raw features (without the bias column).

        Args:
            X_data: Features of shape (n, num_features); the bias column is
                added here, so callers must not add it themselves.

        Returns:
            Array of shape (n, 1) with the predicted values.
        """
        X_data = np.asarray(X_data, dtype=float)
        X_data = np.c_[np.ones((X_data.shape[0], 1)), X_data]
        y_pred = np.dot(X_data, self.theta)
        return y_pred

    def fit(self):
        """Run gradient descent on the training data given to the constructor.

        The loss of every epoch is appended to ``self.losses`` and progress is
        printed every 50 epochs.

        Returns:
            dict with ``'loss'`` (mean of all recorded epoch losses, or NaN if
            no epoch has been run) and ``'weight'`` (the fitted ``theta``).
        """
        for epoch in range(self.num_epochs):
            # self.X_data already contains the bias column, so multiply it
            # directly. Going through predict() would prepend a second column
            # of ones and break the shape agreement with theta.
            y_pred = np.dot(self.X_data, self.theta)
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            gradients = np.dot(self.X_data.T, (y_pred - self.y_target)) / self.num_samples
            self.theta -= self.learning_rate * gradients

            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        mean_loss = sum(self.losses) / len(self.losses) if self.losses else float('nan')
        return {
            'loss': mean_loss,
            'weight': self.theta
        }


In [10]:
def r2score(y_pred, y):
    """Compute the coefficient of determination R^2 = 1 - RSS / TSS.

    Both inputs are flattened before comparison, so a column vector of
    predictions (n, 1) can be scored against flat targets (n,) without
    silently broadcasting into an (n, n) residual matrix.

    Args:
        y_pred: Predicted values (array-like).
        y: Ground-truth values (array-like) with the same number of elements.

    Returns:
        The R^2 score. When the targets are constant (TSS == 0) the ratio is
        undefined; 1.0 is returned for perfect predictions and 0.0 otherwise.

    Raises:
        ValueError: If the inputs are empty or differ in number of elements.
    """
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    y = np.asarray(y, dtype=float).ravel()
    if y.size == 0:
        raise ValueError("r2score requires at least one sample")
    if y_pred.size != y.size:
        raise ValueError(
            f"y_pred has {y_pred.size} elements but y has {y.size}"
        )

    rss = np.sum((y_pred - y) ** 2)
    tss = np.sum((y - y.mean()) ** 2)
    if tss == 0:
        # Constant target: avoid 0/0 (nan) and x/0 (-inf).
        return 1.0 if rss == 0 else 0.0
    r2 = 1 - (rss / tss)
    return r2

In [11]:
# Predictions identical to the targets: R^2 should come out as exactly 1.
y = np.array([1, 2, 3, 4, 5])
y_pred = y.copy()
r2score(y_pred, y)

1.0

In [12]:
# Predictions that fit worse than the mean of the targets: R^2 is negative.
y = np.array([3, 5, 5, 2, 4])
y_pred = np.array([1, 2, 3, 4, 5])
r2score(y_pred, y)

-2.235294117647059

# Polynomial Regression

In [22]:
def create_polynomial_features(X, degree=2):
    """Append element-wise powers 2..degree of X as extra columns.

    The output is grouped by power: all original columns first, then all
    squared columns, and so on. With ``degree`` below 2, X is returned
    unchanged.
    """
    higher_powers = [np.power(X, exponent) for exponent in range(2, degree + 1)]
    if not higher_powers:
        return X
    return np.c_[tuple([X] + higher_powers)]
# Single-feature example: three samples in one column.
X = np.array([1, 2, 3]).reshape(-1, 1)
create_polynomial_features(X, degree=2)

array([[1, 1],
       [2, 4],
       [3, 9]])

In [27]:
def create_polynomial_features(X, degree=2):
    """Creates per-feature polynomial features.

    For every input column the powers 1..degree are emitted next to each
    other, so the output columns are ordered
    ``x0, x0^2, ..., x0^degree, x1, x1^2, ...``. No cross terms are created.

    Args:
        X: Array-like of shape (num_samples, num_features). A 1-D input is
            treated as a single feature column.
        degree: Integer >= 1, the highest power to generate.

    Returns:
        Array of shape (num_samples, num_features * degree).

    Raises:
        ValueError: If ``degree`` is smaller than 1.
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")

    X = np.asarray(X)
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    # Each entry has the same (num_samples, num_features) shape as X.
    powers = [np.power(X, d) for d in range(1, degree + 1)]
    # Stack the powers on a trailing axis so that flattening the last two
    # axes keeps all powers of one feature adjacent.
    stacked = np.stack(powers, axis=2)
    return stacked.reshape(X.shape[0], X.shape[1] * degree)


# Two-feature example: each row is one sample.
X = np.array([[1, 2], [2, 3], [3, 4]])
create_polynomial_features(X, degree=2)

array([[ 1,  1,  2,  4],
       [ 2,  4,  3,  9],
       [ 3,  9,  4, 16]])

# Sales Prediction

In [28]:
# Download the sales dataset from Google Drive by file id via the gdown CLI.
# Per the recorded output it is saved as /content/SalesPrediction.csv.
!gdown 1A8kK0IEsT3w8htzU18ihFr5UV-euhquC

Downloading...
From: https://drive.google.com/uc?id=1A8kK0IEsT3w8htzU18ihFr5UV-euhquC
To: /content/SalesPrediction.csv
  0% 0.00/206k [00:00<?, ?B/s]100% 206k/206k [00:00<00:00, 37.7MB/s]


In [43]:
import pandas as pd
# Load the raw sales data, then one-hot encode the categorical "Influencer" column.
sales_raw = pd.read_csv('/content/SalesPrediction.csv')
df = pd.get_dummies(
    sales_raw,
    columns=["Influencer"],
    drop_first=False,  # keep every indicator column
)
print(df.head())


     TV      Radio  Social Media       Sales  Influencer_Macro  \
0  16.0   6.566231      2.907983   54.732757             False   
1  13.0   9.237765      2.409567   46.677897             False   
2  41.0  15.886446      2.913410  150.177829             False   
3  83.0  30.020028      6.922304  298.246340             False   
4  15.0   8.437408      1.405998   56.594181             False   

   Influencer_Mega  Influencer_Micro  Influencer_Nano  
0             True             False            False  
1             True             False            False  
2             True             False            False  
3             True             False            False  
4            False              True            False  


In [42]:
import pandas as pd

# Load the CSV into its own variable for inspection. Rebinding `df` here would
# discard the one-hot encoded frame that the preprocessing cell below selects
# the `Influencer_*` columns from when the notebook is run top to bottom.
df_raw = pd.read_csv('/content/SalesPrediction.csv')  # make sure the path is correct

# Print the column names of the raw DataFrame
print("Các cột trong dữ liệu:", df_raw.columns)


Các cột trong dữ liệu: Index(['TV', 'Radio', 'Social Media', 'Influencer', 'Sales'], dtype='object')


In [44]:
from sklearn.model_selection import train_test_split

# Replace missing values with the mean of their column.
# NOTE(review): the means are taken over the whole frame before the split, so
# held-out rows influence the imputation (and missing 'Sales' targets are
# filled too) — consider imputing after splitting; confirm intent.
df = df.fillna(df.mean())

# Feature matrix (spend columns plus influencer indicators) and target
feature_columns = [
    'TV', 'Radio', 'Social Media',
    'Influencer_Macro', 'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano',
]
X = df[feature_columns]
y = df[['Sales']]

# Hold out 33% of the rows for evaluation; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)


In [47]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures

# Standardize the features; the scaling statistics are learned from the training split only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_processed = scaler.transform(X_train)
X_test_processed = scaler.transform(X_test)

# Expand the standardized features into degree-2 polynomial features
poly_features = PolynomialFeatures(degree=2)
poly_features.fit(X_train_processed)
X_train_poly = poly_features.transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)

# Inspect the first five rows of each polynomial feature matrix
print("X_train_poly:", X_train_poly[:5])
print("X_test_poly:", X_test_poly[:5])


X_train_poly: [[ 1.          0.33869282 -0.3935435  -0.17228084 -0.57395739 -0.57948652
   1.73167391 -0.57848122  0.11471283 -0.13329036 -0.05835028 -0.19439525
  -0.19626792  0.58650552 -0.19592744  0.15487649  0.06780001  0.2258772
   0.22805315 -0.68148902  0.22765753  0.02968069  0.09888186  0.09983443
  -0.29833424  0.09966123  0.32942708  0.33260057 -0.99390703  0.33202357
   0.33580462 -1.00348168  0.33522207  2.99869452 -1.00174084  0.33464052]
 [ 1.         -0.19852366 -0.28899849 -1.47661593 -0.57395739  1.72566569
  -0.57747593 -0.57848122  0.03941164  0.05737304  0.2931432   0.11394412
  -0.34258547  0.11464263  0.11484221  0.08352013  0.42673977  0.16587282
  -0.49871477  0.16688967  0.1671802   2.18039462  0.84751462 -2.54814546
   0.85271016  0.85419459  0.32942708 -0.99045857  0.33144658  0.33202357
   2.97792208 -0.9965304  -0.9982652   0.33347845  0.33405898  0.33464052]
 [ 1.         -0.42875929 -1.07436552 -0.55865556 -0.57395739 -0.57948652
  -0.57747593  1.728664

In [48]:
from sklearn . linear_model import LinearRegression
from sklearn . metrics import r2_score
# Fit ordinary least squares on the polynomial features, then score the held-out split.
poly_model = LinearRegression().fit(X_train_poly, y_train)
preds = poly_model.predict(X_test_poly)
r2_score(y_test, preds)


0.9951796158547627