In [6]:
# Step 1 – Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib
import os
# Step 2 – Read the sales dataset from the mounted Drive folder
dataset_path = '/content/drive/MyDrive/Colab Notebooks/sales_prediction DS'
data = pd.read_csv(dataset_path)
print("Dataset loaded successfully!")
# Preview the first five rows as a quick sanity check of the columns
print(data.head(5))
# Step 3 – Separate the target column from the predictor columns
y = data['Sales']                   # Target: the column to predict
X = data.drop('Sales', axis=1)      # Features: every remaining column
# Step 4 – Hold out 20% of the rows for testing (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Step 5 – Standardize features
# The scaler is fitted on the training split only; the same fitted
# parameters are then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Step 6 – Fit a Linear Regression model on the scaled training data
model = LinearRegression().fit(X_train_scaled, y_train)
# Step 7 – Score the model on the held-out test split
y_pred = model.predict(X_test_scaled)
# Each entry pairs the printed label with the metric function that produces its value
for label, metric in (
    ("✅ Mean Squared Error (MSE):", mean_squared_error),
    ("✅ R2 Score:", r2_score),
):
    print(label, metric(y_test, y_pred))
# Step 8 – Persist the fitted model and scaler to Drive
save_folder = '/content/drive/MyDrive/Colab Notebooks'
os.makedirs(save_folder, exist_ok=True)  # no error if the folder already exists

# Both objects are saved so the scaler fitted above can be reloaded
# together with the model.
for artifact, filename in ((model, 'sales_model.pkl'), (scaler, 'sales_scaler.pkl')):
    joblib.dump(artifact, os.path.join(save_folder, filename))
print("Model & scaler saved successfully! Ready for GitHub upload.")


Dataset loaded successfully!
      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9
✅ Mean Squared Error (MSE): 2.9077569102710927
✅ R2 Score: 0.9059011844150825
Model & scaler saved successfully! Ready for GitHub upload.
