In [1]:
# Import required libraries for data handling, preprocessing, and modeling
import pandas as pd  # For data manipulation and loading CSV files
from sklearn.model_selection import train_test_split  # For splitting data into training and test sets
from sklearn.preprocessing import StandardScaler  # For scaling features to standard normal distribution
from sklearn.linear_model import LinearRegression, Lasso  # For linear regression and Lasso models
import joblib  # For saving and loading models and scalers
import numpy as np  # For numerical operations and array handling

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the advertising dataset from a CSV file
# The dataset contains columns: TV, Radio, Newspaper (features), and Sales (target)
df = pd.read_csv("F:\\Dataset\\06. Advertising.csv")

In [4]:
df

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,14.0
197,177.0,9.3,6.4,14.8
198,283.6,42.0,66.2,25.5


In [5]:
# Check the shape of the dataset to understand its dimensions
# Output: (200, 4) indicates 200 rows and 4 columns (3 features + 1 target)
df.shape

# Check for missing values in each column
# Output: All zeros indicate no missing data
df.isnull().sum()

# Verify data types of each column
# Output: All columns are float64, suitable for numerical computations
df.dtypes

TV           float64
Radio        float64
Newspaper    float64
Sales        float64
dtype: object

In [6]:
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [7]:
X = df.drop("Sales", axis = 1)

In [8]:
X

Unnamed: 0,TV,Radio,Newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4
...,...,...,...
195,38.2,3.7,13.8
196,94.2,4.9,8.1
197,177.0,9.3,6.4
198,283.6,42.0,66.2


In [9]:
y = df.Sales

In [10]:
X.shape

(200, 3)

In [11]:
y.shape

(200,)

In [12]:
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [13]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((160, 3), (40, 3), (160,), (40,))

In [14]:
scaler= StandardScaler()

In [15]:
x_train_scale = scaler.fit_transform(x_train)

In [16]:
x_test_scale = scaler.transform(x_test)

In [17]:
x_train_scale

array([[-4.04248386e-01, -1.02823707e+00, -3.37675384e-01],
       [ 3.20607716e-01, -9.19827737e-01, -1.16143931e+00],
       [-1.27051084e+00,  2.59123702e-01,  2.54250789e-01],
       [-1.04235941e+00, -6.96233499e-01, -5.74445854e-01],
       [ 8.79103401e-01, -1.38734296e+00, -7.07629243e-01],
       [-1.32873699e+00, -1.29926038e+00, -7.96418169e-01],
       [-9.43731452e-01, -4.65863678e-01,  5.35415722e-01],
       [-3.23140256e-02,  6.94073782e-02, -5.34984109e-01],
       [-5.39713297e-01, -1.16374872e+00,  2.19721762e-01],
       [-8.75998996e-01,  3.13328366e-01, -6.87898371e-01],
       [-8.53421511e-01,  1.62101588e+00,  2.24654481e-01],
       [ 2.18414888e-01, -1.06889056e+00, -8.45745350e-01],
       [-1.67928215e+00,  1.76330312e+00,  2.22240532e+00],
       [-1.68997675e+00,  1.08574483e+00,  1.01882210e+00],
       [-8.74810708e-01, -1.49575229e+00, -7.47090988e-01],
       [-2.45017701e-01, -1.16374872e+00,  6.68075010e-02],
       [-9.10459368e-01, -3.98107848e-01

In [18]:
x_test_scale

array([[ 0.15781217,  0.59112727,  1.13227461],
       [ 0.53925283,  1.68199613,  1.13227461],
       [ 1.69783431,  0.36753303,  0.65380096],
       [-1.64363349,  0.95023317,  0.75245532],
       [ 0.83513672,  1.77007871, -1.31928629],
       [-0.89025846,  0.82149709,  1.12240918],
       [ 0.79354661,  1.42452397, -0.13543394],
       [-1.18851892, -0.76398933, -0.56951314],
       [ 0.86009078, -1.31958713, -0.8309472 ],
       [ 0.29803023, -0.02545078,  0.07667294],
       [-1.40835233,  0.11683646, -1.36861347],
       [-1.11484502, -1.16374872, -0.01211599],
       [ 1.00387371, -1.31958713,  2.70581169],
       [-1.71849568,  0.47594236, -1.01345777],
       [-0.12500054, -1.40766971, -0.16503025],
       [ 0.23980408, -1.02146148,  0.25918351],
       [-1.69591819,  0.35398186,  0.56501203],
       [ 0.56539519,  0.02875388, -0.7766873 ],
       [-0.88788188, -0.17451361,  0.12600012],
       [ 1.03833409,  0.31332837, -0.93453428],
       [ 0.94445928,  0.63855635,  2.182

In [19]:
lr = LinearRegression()

In [20]:
lr.fit(x_train_scale,y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [21]:
lr.coef_

array([4.58720774, 1.48984025, 0.08791597])

In [22]:
lr.intercept_

15.330625000000003

In [23]:
lr.score(x_test_scale,y_test)

0.9059011844150826

In [26]:
y_pred = lr.predict(x_test_scale)

In [89]:
pd.DataFrame({"y_true": y_test, 
             "y_pred": y_pred})

Unnamed: 0,y_true,y_pred
95,16.9,17.034772
15,22.4,20.40974
30,21.4,23.723989
158,7.3,9.272785
128,24.7,21.682719
115,12.6,12.569402
69,22.3,21.081195
170,8.4,8.69035
174,16.5,17.237013
45,16.1,16.666575


In [25]:
from sklearn.metrics import mean_squared_error,mean_absolute_error, r2_score

In [28]:
mse = mean_squared_error(y_test,y_pred)

In [57]:
mse

2.9077569102710923

In [30]:
rmse= np.sqrt(mse)

In [32]:
mae = mean_absolute_error(y_test, y_pred)

In [33]:
rmse

1.7052146229349232

In [34]:
mae

1.2748262109549344

In [37]:
r2_score = r2_score(y_test,y_pred)

In [61]:
r2_score

0.9059011844150826

In [38]:
from sklearn.linear_model import Lasso, Ridge, ElasticNet

In [39]:
lasso = Lasso()

In [42]:
lasso.fit(x_train_scale,y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [43]:
lasso.score(x_test_scale,y_test)

0.8222164135010256

In [48]:
rd = Ridge()

In [49]:
rd.fit(x_test_scale,y_test)

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [51]:
rd.score(x_test_scale,y_test)

0.9169030391511828

In [52]:
el = ElasticNet()

In [53]:
el.fit(x_train_scale,y_train)

0,1,2
,alpha,1.0
,l1_ratio,0.5
,fit_intercept,True
,precompute,False
,max_iter,1000
,copy_X,True
,tol,0.0001
,warm_start,False
,positive,False
,random_state,


In [54]:
el.score(x_test_scale,y_test)

0.7386541859953734

In [55]:
y_pred_rd = rd.predict(x_test_scale)

In [60]:
y_pred_rd

array([16.56285964, 20.27116147, 23.02741217,  9.27777877, 21.99778635,
       12.34241082, 21.05290132,  8.26219601, 16.3458174 , 16.15302989,
        8.9980185 ,  7.79253047, 16.6201955 ,  8.25024876, 11.74645197,
       14.03457835,  7.96277742, 17.52643935, 10.61374912, 20.16591997,
       20.03083661, 11.84769129, 11.25817364, 21.69420827,  9.06385024,
        7.22879409, 20.73284146, 13.37237821, 10.20938471,  8.10706218,
       15.01374049, 10.38931426, 20.63258246, 10.17974403, 20.61204208,
       21.08880608, 12.6574514 , 22.00732001, 12.25726428,  5.81225096])

In [58]:
mse = mean_squared_error(y_test,y_pred_rd)

In [62]:
mse

2.5677874968853844

In [63]:
r2_score_rd = r2_score(y_test, y_pred_rd)

TypeError: 'float' object is not callable

In [64]:
rd.score(x_test_scale, y_test)

0.9169030391511828

In [66]:
joblib.dump(lr, "linear.pkl")

['linear.pkl']

In [67]:
joblib.dump(scaler, "scale.pkl")

['scale.pkl']

In [82]:
# Example: Predict Sales for new data
# Define new data as a list of advertising budgets [TV, Radio, Newspaper]
new_data = np.array([[250, 35, 65],  # Sample data point (similar to first row)
                     [44.5, 39.3, 45.1]])   # Another sample data point

In [83]:
new_data

array([[250. ,  35. ,  65. ],
       [ 44.5,  39.3,  45.1]])

In [84]:
# Load the saved scaler and model
loaded_scaler = joblib.load('scale.pkl')  # Load the saved scaler
loaded_model = joblib.load('linear.pkl')  # Load the saved Lasso model

In [85]:
# Scale the new data using the loaded scaler
new_data_scl = loaded_scaler.transform(new_data)

In [86]:
new_data_scl

array([[ 1.18805846,  0.82149709,  1.72913351],
       [-1.2538748 ,  1.11284716,  0.7475226 ]])

In [87]:
# Predict Sales for the new data using the loaded model
new_predictions = loaded_model.predict(new_data_scl)

In [88]:
new_predictions

array([22.15641384, 11.30252447])

In [None]:
225	35	65	20.79
[230.1,  37.8,  69.2] 22.1