### Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


### Load the dataset into a Pandas DataFrame

In [3]:
data = pd.read_csv('Advertising.csv')

In [4]:
# Display the Dataset

display(data)

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5




### Explore the data

In [26]:
# Display the first few rows of the dataset

display(data.head())

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [29]:
# Get an overview of the dataset

display(data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  200 non-null    int64  
 1   TV          200 non-null    float64
 2   Radio       200 non-null    float64
 3   Newspaper   200 non-null    float64
 4   Sales       200 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 7.9 KB


None

In [30]:
# Checck for missing values

display(data.isnull().sum())

Unnamed: 0    0
TV            0
Radio         0
Newspaper     0
Sales         0
dtype: int64

### Preprocess the data
####     > Prepare the data.
####     > Handling missing values and converting category of variables into numbers, if necessary.
####     > Spliting the dataset Features (Input) and target (Output).


### Split the data into training and testing sets

In [31]:
# Features ( Inputs )
g = data.drop('Sales', axis = 1)

In [32]:
# Display features ( Inputs)
display(g)

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper
0,1,230.1,37.8,69.2
1,2,44.5,39.3,45.1
2,3,17.2,45.9,69.3
3,4,151.5,41.3,58.5
4,5,180.8,10.8,58.4
...,...,...,...,...
195,196,38.2,3.7,13.8
196,197,94.2,4.9,8.1
197,198,177.0,9.3,6.4
198,199,283.6,42.0,66.2


In [33]:
# Target ( Outputs)

s = data['Sales']

In [34]:
# Display the Target (Outputs) 
display(s)

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: Sales, Length: 200, dtype: float64

In [35]:
g_train, g_test, s_train, s_test = train_test_split(g, s, test_size = 0.2, random_state = 42)

In [36]:
display(g_train, g_test, s_train, s_test )

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper
79,80,116.0,7.7,23.1
197,198,177.0,9.3,6.4
38,39,43.1,26.7,35.1
24,25,62.3,12.6,18.3
122,123,224.0,2.4,15.6
...,...,...,...,...
106,107,25.0,11.0,29.7
14,15,204.1,32.9,46.0
92,93,217.7,33.5,59.0
179,180,165.6,10.0,17.6


Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper
95,96,163.3,31.6,52.9
15,16,195.4,47.7,52.9
30,31,292.9,28.3,43.2
158,159,11.7,36.9,45.2
128,129,220.3,49.0,3.2
115,116,75.1,35.0,52.7
69,70,216.8,43.9,27.2
170,171,50.0,11.6,18.4
174,175,222.4,3.4,13.1
45,46,175.1,22.5,31.5


79     11.0
197    12.8
38     10.1
24      9.7
122    11.6
       ... 
106     7.2
14     19.0
92     19.4
179    12.6
102    14.8
Name: Sales, Length: 160, dtype: float64

95     16.9
15     22.4
30     21.4
158     7.3
128    24.7
115    12.6
69     22.3
170     8.4
174    11.5
45     14.9
66      9.5
182     8.7
165    11.9
78      5.3
186    10.3
177    11.7
56      5.5
152    16.6
82     11.3
68     18.9
124    19.7
16     12.5
148    10.9
93     22.2
65      9.3
60      8.1
84     21.7
67     13.4
125    10.6
132     5.7
9      10.6
18     11.3
55     23.7
75      8.7
150    16.1
104    20.7
135    11.6
137    20.8
164    11.9
76      6.9
Name: Sales, dtype: float64



### Train the model

In [37]:
model = LinearRegression()
model.fit(g_train, s_train)



### Make predictions

In [38]:
s_pred = model.predict(g_test)

In [39]:
# Making predictions are displaying

display(s_pred)

array([16.41227699, 20.84319293, 21.51186915, 10.65309994, 22.12405753,
       13.12383506, 21.03748883,  7.50385439, 13.64787029, 15.12040886,
        9.02055551,  6.70666642, 14.40155146,  8.88609041,  9.74353333,
       12.21551975,  8.71370868, 16.2941451 , 10.26802337, 18.80498245,
       19.58864662, 13.22318303, 12.3647867 , 21.31473607,  7.79734749,
        5.78189965, 20.74934811, 11.95476656,  9.199109  ,  8.52178704,
       12.40431294,  9.94865202, 21.36880272, 12.25374832, 18.29943527,
       20.13347356, 14.07464638, 20.88594679, 11.05214979,  4.55168744])



### Evaluate the model

In [40]:
mse = mean_squared_error(s_test, s_pred)
rmse = np.sqrt(mse)

In [41]:
# Display the evaluated model

display("Root Mean Squared Error:", rmse)

'Root Mean Squared Error:'

1.7885761008659673