In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [2]:
# Read the advertising dataset from the working directory.
# The preview shows an `Unnamed: 0` column counting 1, 2, 3, ... -- presumably
# a row number that was saved with the CSV (TODO confirm against the file).
csv_path = "Advertising.csv"
data = pd.read_csv(csv_path)

# Preview the first five rows.
data.head()

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [12]:
# Predictors are the three advertising channels; the target is `sales`.
feature_cols = ['TV', 'radio', 'newspaper']

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data[feature_cols],
    data.sales,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Row count of the training split.
X_train.shape[0]

140

In [10]:
# Row count of the held-out test split.
X_test.shape[0]

60

### **Building a Linear Regression Model on the Training Set**

In [13]:
# Linear regression estimator with scikit-learn's default settings
# (no arguments are passed to the constructor).
linreg = LinearRegression()
# Fit on the training split only; X_test / y_test are not touched here.
linreg.fit(X_train, y_train)

In [14]:
# Intercept term learned by the fitted model.
linreg.intercept_

np.float64(2.7089490925159065)

In [15]:
# Learned coefficients, one per feature, in the column order the model was
# fitted with (TV, radio, newspaper).
linreg.coef_

array([0.04405928, 0.1992875 , 0.00688245])

In [16]:
# Pair each coefficient with the column it was fitted on. The names are taken
# from X_train itself rather than a hand-typed list, so the labels cannot
# drift out of sync with the features if the column selection changes.
# (coef_ follows the column order of the frame passed to fit.)
list(zip(X_train.columns, linreg.coef_))

[('TV', np.float64(0.04405928095746522)),
 ('radio', np.float64(0.19928749689893954)),
 ('newspaper', np.float64(0.006882452222275487))]

In [17]:
# Predictions for the held-out test rows, used by the evaluation cells below.
y_pred = linreg.predict(X_test)

In [19]:
# Side-by-side view of the held-out targets and the model's predictions.
# The frame is indexed by y_test's row labels, so each row can be traced back
# to the original data.
test_pred_df = pd.DataFrame(
    {
        "actual": y_test,
        "predicted": y_pred,
        "residual": y_test - y_pred,  # positive => the model under-predicted
    }
)

# Show 10 random rows. The seed is fixed (same value as the train/test split)
# so the displayed rows are identical on every Restart & Run All; an unseeded
# sample() returns different rows on each execution.
test_pred_df.sample(10, random_state=42)

Unnamed: 0,actual,predicted,residual
158,7.3,10.889238,-3.589238
55,23.7,21.73009,1.96991
164,11.9,10.839388,1.060612
197,12.8,12.404863,0.395137
150,16.1,18.101136,-2.001136
195,7.6,5.224355,2.375645
137,20.8,20.938265,-0.138265
115,12.6,13.355569,-0.755569
177,11.7,12.004544,-0.304544
117,9.4,6.336368,3.063632


In [21]:
# In-sample R²: the model is scored on the same training rows it was fitted
# on, so this measures goodness of fit, not performance on unseen data.
y_train_pred = linreg.predict(X_train)
r2 = metrics.r2_score(y_true=y_train, y_pred=y_train_pred)
print("R-Squared:", r2)

R-Squared: 0.9055159502227753


In [26]:
# Root-mean-squared error of the test-set predictions: the square root of the
# mean squared difference between y_test and y_pred, in the same units as the
# target.
mse = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

# Note: `rmse` stores the value already rounded to two decimals.
rmse = round(np.sqrt(mse), 2)
print("RMSE:", rmse)

RMSE: 1.95
