# Coklu Dogrusal Regression

In [28]:
import warnings
warnings.simplefilter(action='ignore')
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [1]:
import pandas as pd
path = 'Advertising.csv'
ad = pd.read_csv(path, usecols=[1,2,3,4])
df = ad.copy()
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [2]:
X = df.drop('sales', axis=1)
X[:5]

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4


# Linear Regression 

In [6]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict

In [5]:
y = df['sales']
y.head()

0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
Name: sales, dtype: float64

In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20 , random_state=144)

In [49]:
X_train.shape

(160, 3)

In [50]:
X_test.shape

(40, 3)

In [51]:
y_train.shape

(160,)

In [52]:
y_test.shape

(40,)

In [53]:
training = df.copy()

In [54]:
training.shape

(200, 4)

# Statsmodel

In [55]:
import statsmodels.api as sm

lm = sm.OLS(y_train, X_train)
model = lm.fit()
model.summary()

0,1,2,3
Dep. Variable:,sales,R-squared (uncentered):,0.982
Model:,OLS,Adj. R-squared (uncentered):,0.981
Method:,Least Squares,F-statistic:,2787.0
Date:,"Mon, 29 Nov 2021",Prob (F-statistic):,7.04e-136
Time:,21:47:03,Log-Likelihood:,-339.0
No. Observations:,160,AIC:,684.0
Df Residuals:,157,BIC:,693.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
TV,0.0544,0.002,36.212,0.000,0.051,0.057
radio,0.2127,0.011,18.979,0.000,0.191,0.235
newspaper,0.0230,0.008,2.887,0.004,0.007,0.039

0,1,2,3
Omnibus:,6.947,Durbin-Watson:,1.857
Prob(Omnibus):,0.031,Jarque-Bera (JB):,7.358
Skew:,-0.359,Prob(JB):,0.0253
Kurtosis:,3.766,Cond. No.,13.4


In [56]:
model.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
TV,0.0544,0.002,36.212,0.000,0.051,0.057
radio,0.2127,0.011,18.979,0.000,0.191,0.235
newspaper,0.0230,0.008,2.887,0.004,0.007,0.039


# Sklearn 

In [57]:
from sklearn.linear_model import LinearRegression

In [58]:
lr = LinearRegression()
modelLr = lr.fit(X_train, y_train)

In [59]:
modelLr.intercept_

2.947047734069674

In [60]:
modelLr.coef_

array([0.04654781, 0.18148351, 0.00077954])

# Tahmin

In [61]:
# 30 Tv , 10 Radio , 40 Newspapaer

yeni_veri = [[30], [10], [40]]
yeni_veri = pd.DataFrame(yeni_veri).T
yeni_veri.head()

Unnamed: 0,0,1,2
0,30,10,40


In [62]:
modelLr.predict(yeni_veri)

array([6.18949881])

In [63]:
import numpy as np

rmse = np.sqrt(mean_squared_error(y_train, modelLr.predict(X_train)))
rmse

1.6748559274650712

In [64]:
rmse = np.sqrt(mean_squared_error(y_test, modelLr.predict(X_test)))
rmse

1.6640263686701036

# Model Tuning  / Model Dogrulama

In [148]:
X = df.drop('sales', axis=1)
y = df['sales']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20 ,
                                                    random_state=144)


In [149]:
rmse = np.sqrt(mean_squared_error(y_train, modelLr.predict(X_train)))
rmse

1.6748559274650712

In [150]:
rmse = np.sqrt(mean_squared_error(y_test, modelLr.predict(X_test)))
rmse

1.6640263686701036

In [151]:
modelLr.score(X_train, y_train)

0.8971614078663419

In [152]:
cross_val_score(modelLr, X_train, y_train, cv=10, scoring='r2').mean()

0.8733783298422942

In [157]:
-cross_val_score(modelLr, X_train, y_train, cv=10, scoring='neg_mean_squared_error').mean()

3.0491814361587064

In [155]:
rmseCV = np.sqrt(-cross_val_score(modelLr,
                                  X_train,
                                  y_train,
                                  cv=10,
                                  scoring='neg_mean_squared_error').mean())
rmseCV

1.746190549785076

In [156]:
rmseCV = np.sqrt(-cross_val_score(modelLr,
                                  X_test,
                                  y_test,
                                  cv=10,
                                  scoring='neg_mean_squared_error').mean())
rmseCV

1.8213882488500592