### Çoklu Doğrusal Regresyon Modeli

In [122]:
# Kütüphaneler:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression # Doğrusal Regresyon
from sklearn.metrics import mean_squared_error, mean_absolute_error # Model Başarı Metrikleri
from sklearn.model_selection import train_test_split, cross_val_score # Model Test ve Eğitim Kütüphanesi

# Pandas display settings: render floats with two decimal places.
pd.set_option("display.float_format", "{:.2f}".format)

In [123]:
# Load the dataset from a CSV file in the working directory.
df = pd.read_csv("advertising.csv")

In [124]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [125]:
# Feature matrix: every column of df except the target "sales".
X = df.drop(columns=["sales"])

In [126]:
# Target kept as a one-column DataFrame (2-D), not a Series.
y = df.loc[:, ["sales"]]

In [127]:
##################################################
# Model
##################################################

In [128]:
# Split into training and test sets: 20% of the rows are held out for
# testing, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [129]:
# (rows, columns) of the training features.
X_train.shape

(160, 3)

In [130]:
# (rows, columns) of the training target.
y_train.shape

(160, 1)

In [131]:
# (rows, columns) of the held-out test features.
X_test.shape

(40, 3)

In [132]:
# (rows, columns) of the held-out test target.
y_test.shape

(40, 1)

In [133]:
# Build the linear regression estimator, then fit it on the training split.
reg_model = LinearRegression()
reg_model.fit(X_train, y_train)


In [134]:
# Intercept (b - bias). y was passed as a one-column frame, so intercept_ is
# indexed with [0] to get the single value.
reg_model.intercept_[0]

2.907947020816433

In [135]:
# Coefficients (w - weights): first row of coef_, one weight per feature column.
reg_model.coef_[0]

array([0.0468431 , 0.17854434, 0.00258619])

In [136]:
##################################################
# Prediction
##################################################

# What are the expected sales for the following observation?
# TV: 30, radio: 10, newspaper: 40
TV, radio, newspaper = 30, 10, 40

# Manual linear combination of the fitted parameters, roughly:
# sales = 2.9 + 0.046 * TV + 0.178 * radio + 0.002 * newspaper
bias = reg_model.intercept_[0]
w_tv, w_radio, w_newspaper = reg_model.coef_[0]
bias + w_tv * TV + w_radio * radio + w_newspaper * newspaper

6.202130997974466

In [137]:
# New observation as a single-row frame. The columns carry the feature names
# used for training (TV, radio, newspaper): a frame with integer labels 0, 1, 2
# makes scikit-learn warn about invalid feature names at predict time and
# leaves the value-to-feature mapping implicit.
yeni_veri = pd.DataFrame([[30, 10, 40]], columns=["TV", "radio", "newspaper"])

In [138]:
# Predict sales for the new observation with the fitted model.
reg_model.predict(yeni_veri)



array([[6.202131]])

In [139]:
##################################################
# Tahmin Başarısını Değerlendirme
##################################################

In [140]:
# TRAIN RMSE: square root of the mean squared error on the training split.
y_pred = reg_model.predict(X_train)
train_mse = mean_squared_error(y_train, y_pred)
np.sqrt(train_mse)

1.736902590147092

In [141]:
# Compute the TRAIN R-squared value (score on the training split).
reg_model.score(X_train, y_train)

0.8959372632325174

In [142]:
# TEST RMSE: square root of the mean squared error on the held-out split.
y_pred = reg_model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
np.sqrt(test_mse)

1.4113417558581587

In [143]:
# Compute the TEST R-squared value (score on the held-out split).
reg_model.score(X_test, y_test)

0.8927605914615384

In [144]:
# 10-fold cross-validated RMSE on the full data. The scorer yields negated
# MSE per fold, so flip the sign, take each fold's root, then average.
fold_neg_mse = cross_val_score(reg_model, X, y, cv=10, scoring="neg_mean_squared_error")
np.sqrt(-fold_neg_mse).mean()

1.6913531708051797

In [145]:
# 5-fold cross-validated RMSE on the full data. The scorer yields negated
# MSE per fold, so flip the sign, take each fold's root, then average.
fold_neg_mse = cross_val_score(reg_model, X, y, cv=5, scoring="neg_mean_squared_error")
np.sqrt(-fold_neg_mse).mean()

1.7175247278732084