In [10]:
# VERİ SETİNİ GETİRME
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

data = sns.load_dataset("tips")
df = pd.DataFrame(data)

# Eksik Verileri Ortalama ile Doldurma
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer = imputer.fit(df[["total_bill"]])
df[["total_bill"]] = imputer.transform(df[["total_bill"]])

# Kategorik Olmayan Değişkenler
total_bill = df[["total_bill"]]
tip = df[["tip"]]
size = df[["size"]]

# Ktegorik Değişkenler İçin 0-1 Dönüşümü
smoker = pd.get_dummies(df[["smoker"]])
time = pd.get_dummies(df[["day"]])

# Kukla Değişkeni Silme
smoker = smoker.drop("smoker_Yes", axis=1)

# Verileri Birleştirme
new_data = pd.concat([total_bill, size, smoker, time, tip], axis=1)

# Train ve Test Belirleme
training = new_data.iloc[:, 0:7]
testing = new_data.iloc[:, 7:8]

# Train Test Ayırma
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(training, testing, test_size=0.30, random_state=42)

In [11]:
from sklearn.linear_model import LinearRegression
multilinear_reg = LinearRegression()
model = multilinear_reg.fit(x_train, y_train)

In [12]:
model.intercept_

array([0.27803791])

In [13]:
model.coef_

array([[ 0.0962309 ,  0.27509438,  0.29590809,  0.00362031,  0.15143982,
        -0.05206352, -0.10299662]])

In [14]:
y_pred = multilinear_reg.predict(x_test)
print(y_pred)

[[2.97936772]
 [1.86508315]
 [3.93379554]
 [3.81776996]
 [2.0828487 ]
 [2.44680088]
 [3.56865383]
 [2.32871673]
 [2.44343087]
 [2.45969776]
 [2.78120134]
 [2.10847806]
 [2.15261418]
 [2.11865351]
 [1.82170588]
 [3.19659346]
 [2.90742153]
 [3.097801  ]
 [2.65506958]
 [5.91678906]
 [3.61327736]
 [3.33433375]
 [2.15322736]
 [1.99107636]
 [3.01155028]
 [2.25269432]
 [2.00942951]
 [3.20888035]
 [3.15817447]
 [6.7569582 ]
 [4.95974296]
 [1.52099033]
 [3.33613236]
 [2.72868815]
 [2.92927093]
 [3.99518395]
 [2.13732424]
 [5.53054992]
 [2.39415374]
 [2.95771769]
 [2.08345802]
 [2.52502777]
 [3.32586824]
 [2.29029774]
 [1.82687451]
 [0.79649764]
 [1.7952484 ]
 [3.14572118]
 [1.85526069]
 [2.35001762]
 [3.11168714]
 [3.60484549]
 [4.6228088 ]
 [2.66552489]
 [2.89667842]
 [2.38164373]
 [1.42290409]
 [2.78690591]
 [2.95717789]
 [2.56112038]
 [4.7189137 ]
 [2.68689505]
 [2.97029699]
 [2.54534326]
 [3.01882239]
 [3.08522171]
 [2.22058899]
 [1.4946509 ]
 [3.61003721]
 [3.65581424]
 [3.47381183]
 [4.31

In [15]:
y_tahmin=pd.DataFrame(data = y_pred)
y_tahmin.index=y_test.index

result=pd.concat([y_tahmin, y_test],axis=1)
result.columns=[["Tahmin","Gerçek"]]

print(result)

       Tahmin Gerçek
24   2.979368   3.18
6    1.865083   2.00
153  3.933796   2.00
211  3.817770   5.16
198  2.082849   2.00
..        ...    ...
165  3.655814   3.48
154  3.473812   2.00
216  4.310346   3.00
79   2.791587   2.71
29   2.963008   3.00

[74 rows x 2 columns]


### Stats Model

In [18]:
import statsmodels.api as sm

lm = sm.OLS(y_train, x_train)
model = lm.fit()
model.summary()

0,1,2,3
Dep. Variable:,tip,R-squared:,0.492
Model:,OLS,Adj. R-squared:,0.473
Method:,Least Squares,F-statistic:,26.26
Date:,"Sat, 17 Oct 2020",Prob (F-statistic):,9.78e-22
Time:,09:54:21,Log-Likelihood:,-248.22
No. Observations:,170,AIC:,510.4
Df Residuals:,163,BIC:,532.4
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
total_bill,0.0962,0.011,8.469,0.000,0.074,0.119
size,0.2751,0.110,2.505,0.013,0.058,0.492
smoker_No,0.2959,0.181,1.637,0.104,-0.061,0.653
day_Thur,0.2817,0.302,0.934,0.352,-0.314,0.877
day_Fri,0.4295,0.343,1.251,0.213,-0.248,1.107
day_Sat,0.2260,0.298,0.759,0.449,-0.362,0.814
day_Sun,0.1750,0.326,0.536,0.593,-0.470,0.820

0,1,2,3
Omnibus:,14.089,Durbin-Watson:,2.037
Prob(Omnibus):,0.001,Jarque-Bera (JB):,23.778
Skew:,0.423,Prob(JB):,6.86e-06
Kurtosis:,4.625,Cond. No.,150.0


### MSE

İstatistikte, bir tahmin edicinin ortalama karesi alınmış hatası veya ortalama kare sapması, hataların karelerinin ortalamasını, yani, tahmin edilen değerler ile gerçek değer arasındaki ortalama kare farkını ölçer. MSE, kare hata kaybının beklenen değerine karşılık gelen bir risk fonksiyonudur.

In [None]:
from sklearn.metrics import mean_squared_error 

rmse = np.sqrt(mean_squared_error(y_train, model.predict(x_train)))
print("Train RMSE: ", rmse)
rmse = np.sqrt(mean_squared_error(y_test, model.predict(x_test)))
print("Test RMSE: ", rmse)

### Predict

In [222]:
total_bill = int(input("Total Bill: "))
size = int(input("Size: "))
smoker = int(input("Smoker? (0 & 1): "))
thursday = int(input("is it Thursday? (0 & 1): "))
friday = int(input("is it Friday? (0 & 1): "))
saturday = int(input("is it Saturday? (0 & 1): "))
sunday = int(input("is it Sunday? (0 & 1): "))

tahmin = [total_bill, size, smoker, thursday, friday, saturday, sunday]

sabit = multilinear_reg.intercept_[0]
total_bill_weight = multilinear_reg.coef_[:, 0] * tahmin[0]
size_weight = multilinear_reg.coef_[:, 0:1] * tahmin[1]
smoker_weight = multilinear_reg.coef_[:, 1:2] * tahmin[2]
Thur_weight = multilinear_reg.coef_[:, 2:3] * tahmin[3]
Fri_weight = multilinear_reg.coef_[:, 3:4] * tahmin[4]
Sat_weight = multilinear_reg.coef_[:, 4:5] * tahmin[5]
Sun_weight = multilinear_reg.coef_[:, 5:6] * tahmin[6]

result = sabit + total_bill_weight + size_weight + smoker_weight + Thur_weight + Fri_weight + Sat_weight + Sun_weight
print("Tahmin: ", result[:,0][0])


Total Bill: 20
Size: 5
Smoker? (0 & 1): 1
is it Thursday? (0 & 1): 1
is it Friday? (0 & 1): 0
is it Saturday? (0 & 1): 0
is it Sunday? (0 & 1): 0
Tahmin:  3.254812927912407
