# Linear Regression - Sales Prediction Example

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score

* Set Display Options

In [None]:
# Pandas display settings used when echoing DataFrames in this notebook.
pd.options.display.max_columns = None  # never truncate columns
pd.options.display.max_rows = None  # never truncate rows
pd.options.display.width = 500  # allow up to 500 characters per output line
pd.options.display.float_format = lambda value: '%.3f' % value  # three digits after the decimal point

* Load Data

In [None]:
# Read the advertising dataset from a path relative to the working directory.
df = pd.read_csv("data/advertising.csv")

* Discover Data

In [None]:
# Dimensions of the dataset as (rows, columns).
df.shape

In [None]:
# Preview the first rows of the dataset.
df.head()

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Number of distinct values per column, largest first.
df.nunique().sort_values(ascending=False)

In [None]:
# How often each distinct "sales" value occurs.
df["sales"].value_counts()

In [None]:
# Missing-value count per column, largest first.
df.isnull().sum().sort_values(ascending=False)

In [None]:
# Summary statistics, transposed so that each column of df becomes a row.
df.describe().T

In [None]:
# Selected quantiles per column (minimum, 5%, median, 90%, 99%, maximum), transposed.
df.quantile([0, 0.05, 0.50, 0.90, 0.99, 1]).T

In [None]:
# Pairwise absolute correlations between the features ("sales", the target, is excluded).
corr = df.drop("sales", axis=1).corr().abs()

# Keep only the cells strictly above the diagonal. This removes every column's
# trivial self-correlation (always 1.0) and the mirrored duplicate of each pair,
# both of which would otherwise pass the threshold filter below.
corr_values = (
    corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))
    .unstack()
    .dropna()
)

# Strongest relationships first, then show only the highly correlated pairs.
corr_values_sort = corr_values.sort_values(ascending=False)
corr_values_sort[corr_values_sort > 0.7]

* Select Feature (X) and Target (y) For Basic Linear Regression

In [None]:
# Single feature: the double brackets keep X as a 2-D, one-column DataFrame.
X = df[["TV"]]
# The target is selected the same way, so y is also a one-column DataFrame;
# the later intercept_[0] / coef_[0][0] indexing relies on this 2-D shape.
y = df[["sales"]]

* Model Train For Basic Linear Regression

In [None]:
# Fit a linear regression of sales on the single TV feature.
reg_model = LinearRegression().fit(X, y)

# Fitted line: y_hat = b + w * TV

# Intercept (b - bias). Printed explicitly: only the last bare expression of a
# cell is echoed, so a bare expression here would be silently discarded.
print("Intercept (b):", reg_model.intercept_[0])

# Coefficient of TV (w1)
print("TV coefficient (w1):", reg_model.coef_[0][0])

* Prediction

In [None]:
# Expected sales for a TV expenditure of 150 units, evaluated by hand from the
# fitted line: slope * 150 + intercept.

reg_model.coef_[0][0] * 150 + reg_model.intercept_[0]

* Plot The Model

In [None]:
# Scatter of TV spend against sales with the fitted regression line on top.
# x and y are passed as 1-D Series, the documented input form for regplot.
# ci=None is the documented way to switch off the confidence band; ci=False is
# not None, so the interval code path would still run for a band nobody sees.
g = sns.regplot(x=X["TV"], y=y["sales"], scatter_kws={'color': 'b', 's': 9},
                ci=None, color="r")

# The title shows the fitted equation with both parameters rounded to two decimals.
g.set_title(f"Model Denklemi: Sales = {round(reg_model.intercept_[0], 2)} + TV*{round(reg_model.coef_[0][0], 2)}")
g.set_ylabel("Satis Sayisi")
g.set_xlabel("TV Harcamalari")
# Fix the x-range to [-10, 310] and anchor the y-axis at zero.
plt.xlim(-10, 310)
plt.ylim(bottom=0)
plt.show()

* Model Success

* TODO
  * MSE
  * MAE
  * RMSE
  * R-Square

In [None]:
# MSE: mean squared error of the model's predictions on the data it was fitted on.
# Values are printed explicitly because only the last bare expression of a cell
# is echoed; the MSE itself would otherwise never be shown.
y_pred = reg_model.predict(X)
print("MSE:", mean_squared_error(y, y_pred))

# Mean and standard deviation of the target, for judging the size of the error.
print("Mean of sales:", y["sales"].mean())
print("Std of sales:", y["sales"].std())

In [None]:
# RMSE: square root of the mean squared error between y and y_pred.
np.sqrt(mean_squared_error(y, y_pred))

In [None]:
# MAE: mean absolute error between y and y_pred.
# Printed explicitly; as a bare mid-cell expression its value would be discarded.
print("MAE:", mean_absolute_error(y, y_pred))

# R-SQUARED
# Expresses how much of the dependent variable is explained by the
# independent variable(s) given to the model.
# For this data, the TV variable alone gives a value of about 61%.
# R-squared tends to increase as more X variables are added.
print("R-squared:", reg_model.score(X, y))