>#  <ins>Pricing - Example with Demo Data from sklearn</ins>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline


>## Boston Housing Dataset

In [None]:
# Load the Boston housing demo dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs scikit-learn < 1.2 - confirm the pinned version.
from sklearn.datasets import load_boston
boston = load_boston()

>### Boston data detailed information

In [None]:
# Print the description text attached to the dataset object.
print(boston.DESCR)


>### feature_names lists the column names

In [None]:
# Display the feature names; they become the DataFrame column labels.
boston.feature_names


>### Converting the data to a DataFrame

In [None]:
# Wrap the dataset's data array in a DataFrame, labelled with the feature names.
df = pd.DataFrame(boston.data, columns=boston.feature_names)

In [None]:
# Preview the first rows of the DataFrame.
df.head()


In [None]:
# Append the dataset's target values as the 'MEDV' column, then preview the result.
df['MEDV'] = boston.target
df.head()

>### Quality Check

In [None]:
# Quality check: print pandas' summary of the DataFrame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Quality check: show per-column summary statistics.
df.describe()

>## Visualization

>>### Pair plots of the DataFrame

In [None]:
# Draw pairwise relationship plots for every column of df.
sns.pairplot(df)

In [None]:
# Plot the distribution of every column of df in a grid with two panels per row.
cols = 2
col = df.columns
# Derive the row count from the number of columns (ceiling division) instead of
# hard-coding 7, so the grid always has room for every column and never indexes
# past the last one.
rows = max(1, (len(col) + cols - 1) // cols)
# squeeze=False keeps `ax` two-dimensional even when a single row is enough.
fig, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(16, 16), squeeze=False)
panels = list(ax.flat)
for name, panel in zip(col, panels):
    # NOTE(review): distplot is deprecated in newer seaborn releases; it is kept
    # here because the rest of the notebook uses it - confirm the seaborn version.
    sns.distplot(df[name], ax=panel)
# Hide panels left over when the column count is not a multiple of `cols`.
for panel in panels[len(col):]:
    panel.set_visible(False)
plt.tight_layout()


>## Correlations

In [None]:
# Heatmap of the pairwise correlation matrix of df, with each cell annotated
# by its value. The figure is created first so the heatmap is drawn on a
# 16x9 canvas.
fig, ax = plt.subplots(figsize = (16, 9))
sns.heatmap(df.corr(), annot = True, annot_kws={'size': 12})


In [None]:
def getCorrelatedFeature(corrdata, threshold):
    """Select the entries of a correlation Series whose magnitude exceeds a threshold.

    Parameters
    ----------
    corrdata : pandas.Series
        Correlation values indexed by feature name, e.g. one column of
        ``DataFrame.corr()``.
    threshold : float
        Strict lower bound on the absolute correlation value; entries whose
        magnitude equals the threshold are not selected.

    Returns
    -------
    pandas.DataFrame
        One row per selected feature, indexed by feature name in the original
        order, with the single column ``'Corr Value'``.
    """
    # A boolean mask replaces the per-label loop: a label lookup returns a
    # Series when the label is duplicated, which cannot be used in an `if`.
    strong = corrdata[corrdata.abs() > threshold]
    return pd.DataFrame(
        data=strong.values, index=list(strong.index), columns=['Corr Value']
    )

In [None]:
# Select the columns whose absolute correlation with MEDV exceeds the threshold.
# MEDV itself is among them, which is why later cells drop it from the predictors.
threshold = 0.4
corr_value = getCorrelatedFeature(df.corr()['MEDV'], threshold)

In [None]:
# Names of the selected columns.
corr_value.index.values

In [None]:
# Restrict df to the selected columns and preview the result.
correlated_data = df[corr_value.index]
correlated_data.head()

In [None]:
# Predictors X: every selected column except MEDV. Target y: the MEDV column.
X = correlated_data.drop(labels=['MEDV'], axis = 1)
y = correlated_data['MEDV']


>## Linear Regression

In [None]:
# Hold out 33% of the rows as a test set; random_state=1 makes the split reproducible.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,random_state=1)

In [None]:
# Create the linear regression model; it is fitted in a later cell.
from sklearn.linear_model import LinearRegression
lm = LinearRegression()

In [None]:
# Fit the model on the training split.
lm.fit(X_train,y_train)

>### Results, numerically and visually

In [None]:
 # Predict the target for the held-out test rows.
 predictions = lm.predict(X_test)

In [None]:
 # Scatter plot of actual (x-axis) against predicted (y-axis) values on the test split.
 plt.scatter(y_test,predictions)

In [None]:
# Distribution of the residuals (actual minus predicted) on the test split.
# NOTE(review): distplot is deprecated in newer seaborn releases - confirm the seaborn version.
sns.distplot((y_test-predictions),bins=50)

>### y-intercept of the linear function

In [None]:
# Intercept term of the fitted model.
lm.intercept_


>### Coefficients of a linear regression function

In [None]:
# Coefficients of the fitted model (presumably one per column of X, in column order - verify).
lm.coef_

>### Linear regression function definition

In [None]:
def lin_func(values, coefficients=None, y_axis=None):
    """Evaluate the linear model ``dot(values, coefficients) + y_axis``.

    Parameters
    ----------
    values : array-like
        Feature value(s) to evaluate, ordered like ``coefficients``.
    coefficients : array-like, optional
        Weights of the linear function. Defaults to ``lm.coef_``, read at
        call time.
    y_axis : float, optional
        Intercept of the linear function. Defaults to ``lm.intercept_``,
        read at call time.

    Returns
    -------
    The value of the linear function at ``values``.
    """
    # Resolve the defaults at call time. Defaults written in the signature are
    # evaluated once, when the cell runs, so they would keep pointing at the
    # old parameters after `lm` is re-fitted.
    if coefficients is None:
        coefficients = lm.coef_
    if y_axis is None:
        y_axis = lm.intercept_
    return np.dot(values, coefficients) + y_axis

>### Samples

In [None]:
from random import randint

# Spot-check five randomly drawn rows of df: print the model's prediction,
# the real MEDV value, and the difference between the two.
for _ in range(5):
    index = randint(0, len(df) - 1)
    row = df.iloc[index]
    # Same feature order as the predictors: the selected columns minus MEDV.
    sample = row[corr_value.index.values].drop('MEDV')
    predicted = round(lin_func(sample), 2)
    actual = row['MEDV']
    print(
        'PREDICTION: ', predicted,
        ' // REAL: ', actual,
        ' // DIFFERENCE: ', round(predicted - actual, 2)
    )