# Supervised Learning | Regression (Linear Regression)

### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# sns.set()

==========

## Simple Linear Regression | Intuition (Fuel Consumption)

Scikit-Learn Linear Module: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model

OLS Demo: https://www.geogebra.org/m/h7zw5jCG

### Importing & Creating Dataset

In [None]:
# Load the fuel dataset from a CSV file (path is relative to the notebook's working directory).
fuel = pd.read_csv('datasets/fuel.csv')
# A bare name as the last line of a cell makes the notebook display the DataFrame.
fuel

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
fuel.info()

In [None]:
# Descriptive statistics of the columns, rounded to two decimal places.
fuel.describe().round(2)

### Exploring Data

In [None]:
# Grid of pairwise plots between the numeric columns of the dataset.
sns.pairplot(fuel)

In [None]:
# Scatter plot of horse power (x) against fuel economy (y).
sns.scatterplot(data = fuel, x = 'Horse Power', y = 'Fuel Economy (MPG)')

In [None]:
# Same two columns, with seaborn's fitted regression line drawn over the points.
sns.regplot(x = 'Horse Power', y =  'Fuel Economy (MPG)', data = fuel)

### Splitting Data

In [None]:
# Name the columns once, then slice the frame with them.
feature_columns = ['Horse Power']
target_column = 'Fuel Economy (MPG)'

# A list selector keeps X two-dimensional (DataFrame), as scikit-learn expects for features;
# a scalar selector makes y a one-dimensional Series.
X = fuel[feature_columns]
y = fuel[target_column]

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing. random_state is fixed so the split (and every
# metric computed from it further down) is reproducible between runs, matching the
# start-ups case study later in this notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

### Model Training & Building 

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator with scikit-learn's default settings.
reg = LinearRegression()

In [None]:
# Fit the model on the training split only; the test split stays unseen for evaluation.
reg.fit(X_train, y_train)

In [None]:
# Learned coefficient(s), one per feature column (here only 'Horse Power').
reg.coef_

In [None]:
# Learned intercept term of the fitted line.
reg.intercept_

### Model Evaluation & Prediction

In [None]:
# Predict fuel economy for a single car with 300 horse power.
# The model was fitted on a DataFrame, so the input is wrapped in a DataFrame with the
# same column name; a bare nested list makes scikit-learn warn about missing feature names.
reg.predict(pd.DataFrame({'Horse Power': [300]}))

In [None]:
# Predictions for every row of the held-out test split.
y_pred = reg.predict(X_test)
# Display the predicted values.
y_pred

In [None]:
# Actual test targets as a NumPy array, for comparison with y_pred.
y_test.values

In [None]:
# R^2 of the model over the full dataset (X, y).
# NOTE(review): X and y include the rows the model was trained on, so this is not a
# held-out estimate; the r2_score on (y_test, y_pred) below is the test-only figure.
reg.score(X,y)

In [None]:
from sklearn.metrics import r2_score
# R^2 on the held-out test split: actual targets vs. the model's predictions.
print(r2_score(y_test, y_pred))

In [None]:
# Actual test points.
plt.scatter(X_test, y_test)
# Model predictions for the same x values, drawn as a green line.
plt.plot(X_test, y_pred, color = 'g')
plt.xlabel('Horse Power (HP)')
plt.ylabel('MPG')
plt.title('HP vs. MPG (Testing Set)')

==========

## Multiple Linear Regression (MLR) | Full Case-study (Start-Ups)

### Importing Dataset & Extracting Features

In [None]:
# Load the start-ups dataset from a CSV file (path is relative to the notebook's working directory).
startups = pd.read_csv('datasets/startups.csv')
# Show the first rows to check that the file was parsed as expected.
startups.head()

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
startups.info()

In [None]:
# Descriptive statistics of the columns, rounded to two decimal places.
startups.describe().round(2)

### Exploring Data

In [None]:
# Grid of pairwise plots between the numeric columns of the dataset.
sns.pairplot(startups)

In [None]:
# Correlation heatmap with the coefficient printed in every cell.
# Only numeric columns are correlated: the frame also holds a categorical column (it is
# one-hot encoded later), and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
sns.heatmap(startups.select_dtypes(include='number').corr(), annot=True)

### Data Splitting & Preprocessing

In [None]:
# Look at the column layout again before slicing features and target by position.
startups.head()

In [None]:
# Positional slicing: every column except the last is a feature, the last one is the target.
feature_frame = startups.iloc[:, :-1]
target_series = startups.iloc[:, -1]

# Work with plain NumPy arrays from here on.
X = feature_frame.values
y = target_series.values

In [None]:
# ColumnTransformer: 
# This estimator allows different columns or column subsets of the input to be transformed separately 
# and the features generated by each transformer will be concatenated to form a single feature space

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

In [None]:
# One-hot encode the column at position 3 of X; every other column is passed through unchanged.
# NOTE(review): the index 3 is hard-coded — presumably the categorical column; confirm against the CSV layout.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
# fit_transform learns the categories and applies the encoding in one step. In the result the
# encoder's output columns come first, followed by the passthrough columns.
X = np.array(ct.fit_transform(X))

In [None]:
# Display the encoded feature matrix.
X

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

### Model Training & Building

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# New estimator for the multi-feature problem (rebinds the name `reg` used in the fuel example).
reg = LinearRegression()
# Fit on the training split only.
reg.fit(X_train, y_train)

In [None]:
# Learned coefficients, one per column of the encoded feature matrix.
reg.coef_

In [None]:
# Learned intercept term.
reg.intercept_

### Model Evaluation

In [None]:
# Predict for one hand-written sample. The six values must follow the encoded column layout:
# encoder output first, then the passthrough columns.
# NOTE(review): presumably three one-hot flags followed by three numeric features — confirm.
reg.predict([[0.0, 0.0, 1.0, 150000, 130000, 450]])

In [None]:
# Predictions for every row of the held-out test split.
y_pred = reg.predict(X_test)
# Display the predicted values.
y_pred

In [None]:
# Actual test targets, for comparison with y_pred.
y_test

In [None]:
from sklearn.metrics import r2_score

In [None]:
# R^2 on the held-out test split: actual targets vs. the model's predictions.
r2_score(y_test, y_pred)

In [None]:
# R^2 of the model over the full dataset (X, y).
# NOTE(review): X and y include the rows the model was trained on, so this is not a
# held-out estimate; the r2_score on (y_test, y_pred) above is the test-only figure.
reg.score(X,y)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Mean absolute error on the test split (same units as the target).
print(mean_absolute_error(y_test, y_pred))

In [None]:
# Mean squared error on the test split (squared target units).
print(mean_squared_error(y_test, y_pred))

In [None]:
# Root mean squared error: the square root brings the MSE back to the target's units.
print(np.sqrt(mean_squared_error(y_test, y_pred)))

In [None]:
# Mean of the target over the whole dataset, as a scale reference for the error values.
y.mean()

In [None]:
# Mean of the target over the test split only, as a scale reference for the test errors.
y_test.mean()

In [None]:
# Evaluating the model: compare the computed errors with the mean of the target

# Percentage of error (MAE-based)  = MAE / mean of y_test
# Percentage of error (RMSE-based) = RMSE / mean of y_test

# Almost 10 % Error (This is the percentage of error as an average over all the data)


# Error ratio = mean of ( y_pred / y_test ), which equals 1 for perfect predictions

==========

# THANK YOU!