# Multiple Linear Regression

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

# To visualise in the notebook
%matplotlib inline

In [None]:
# Load the concrete dataset from concrete.csv (path is relative to the working directory)
multi_df = pd.read_csv("concrete.csv")

Now, let's check the structure of the concrete dataset.

In [None]:
# Display the first 5 rows (head() defaults to 5) as a quick sanity check of the load
multi_df.head()

In [None]:
# Display the last 5 rows (tail() defaults to 5)
multi_df.tail()

In [None]:
# Summarise the columns: names, dtypes, non-null counts and memory usage
multi_df.info()

In [None]:
# Shape of the DataFrame as a (rows, columns) tuple
multi_df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
multi_df.describe()

# Visualising Data Using Seaborn

In [None]:
# Pairwise scatter plots of every column against every other column, to eyeball
# how the dimensions relate to each other and to the dependent dimension.
# The resulting figure can be saved as a .png file and opened in an image viewer
# to get a larger view.

# diag_kind='kde' draws a kernel density estimate on the diagonal;
# omit it to get the default histogram instead.
sns.pairplot(multi_df, diag_kind='kde')

# Performing Multiple Linear Regression

In [None]:
# Feature matrix: every column except the response column "strength"
X = multi_df.drop(columns="strength")
print(X.shape)
# Preview the first 5 rows of the features
X.head()

In [None]:
from sklearn import preprocessing

# Standardise every feature column. preprocessing.scale returns a numpy array,
# so wrap the result back into a DataFrame to keep the original column names.
# NOTE(review): the scaling statistics are computed on all rows, i.e. before the
# train/test split that follows - confirm this is acceptable for the evaluation.
X_scaled = pd.DataFrame(preprocessing.scale(X), columns=X.columns)

In [None]:
# Putting response variable to y.
# The double brackets select a one-column DataFrame (2-D) rather than a Series.
y = multi_df[['strength']]

# Display the first 5 rows of the response
y.head()

## Splitting Data into Training and Testing Sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.3,
    random_state=5,
)

In [None]:
# Shapes of the four splits, one per line: X_train, y_train, X_test, y_test
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")

## Performing Linear Regression

In [None]:
# import LinearRegression from sklearn
from sklearn.linear_model import LinearRegression

# Create the LinearRegression estimator object and bind it to lr_model
lr_model = LinearRegression()

# Fit the model on the training split using lr_model.fit()
lr_model.fit(X_train, y_train)

## Coefficients Calculation

In [None]:
# Report the fitted intercept, then one coefficient per feature column.
# coef_ is indexed with [0] because the model was fitted on a 2-D response,
# which makes coef_ 2-D with a single row.
print("Intercept: ", lr_model.intercept_)
for col_name, coef in zip(X_train.columns, lr_model.coef_[0]):
    print("The coefficient for {} is {}".format(col_name, coef))


## Predictions

In [None]:
# Predict the response for the held-out test features with the fitted model
y_pred = lr_model.predict(X_test)

In [None]:
import matplotlib.pyplot as plt

# Actual (x axis) vs. predicted (y axis) response on the test set; the closer
# the points lie to the diagonal, the better the predictions.
plt.scatter(y_test,y_pred)
plt.xlabel('Y Test')
plt.ylabel('Predicted Y')

In [None]:
# Score of the fitted linear model on the training split and on the test split
print("Train", lr_model.score(X_train, y_train), sep="\n")
print("Test", lr_model.score(X_test, y_test), sep="\n")

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Mean squared error between the actual and the predicted test responses
mse = mean_squared_error(y_test, y_pred)

In [None]:
# Not strictly required, since the model score has already been displayed;
# computed explicitly here for illustration only.
r_squared = r2_score(y_test, y_pred)

In [None]:
# Report both test-set metrics computed in the previous cells
print('Mean_Squared_Error :' ,mse)
print('r_square_value :',r_squared)

# Statsmodels

In [None]:
import statsmodels.api as sm

# Unlike scikit-learn, statsmodels does not fit an intercept automatically,
# so an explicit constant column is added with sm.add_constant() before fitting.
X_train_sm = sm.add_constant(X_train)
X_train_sm.shape

In [None]:
# Create and fit an ordinary least squares model in one line
# (response first, then the design matrix that includes the constant column)
lm1 = sm.OLS(y_train,X_train_sm).fit()

# Display the fitted parameters (the constant plus one coefficient per feature)
lm1.params

In [None]:
# Print the statsmodels summary report of the fitted OLS model
print(lm1.summary())

In [None]:
# Heatmap of the pairwise correlations between all columns of the dataset,
# with the correlation value annotated in each cell
plt.figure(figsize = (5,5))
sns.heatmap(multi_df.corr(),annot = True)

# Ridge 

In [None]:
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [None]:
# Ridge regression on the training split with penalty strength alpha=1;
# fit() returns the estimator itself, so construction and fitting are chained
ridge = Ridge(alpha=1).fit(X_train, y_train)
print("Ridge model:", ridge.coef_)

In [None]:
# Score of the ridge model: training split first, then test split
print(ridge.score(X_train, y_train), ridge.score(X_test, y_test), sep="\n")

# Lasso

In [None]:
# Lasso regression on the training split with penalty strength alpha=0.5;
# fit() returns the estimator itself, so construction and fitting are chained
lasso = Lasso(alpha=0.5).fit(X_train, y_train)
print("Lasso model:", lasso.coef_)


In [None]:
# Score of the lasso model: training split first, then test split.
# The two numbers are printed without labels, so they follow the same
# train-then-test order used by the other score cells in this notebook.
print(lasso.score(X_train, y_train))
print(lasso.score(X_test, y_test))

# Polynomial models

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Transformer that expands the features into polynomial terms up to degree 2
poly = PolynomialFeatures(degree = 2)


In [None]:
# Expand the scaled features into degree-2 polynomial terms, then split again.
# test_size and random_state match the first split of X_scaled, so the
# polynomial models are scored on the same held-out rows as the linear models.
# Note: this rebinds X_train / X_test / y_train / y_test for all following cells.
X_poly = poly.fit_transform(X_scaled)
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.3, random_state=5)
X_train.shape

# Fit linear regression model on the polynomial features

In [None]:
# Re-fit the existing lr_model object on the polynomial training features
# (this replaces the coefficients from the earlier fit), then print the
# single row of coefficients.
lr_model.fit(X_train, y_train)
print(lr_model.coef_[0])

In [None]:
# Score of the re-fitted linear model: training split first, then test split
print(lr_model.score(X_train, y_train), lr_model.score(X_test, y_test), sep="\n")


In [None]:
# Ridge regression with alpha=1, fitted on the current training split;
# fit() returns the estimator itself, so construction and fitting are chained
ridge = Ridge(alpha=1).fit(X_train, y_train)
print("Ridge model:", ridge.coef_)

In [None]:
# Score of the ridge model: training split first, then test split
print(ridge.score(X_train, y_train), ridge.score(X_test, y_test), sep="\n")


In [None]:
# Lasso regression with alpha=0.2, fitted on the current training split;
# fit() returns the estimator itself, so construction and fitting are chained
lasso = Lasso(alpha=0.2).fit(X_train, y_train)
print("Lasso model:", lasso.coef_)


In [None]:
# Score of the lasso model: training split first, then test split
print(lasso.score(X_train, y_train), lasso.score(X_test, y_test), sep="\n")
