### Implementation of Linear Regression and Random Forest Regressor for Regression Problem

##### 1. Import the necessary libraries as required 
##### 2. Read excel file to dataframe
##### 3. Standardize training and test datasets.
##### 4. Convert the standardized (scaled) arrays to dataframes
##### 5. Instantiate Linear Regressor and fit the model for training data
##### 6. Display MSE and R^2 for Linear Regressor 
##### 7. Instantiate Random Forest Regressor and fit the model for training data
##### 8. Display MSE and R^2 for Random Forest Regressor 


In [3]:
import warnings                                           #import the necessary libraries required 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
# from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer 
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn import metrics

# Location of the housing spreadsheet.  This is a machine-specific absolute
# path; change it here when running on another machine.
HOUSING_XLSX_PATH = "C:/Users/amrut/Documents/UTA documents/Courses/Summer 2020/Data Science/HW/HW3/ca_housing.xlsx"

# Integer codes substituted for the text categories of "ocean_proximity".
# Series.map turns any value that is not a key of this table into NaN.
OCEAN_PROXIMITY_CODES = {
    "<1H OCEAN": 1,
    "INLAND": 2,
    "ISLAND": 3,
    "NEAR BAY": 4,
    "NEAR OCEAN": 5,
}

housing = pd.read_excel(HOUSING_XLSX_PATH)
housing["ocean_proximity"] = housing["ocean_proximity"].map(OCEAN_PROXIMITY_CODES)

# Independent variables (features): every column except the one to predict.
housing_ind = housing.drop("median_house_value", axis=1)
# Dependent variable (target): the column to be predicted.
housing_dep = housing["median_house_value"]

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    housing_ind, housing_dep, test_size=0.2, random_state=42
)

# Standardize the features.  The scaler is fitted on the training split only
# and then reused for the test split, so no test statistics leak into it.
independent_scaler = StandardScaler()
X_train_scalar = independent_scaler.fit_transform(X_train)
X_test_scalar = independent_scaler.transform(X_test)

# Wrap the scaled arrays in DataFrames, carrying over the original column
# names and row labels instead of falling back to 0..n-1 for both.
X_train_scalar_df = pd.DataFrame(
    data=X_train_scalar, index=X_train.index, columns=X_train.columns
)
X_test_scalar_df = pd.DataFrame(
    data=X_test_scalar, index=X_test.index, columns=X_test.columns
)

# Fill missing feature values BEFORE fitting, so the model is trained and
# evaluated on the same imputed data.  Both splits are filled with the
# training-set column means; the means are captured once, up front.
train_feature_means = X_train_scalar_df.mean()
X_train_scalar_df.fillna(train_feature_means, inplace=True)
X_test_scalar_df.fillna(train_feature_means, inplace=True)

linearRegModel = LinearRegression()                               # instantiate the linear regressor
linearRegModel.fit(X_train_scalar_df, y_train)                    # fit the linear regressor

# Predict from the imputed DataFrame (not the raw scaled array) so the test
# MSE below is computed on the same inputs as the test R^2.
y_pred = linearRegModel.predict(X_test_scalar_df)

print("--------------------Linear Regressor----------------------")

print("\nMSE for train: {}".format(metrics.mean_squared_error(y_train, linearRegModel.predict(X_train_scalar_df))))
print("MSE for test: {}".format(metrics.mean_squared_error(y_test, y_pred)))

# NOTE(review): this name is not used in the visible code; the import is kept
# in case later cells rely on it.
from sklearn.metrics import mean_squared_error
print("\nR^2 for train: {}".format(linearRegModel.score(X_train_scalar_df, y_train)))
print("R^2 for test: {}".format(linearRegModel.score(X_test_scalar_df, y_test)))

# Random forest of 30 trees.  random_state is fixed (same seed as the
# train/test split) so that repeated runs report the same scores; without it
# every run trains a different forest.
rfReg = RandomForestRegressor(n_estimators=30, random_state=42)
rfReg.fit(X_train_scalar_df, y_train)                            # fit the Random Forest regressor
rfReg_y_pred = rfReg.predict(X_test_scalar_df)                   # predict test data with the Random Forest regressor

print("\n--------------------Random Forest Regressor----------------------")

# Train-set predictions are computed once and reused for the train MSE.
rfReg_train_pred = rfReg.predict(X_train_scalar_df)
print("\nMSE for train: {}".format(metrics.mean_squared_error(y_train, rfReg_train_pred)))
print("MSE for test: {}".format(metrics.mean_squared_error(y_test, rfReg_y_pred)))
print("\nR^2 for train: {}".format(rfReg.score(X_train_scalar_df, y_train)))
print("R^2 for test: {}".format(rfReg.score(X_test_scalar_df, y_test)))


--------------------Linear Regressor----------------------

MSE for train: 4810958229.787787
MSE for test: 5055026565.872766

R^2 for train: 0.6401079709888613
R^2 for test: 0.6142405424710489

--------------------Random Forest Regressor----------------------

MSE for train: 376556178.9633291
MSE for test: 2547193834.761921

R^2 for train: 0.9718310654944573
R^2 for test: 0.8056184079125215
