##### `Practicing Multiple Linear Regression Model`

In [9]:
# Ignore warnings code
from warnings import filterwarnings
filterwarnings("ignore")

print('Multiple Linear Regression - Start')
print('................................', '\n')

#-----------------------
# Step-1: Data Ingestion
#-----------------------

print('[Step-1]: Data Ingestion', '\n')
import pandas as pd
df = pd.read_csv('50_Startups.csv'); print('Actual 50_Startups.csv file data size: ', df.shape , '\n')

#---------------------------------------------
# Step-2 Convert Categorical values to numeric
#---------------------------------------------

print('[Step-2]: Categoric Columns Converted to Numeric columns', '\n')
#Here STATE is categorical which needs to be converted to numeric using get_dummies
print(pd.get_dummies(df['STATE'], dtype=int, drop_first=True).head(1), '\n')

#-------------------------
# Step-3: Separate X and Y
#-------------------------

print('[Step-3]: Separate X and Y','\n')
X_Numeric = df[['RND', 'ADMIN', 'MKT']]; X_Categoric = pd.get_dummies(df['STATE'], dtype=int, drop_first=True)
X_Pre = X_Numeric.join(X_Categoric)
Y = df["PROFIT"]
print('X Cols Size:',X_Pre.shape, 'Y Cols Size:', Y.shape, '\n')

#--------------------
# Step-4: Build Model
#--------------------

print('[Step-4]: Build Model','\n')
from sklearn.linear_model import LinearRegression
model = LinearRegression(); model.fit(X_Pre, Y)
print('Coeff:')
print(model.coef_, '\n')
print('Intercept:')
print(model.intercept_, '\n')
lr_model_coeffs = pd.Series(model.coef_, index=X_Pre.columns)
print('Columns Coefficients:')
print(lr_model_coeffs, '\n')

#----------------------
# Step-5 Evaluate Model
#----------------------

print('[Step-5]: Evaluate Model','\n')
from sklearn.metrics import(root_mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score)

def evaluate_model(model, x, y):
    y_pred = model.predict(x)
    rmse = root_mean_squared_error(y, y_pred); mae = mean_absolute_error(y, y_pred)
    mape = mean_absolute_percentage_error(y, y_pred); r2 = r2_score(y, y_pred)
    return f'[RMSE: {rmse:.2f}| MAE:{mae:.2f}| MAPE: {mape:.2%}| R2_SCORE: {r2:.2%}]'


print('Model Metrics:', evaluate_model(model, X_Pre, Y))

#-------------------------------
# Step-6 Out of Model Prediction
#-------------------------------

print('[Step-6]: Out of Model Prediction','\n')
rnd = 50000; admin = 40000; mkt = 45000; state = [0, 0, 1]

xnew = [[rnd, admin, mkt, state[1], state[2]]]
print(xnew)
model.predict(xnew)
print('\n','Multiple Linear Regression - End')
print('................................', '\n')



Multiple Linear Regression - Start
................................ 

[Step-1]: Data Ingestion 

Actual 50_Startups.csv file data size:  (50, 5) 

[Step-2]: Categoric Columns Converted to Numeric columns 

   Florida  New York
0        0         1 

[Step-3]: Separate X and Y 

X Cols Size: (50, 5) Y Cols Size: (50,) 

[Step-4]: Build Model 

Coeff:
[ 8.06023114e-01 -2.70043196e-02  2.69798610e-02  1.98788793e+02
 -4.18870191e+01] 

Intercept:
50125.343831604216 

Columns Coefficients:
RND           0.806023
ADMIN        -0.027004
MKT           0.026980
Florida     198.788793
New York    -41.887019
dtype: float64 

[Step-5]: Evaluate Model 

Model Metrics: [RMSE: 8854.76| MAE:6475.50| MAPE: 10.60%| R2_SCORE: 95.08%]
[Step-6]: Out of Model Prediction 

[[50000, 40000, 45000, 0, 1]]

 Multiple Linear Regression - End
................................ 

