# Model Selection

From step 5, model 3 have the highest R-square and lowest RMSE. So we will choose model 3 as our final model.

In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 12})
%matplotlib inline

In [2]:
pwd

'D:\\Jupyter_Notebook\\Springboard\\Guided_Capstone'

In [3]:
df = pd.read_csv('D:\Jupyter_Notebook\Springboard\Guided_Capstone\data\step3_output.csv')

In [4]:
df.columns

Index(['Name', 'state', 'summit_elev', 'vertical_drop', 'trams', 'fastEight',
       'fastSixes', 'fastQuads', 'quad', 'triple', 'double', 'surface',
       'total_chairs', 'Runs', 'TerrainParks', 'LongestRun_mi',
       'SkiableTerrain_ac', 'Snow Making_ac', 'daysOpenLastYear', 'yearsOpen',
       'averageSnowfall', 'AdultWeekday', 'AdultWeekend', 'projectedDaysOpen',
       'NightSkiing_ac', 'cluster'],
      dtype='object')

In [5]:
#select Big Mountain Resort
BigMountain = df[df['Name'].str.contains("Big Squaw Mountain Ski Resort")]
BigMountain

Unnamed: 0,Name,state,summit_elev,vertical_drop,trams,fastEight,fastSixes,fastQuads,quad,triple,...,SkiableTerrain_ac,Snow Making_ac,daysOpenLastYear,yearsOpen,averageSnowfall,AdultWeekday,AdultWeekend,projectedDaysOpen,NightSkiing_ac,cluster
74,Big Squaw Mountain Ski Resort,Maine,3200,660,0,0.0,0,0,0,1,...,100.0,0.0,67.0,6.0,45.0,30.0,30.0,58.0,0.0,2


In [6]:
# remove useless features and the target variable
BigMountain.drop(columns=['Name','state','summit_elev', 'AdultWeekend'],inplace = True)
BigMountain

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


Unnamed: 0,vertical_drop,trams,fastEight,fastSixes,fastQuads,quad,triple,double,surface,total_chairs,...,LongestRun_mi,SkiableTerrain_ac,Snow Making_ac,daysOpenLastYear,yearsOpen,averageSnowfall,AdultWeekday,projectedDaysOpen,NightSkiing_ac,cluster
74,660,0,0.0,0,0,0,1,0,0,1,...,0.8,100.0,0.0,67.0,6.0,45.0,30.0,58.0,0.0,2


In [7]:
# remove Big Mountain Resort away from the dataset
df.drop([74],inplace = True)

In [8]:
#check row number
df.shape

(329, 26)

In [9]:
# remove state and name and summit_elev
df_nostate_nosummit = df.drop(columns=['Name','state','summit_elev'])

In [10]:
# set features and target variables
newX = df_nostate_nosummit.drop('AdultWeekend',axis=1)
newY = df_nostate_nosummit['AdultWeekend']

In [11]:
#fit a linear model
lm = LinearRegression()
model = lm.fit(newX,newY)

The R-square is 0.756, which is relatively big, meaning that the variability in the target variable has been explained by the regression. RMSR is 13.013, it is a relatively high for this problem. 

In [12]:
pred= lm.predict(newX)
RMSE = np.sqrt(mean_squared_error(newY,pred))
R2 = r2_score(newY, pred)
print("RMSE:",RMSE) 
print("R^2:",R2)

RMSE: 13.0137941262162
R^2: 0.7559727286876537


In [16]:
coef = pd.DataFrame({'feature': newX.columns, 'coef':lm.coef_})
coef

Unnamed: 0,feature,coef
0,vertical_drop,0.006626
1,trams,-1.883421
2,fastEight,-55.297963
3,fastSixes,12.184338
4,fastQuads,9.616111
5,quad,7.719153
6,triple,6.749873
7,double,7.072432
8,surface,7.448936
9,total_chairs,-6.390541


In [19]:
coef.sort_values(by=['coef'], ascending=False)

Unnamed: 0,feature,coef
3,fastSixes,12.184338
4,fastQuads,9.616111
5,quad,7.719153
8,surface,7.448936
7,double,7.072432
6,triple,6.749873
18,AdultWeekday,0.635074
12,LongestRun_mi,0.307849
10,Runs,0.067996
15,daysOpenLastYear,0.039655


In [14]:
#Predict the weekend adult price of Big Mountain Resor
BigMountain_price= lm.predict(BigMountain)
BigMountain_price

array([39.23281327])

The current price for Big Mountain Resort is $30. The model tells us that Big Mountain Resort is able to set the price up to $39.23 with its features. Every year about 350,000 people ski or snowboard at Big Mountainl. Assume 50% of them come at the weekend.  In order to cover the additional $1,540,000, The Resort only need to increase the adult weekend price by $8.8. 

In [15]:
1540000/(350000*0.5)

8.8