# Regression with Fishing Data
#### University of Denver
## Isabel Osgood

In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso

In [20]:
#data from fishing boats 
fishdf = pd.read_csv("Fishing.csv")

fishdf.head()

Unnamed: 0.1,Unnamed: 0,mode,price,catch,pbeach,ppier,pboat,pcharter,cbeach,cpier,cboat,ccharter,income
0,1,charter,182.93,0.5391,157.93,157.93,157.93,182.93,0.0678,0.0503,0.2601,0.5391,7083.3317
1,2,charter,34.534,0.4671,15.114,15.114,10.534,34.534,0.1049,0.0451,0.1574,0.4671,1249.9998
2,3,boat,24.334,0.2413,161.874,161.874,24.334,59.334,0.5333,0.4522,0.2413,1.0266,3749.9999
3,4,pier,15.134,0.0789,15.134,15.134,55.93,84.93,0.0678,0.0789,0.1643,0.5391,2083.3332
4,5,boat,41.514,0.1082,106.93,106.93,41.514,71.014,0.0678,0.0503,0.1082,0.324,4583.332


In [21]:
#drop first two columns: 0 error 1 categorical 
fishdf = fishdf.drop(['Unnamed: 0', 'mode'], axis = 1) 

#standardize 
scalar = StandardScaler()
fish_standard = pd.DataFrame(scalar.fit_transform(fishdf), columns=fishdf.columns)
fish_standard

#split 
y = fish_standard['income']
X = fish_standard.drop(['income'], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [22]:
#create linear model 
lr = LinearRegression()
fish_lr = lr.fit(X_train, y_train)
#predict
train_pred = fish_lr.predict(X_train)
test_pred = fish_lr.predict(X_test)

#get MSE per train and test
print("Training MSE")
print(' ', mean_squared_error(y_train, train_pred))
print("Testing MSE")
print(' ', mean_squared_error(y_test, test_pred))
#print(mean_squared_error(y_train, train_pred) + mean_squared_error(y_test, test_pred))

Training MSE
  0.5178569620341031
Testing MSE
  0.46038632570242666


In [23]:
#Lasso model 
lasso = Lasso()
fish_las = lasso.fit(X_train, y_train)
#predict
train_pred = fish_las.predict(X_train)
test_pred = fish_las.predict(X_test)

#get MSE per train and test
print("Training MSE")
print(' ', mean_squared_error(y_train, train_pred))
print("Testing MSE")
print(' ', mean_squared_error(y_test, test_pred))
#print(mean_squared_error(y_train, train_pred) + mean_squared_error(y_test, test_pred))

Training MSE
  1.0299873618704116
Testing MSE
  0.9394094633874402


In [24]:
from sklearn.linear_model import Ridge
#Ridge model 
r = Ridge()
r = r.fit(X_train, y_train)
#predict
train_pred = r.predict(X_train)
test_pred = r.predict(X_test)

#get MSE per train and test
print("Training MSE")
print(' ', mean_squared_error(y_train, train_pred))
print("Testing MSE")
print(' ', mean_squared_error(y_test, test_pred))

#print(mean_squared_error(y_train, train_pred) +mean_squared_error(y_test, test_pred))

Training MSE
  0.5193965448142415
Testing MSE
  0.46027976053671205


In [28]:
from sklearn.model_selection import GridSearchCV
#Tuning Lasso and Ridge alphas 

alpha_grid = {'alpha': [0.1, 0.2, 0.4, 0.6, 1.0, 1.4]}
lasso = GridSearchCV(estimator=Lasso(), param_grid = alpha_grid)
model = lasso.fit(X_train, y_train)
model.best_params_


{'alpha': 0.1}

In [29]:
ridge = GridSearchCV(Ridge(), alpha_grid)
model = ridge.fit(X_train, y_train)
model.best_params_

{'alpha': 0.4}