In [10]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score,train_test_split, GridSearchCV, ParameterGrid, StratifiedKFold, RandomizedSearchCV
from sklearn.metrics import mean_squared_error,r2_score,roc_curve,auc,precision_recall_curve, accuracy_score
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier
from sklearn.ensemble import VotingRegressor, VotingClassifier, StackingRegressor, StackingClassifier, GradientBoostingRegressor,GradientBoostingClassifier, BaggingRegressor,BaggingClassifier,RandomForestRegressor,RandomForestClassifier,AdaBoostRegressor,AdaBoostClassifier
from sklearn.linear_model import LinearRegression,LogisticRegression, LassoCV, RidgeCV, ElasticNetCV
from sklearn.neighbors import KNeighborsRegressor
import itertools as it
import time as time
import xgboost as xgb
from pyearth import Earth

np.warnings.filterwarnings('ignore')

In [4]:
# Read the four pre-split wine-quality tables (red/white x train/test) from
# CSV files in the current working directory.
red_train, red_test, white_train, white_test = (
    pd.read_csv(csv_path)
    for csv_path in ('red_train.csv', 'red_test.csv', 'white_train.csv', 'white_test.csv')
)

In [5]:
# Build tagged copies of each split with a string `type` column identifying
# the wine colour. `assign` returns a new frame, so the raw per-colour frames
# are left without the extra column.
red_train_copy = red_train.assign(type='red')
red_test_copy = red_test.assign(type='red')
white_train_copy = white_train.assign(type='white')
white_test_copy = white_test.assign(type='white')

In [6]:
# Stack the red rows on top of the white rows for each split, then one-hot
# encode the string `type` column (get_dummies would also encode any other
# object/category columns, if the CSVs contain any).
combined_train = pd.get_dummies(pd.concat((red_train_copy, white_train_copy)))
combined_test = pd.get_dummies(pd.concat((red_test_copy, white_test_copy)))

In [7]:
def _split_features_target(frame, target='quality'):
    """Return ``(features, target)``: `frame` without the `target` column, and that column."""
    return frame.drop(columns=target), frame[target]


# Predictors / response for each dataset; `quality` is the value being predicted.
red_train_x, red_train_y = _split_features_target(red_train)
red_test_x, red_test_y = _split_features_target(red_test)
white_train_x, white_train_y = _split_features_target(white_train)
white_test_x, white_test_y = _split_features_target(white_test)
combined_train_x, combined_train_y = _split_features_target(combined_train)
combined_test_x, combined_test_y = _split_features_target(combined_test)

# Ensemble for RED wine

In [11]:
# Fit the four individually tuned regressors on the red-wine training set and
# print each model's mean absolute error (MAE) on the red-wine test set.
# Note: scikit-learn metrics take (y_true, y_pred) in that order; MAE is
# symmetric, so the printed values do not depend on the order, but the
# documented order is used to keep the calls correct if the metric changes.

# Tuned MARS model by Cindy
model_red_MARS = Earth(max_degree=4, max_terms=50).fit(red_train_x, red_train_y)
print("MAE for MARS = ", mean_absolute_error(red_test_y, model_red_MARS.predict(red_test_x)))

# Tuned Bagged Decision Tree model by Michael
# The base tree is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional parameter is the base estimator in both.
# bootstrap=False with max_samples < 1 draws each subsample without replacement.
model_red_DT = BaggingRegressor(DecisionTreeRegressor(), n_estimators=400, random_state=1,
                                n_jobs=-1, bootstrap_features=False, bootstrap=False,
                                max_features=0.86, max_samples=0.82).fit(red_train_x, red_train_y)
print("MAE for Bagged DT = ", mean_absolute_error(red_test_y, model_red_DT.predict(red_test_x)))

# Tuned Random forest model by Sabrina
model_red_RF = RandomForestRegressor(n_estimators=850, random_state=1,
                                     bootstrap=False, n_jobs=-1, max_features=7).fit(red_train_x, red_train_y)
print("MAE for Random forest = ", mean_absolute_error(red_test_y, model_red_RF.predict(red_test_x)))

# Tuned XGBoost model by Keaton
model_red_xgb = xgb.XGBRegressor(random_state=1, gamma=0, learning_rate=0.01, max_depth=14,
                                 n_estimators=4000, reg_lambda=0.1,
                                 subsample=0.5).fit(red_train_x, red_train_y)
print("MAE for XGBoost = ", mean_absolute_error(red_test_y, model_red_xgb.predict(red_test_x)))

MAE for MARS =  0.4850365465825792
MAE for Bagged DT =  0.38671875
MAE for Random forest =  0.3612529411764706
MAE for XGBoost =  0.37815727829933166


In [12]:
#Voting ensemble for RED wine
# Equal-weight average of the four base regressors (no `weights` supplied).
# VotingRegressor.fit trains fresh clones of the listed estimators, so the
# already-fitted model_red_* objects themselves are left as they are.
en = VotingRegressor(estimators=[('mars', model_red_MARS), ('bdt', model_red_DT),
                                 ('rf', model_red_RF), ('xgb', model_red_xgb)])
en.fit(red_train_x, red_train_y)
# Metric arguments follow the documented (y_true, y_pred) order; the MAE value is unchanged.
print("Ensemble model MAE = ", mean_absolute_error(red_test_y, en.predict(red_test_x)))

Ensemble model MAE =  0.3942025813713774


# Ensemble for WHITE wine

In [13]:
# Fit the four individually tuned regressors on the white-wine training set and
# print each model's mean absolute error (MAE) on the white-wine test set.
# Note: scikit-learn metrics take (y_true, y_pred) in that order; MAE is
# symmetric, so the printed values do not depend on the order, but the
# documented order is used to keep the calls correct if the metric changes.

# Tuned MARS model by Cindy
model_white_MARS = Earth(max_degree=2).fit(white_train_x, white_train_y)
print("MAE for MARS = ", mean_absolute_error(white_test_y, model_white_MARS.predict(white_test_x)))

# Tuned Bagged Decision Tree model by Michael
# The base tree is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional parameter is the base estimator in both.
# bootstrap=False with max_samples < 1 draws each subsample without replacement.
model_white_DT = BaggingRegressor(DecisionTreeRegressor(), n_estimators=400, random_state=1,
                                  n_jobs=-1, bootstrap_features=False, bootstrap=False,
                                  max_features=0.645, max_samples=0.945).fit(white_train_x, white_train_y)
print("MAE for Bagged DT = ", mean_absolute_error(white_test_y, model_white_DT.predict(white_test_x)))

# Tuned Random forest model by Sabrina
model_white_RF = RandomForestRegressor(n_estimators=700, random_state=1, max_features=2,
                                       bootstrap=False).fit(white_train_x, white_train_y)
print("MAE for Random forest = ", mean_absolute_error(white_test_y, model_white_RF.predict(white_test_x)))

# Tuned XGBoost model by Keaton
model_white_xgb = xgb.XGBRegressor(random_state=1, gamma=0, learning_rate=0.01, max_depth=16,
                                   n_estimators=3000, reg_lambda=0.1,
                                   subsample=0.75).fit(white_train_x, white_train_y)
print("MAE for XGBoost = ", mean_absolute_error(white_test_y, model_white_xgb.predict(white_test_x)))

MAE for MARS =  0.5814239667474026
MAE for Bagged DT =  0.4079479591836735
MAE for Random forest =  0.39846880466472306
MAE for XGBoost =  0.40231834917652365


In [14]:
#Voting ensemble for WHITE wine
# Equal-weight average of the four base regressors (no `weights` supplied).
# VotingRegressor.fit trains fresh clones of the listed estimators, so the
# already-fitted model_white_* objects themselves are left as they are.
en = VotingRegressor(estimators=[('mars', model_white_MARS), ('bdt', model_white_DT),
                                 ('rf', model_white_RF), ('xgb', model_white_xgb)])
en.fit(white_train_x, white_train_y)
# Metric arguments follow the documented (y_true, y_pred) order; the MAE value is unchanged.
print("Ensemble model MAE = ", mean_absolute_error(white_test_y, en.predict(white_test_x)))

Ensemble model MAE =  0.43670804876124486


# Ensemble for COMBINED wine

In [15]:
# Fit the four individually tuned regressors on the combined (red + white)
# training set and print each model's mean absolute error (MAE) on the
# combined test set.
# Note: scikit-learn metrics take (y_true, y_pred) in that order; MAE is
# symmetric, so the printed values do not depend on the order, but the
# documented order is used to keep the calls correct if the metric changes.

# Tuned MARS model by Cindy
model_combined_MARS = Earth(max_degree=5).fit(combined_train_x, combined_train_y)
print("MAE for MARS = ", mean_absolute_error(combined_test_y, model_combined_MARS.predict(combined_test_x)))

# Tuned Bagged Decision Tree model by Michael
# The base tree is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional parameter is the base estimator in both.
# bootstrap=False with max_samples < 1 draws each subsample without replacement.
model_combined_DT = BaggingRegressor(DecisionTreeRegressor(), n_estimators=400, random_state=1,
                                     n_jobs=-1, bootstrap_features=False, bootstrap=False,
                                     max_features=0.69, max_samples=0.995).fit(combined_train_x, combined_train_y)
print("MAE for Bagged DT = ", mean_absolute_error(combined_test_y, model_combined_DT.predict(combined_test_x)))

# Tuned Random forest model by Sabrina
model_combined_RF = RandomForestRegressor(n_estimators=830, random_state=1, max_features=3,
                                          bootstrap=False).fit(combined_train_x, combined_train_y)
print("MAE for Random forest = ", mean_absolute_error(combined_test_y, model_combined_RF.predict(combined_test_x)))

# Tuned XGBoost model by Keaton
model_combined_xgb = xgb.XGBRegressor(random_state=1, gamma=0, learning_rate=0.01, max_depth=16,
                                      n_estimators=4000, reg_lambda=0.3,
                                      subsample=0.75).fit(combined_train_x, combined_train_y)
print("MAE for XGBoost = ", mean_absolute_error(combined_test_y, model_combined_xgb.predict(combined_test_x)))

MAE for MARS =  0.6389752225335108
MAE for Bagged DT =  0.40108923076923075
MAE for Random forest =  0.39270139017608896
MAE for XGBoost =  0.39411787649301383


In [16]:
#Voting ensemble for COMBINED wine
# Equal-weight average of the four base regressors (no `weights` supplied).
# VotingRegressor.fit trains fresh clones of the listed estimators, so the
# already-fitted model_combined_* objects themselves are left as they are.
en = VotingRegressor(estimators=[('mars', model_combined_MARS), ('bdt', model_combined_DT),
                                 ('rf', model_combined_RF), ('xgb', model_combined_xgb)])
en.fit(combined_train_x, combined_train_y)
# Metric arguments follow the documented (y_true, y_pred) order; the MAE value is unchanged.
print("Ensemble model MAE = ", mean_absolute_error(combined_test_y, en.predict(combined_test_x)))

Ensemble model MAE =  0.4494490419962053
