# **Shooter Model**
## An XGBoost regression model (`XGBRegressor`) that predicts the rank Rach and Kelly would give to a new, incoming alcohol:

In [81]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from telnetlib import GA
import sklearn
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, accuracy_score, f1_score, mean_absolute_percentage_error
from xgboost import XGBRegressor

***

### **Process:**
### Loading the shooter CSV into a pandas DataFrame:

In [83]:
# Load the ranked alcohols and pin column dtypes in one pass:
# the alcohol percentage as a float, every yes/no flag as a boolean.
df = pd.read_csv('curr_alcs.csv').astype({
    'alc_percentage': float,
    'if_sweet': bool,
    'if_fruit_on_bottle': bool,
    'if_citrus': bool,
    'shooter_plastic': bool,
})
df.head()

Unnamed: 0,rank,name,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type,color,brand
0,1,kinky red,0.17,True,True,False,True,vodka,red,kinky
1,2,kinky pink,0.17,True,True,True,True,vodka,red,kinky
2,3,smirnoff raspberry,0.35,True,True,False,True,vodka,clear,smirnoff
3,4,smirnoff peach lemonade,0.35,True,True,True,True,vodka,orange,smirnoff
4,5,smirnoff pink lemonade,0.35,True,True,True,True,vodka,red,smirnoff


### One-Hot Encoding of type, color and brand:

In [84]:
# Expand each categorical column into one boolean indicator column per category.
df = df.pipe(pd.get_dummies, columns=['type', 'brand', 'color'], dtype=bool)
df.head()

Unnamed: 0,rank,name,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
0,1,kinky red,0.17,True,True,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,2,kinky pink,0.17,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,3,smirnoff raspberry,0.35,True,True,False,True,False,False,False,...,False,False,False,False,False,True,False,False,False,False
3,4,smirnoff peach lemonade,0.35,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,5,smirnoff pink lemonade,0.35,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True


### Prep the data for XGBoost Model:

In [85]:
# The free-text name is not used as a model input, so it is removed first.
df = df.drop(columns=['name'])
# Every remaining column except the target ('rank') is a feature.
features = df.columns[df.columns != 'rank']

In [86]:
print("split it fully...")
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df['rank'],
    test_size=0.2,
    random_state=40,
)

split it fully...


In [90]:
print("creating the model...")
# Small, shallow ensemble with row and column subsampling.
# NOTE(review): no random_state is passed even though subsample/colsample_bytree
# are below 1 — confirm the library's default seeding is acceptable for reproducibility.
model = XGBRegressor(
    n_estimators=100,
    max_depth=3,
    min_child_weight=3,
    gamma=0,
    subsample=0.6,
    colsample_bytree=0.3,
)

creating the model...


In [91]:
# Train on the training split only; the held-out rows are never seen during fitting.
model.fit(X_train, y_train)
print("- predicting...")
# Predicted ranks for the held-out rows, in the same row order as X_test.
y_pred = model.predict(X_test)

- predicting...


In [92]:
# R^2 on the held-out rows. A value below 0 means the predictions fit worse than
# always predicting the mean of y_test.
score = r2_score(y_true=y_test, y_pred=y_pred)
print("model predicted r2:", score)

model predicted r2: -0.2273768132588263


In [93]:
# Peek at the first held-out feature rows.
X_test.head(n=5)

Unnamed: 0,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,type_rum,type_tequila,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
2,0.35,True,True,False,True,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
16,0.4,False,False,False,True,False,False,False,False,True,...,False,False,False,False,True,False,False,False,False,False
19,0.35,True,False,False,False,False,True,False,False,False,...,False,False,True,False,False,False,False,False,False,False
14,0.4,False,False,False,True,False,False,False,False,True,...,False,False,False,False,False,True,False,False,False,False
15,0.4,False,False,False,True,False,False,False,False,True,...,False,False,False,False,False,True,False,False,False,False


In [94]:
# Independent snapshot of the encoded training frame (`df` is reassigned in a later cell).
newdf = df.copy(deep=True)
newdf.head()

Unnamed: 0,rank,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,type_rum,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
0,1,0.17,True,True,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,2,0.17,True,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,3,0.35,True,True,False,True,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
3,4,0.35,True,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,5,0.35,True,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [95]:
# Load the second CSV and apply the same preparation as the training data:
# drop the 'id' column, pin dtypes, then one-hot encode the categorical columns.
raw_df = pd.read_csv('alcs_w_new.csv')
df = (
    raw_df
    .drop(columns=['id'])
    .astype({
        'alc_percentage': float,
        'if_sweet': bool,
        'if_fruit_on_bottle': bool,
        'if_citrus': bool,
        'shooter_plastic': bool,
    })
    .pipe(pd.get_dummies, columns=['type', 'brand', 'color'], dtype=bool)
)
df.head()

Unnamed: 0,rank,name,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
0,1.0,kinky pink,0.17,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,2.0,smirnoff raspberry,0.35,True,True,False,True,False,False,False,...,False,False,False,False,False,True,False,False,False,False
2,3.0,smirnoff peach lemonade,0.35,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,4.0,smirnoff pink lemonade,0.35,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,5.0,kinky aloha,0.17,True,True,True,True,False,False,False,...,False,False,False,False,False,True,False,False,False,False


In [70]:
# The row to score is taken to be the final row of the frame.
examined = df.tail(1)
examined.head()

Unnamed: 0,rank,name,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
26,,kinky red,0.17,True,True,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [71]:
# Remove the identifier and the target column so only model inputs remain.
examined = examined.drop(columns=['name', 'rank'])
examined.head()

Unnamed: 0,alc_percentage,if_sweet,if_fruit_on_bottle,if_citrus,shooter_plastic,type_gin,type_liqueur,type_moonshine,type_rum,type_tequila,...,brand_southern_comfort,brand_stoli,color_black,color_blue,color_brown,color_clear,color_green,color_orange,color_purple,color_red
26,0.17,True,True,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True


In [76]:
# The new row was one-hot encoded in a separate get_dummies call on a different CSV,
# so its columns are not guaranteed to match the training features: a type/brand/color
# present in only one file adds or removes indicator columns or shifts their order.
# Reindex to exactly the columns the model was fitted on. Indicator columns missing
# from the new row become False; columns unknown to the model are dropped.
examined_aligned = examined.reindex(columns=X_train.columns, fill_value=False)

# Predict the rank for the aligned row(s) and report the first one, rounded to a whole rank.
predicted = model.predict(examined_aligned)
print("prediction ranking: #", int(round(predicted[0], 0)), sep='')

prediction ranking: #2
