<a href="https://colab.research.google.com/github/danielbauer1979/ML_656/blob/main/Module9_LasVegasExample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np 
import matplotlib.pyplot as plt  
import pandas as pd 
import seaborn as sns
import graphviz
import pydot
from io import StringIO  

from sklearn.preprocessing import StandardScaler, scale
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, multilabel_confusion_matrix

import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from tensorflow.keras import layers

# Las Vegas Dataset

In [None]:
# Fetch the course repository; the CSV read in the next cell lives inside it.
# NOTE(review): re-running this cell once the ML_656 directory exists makes git
# report an error because the destination is already present.
!git clone https://github.com/danielbauer1979/ML_656.git

In [None]:
# Read the TripAdvisor review table from the cloned repository and preview the
# first rows.
reviews_csv = 'ML_656/LasVegasTripAdvisorReviews.csv'
lasvegas = pd.read_csv(reviews_csv)
lasvegas.head()

In [None]:
# Numeric predictors: every int64 column except the target ('Score') and
# 'Hotel stars', which is handled with the categorical columns instead.
numerics = lasvegas.select_dtypes(include=['int64']).columns.tolist()
for excluded_numeric in ('Hotel stars', 'Score'):
    numerics.remove(excluded_numeric)

# Categorical predictors: every object column plus 'Hotel stars', leaving out
# 'User country' and 'Hotel name'.
factors = lasvegas.select_dtypes(include=['object']).columns.tolist()
factors.append('Hotel stars')
for excluded_factor in ('User country', 'Hotel name'):
    factors.remove(excluded_factor)

In [None]:
# Build the modelling table: standardized numeric features, dummy-coded
# categorical features, and the review score as the target column "Y".
lasvegas_numcols = lasvegas[numerics]
lasvegas_faccols = lasvegas[factors]

# dtype=float keeps the indicator columns numeric. Recent pandas versions
# default to boolean dummies, and a frame that mixes bool and float columns
# becomes an object array under `.values`, which Keras cannot train on.
# NOTE(review): get_dummies only encodes object/category columns by default, so
# an integer-typed 'Hotel stars' passes through as a plain number - confirm
# that this is intended.
dummies = pd.get_dummies(lasvegas_faccols, drop_first=True, dtype=float)

# NOTE(review): scale() standardizes with statistics from all rows, i.e. before
# the train/test split further down, so test rows influence the scaling.
lasvegas_numcols_sc_0 = scale(lasvegas_numcols)
lasvegas_numcols_sc = pd.DataFrame(
    data=lasvegas_numcols_sc_0,
    columns=lasvegas_numcols.columns,
    index=dummies.index,
)

# Features first, target last; the target is renamed from "Score" to "Y".
lasvegas_sc = pd.concat([lasvegas_numcols_sc, dummies, lasvegas['Score']], axis=1)
lasvegas_sc = lasvegas_sc.rename(columns={"Score": "Y"})
lasvegas_sc.head()

In [None]:
# Seed NumPy's global generator so the 70/30 split below is repeatable
# (train_test_split falls back on it when no random_state is passed).
np.random.seed(42)
train, test = train_test_split(lasvegas_sc, test_size=0.3)

# Separate the feature matrix from the target column "Y" in each partition.
X_train, y_train = train.drop(columns=['Y']).values, train['Y'].values
X_test, y_test = test.drop(columns=['Y']).values, test['Y'].values

## Neural Network Regression (once again)

In [None]:
# Small feed-forward network that predicts the review score as a single number.
# The input width is taken from the training matrix instead of a hard-coded
# column count, so the model still builds if the feature set changes.
# NOTE(review): no seed is set for the weight initialisation here, so results
# presumably vary from run to run - confirm whether that is acceptable.
inputs = keras.Input(shape=(X_train.shape[1],))
x = layers.Dense(3, activation="relu", name="dense_1")(inputs)
x = layers.Dense(2, activation="relu", name="dense_2")(x)
# Single output unit without activation, i.e. a linear regression head.
outputs = layers.Dense(1, name="predictions")(x)
lasvegas_reg_nnet = keras.Model(inputs=inputs, outputs=outputs)
lasvegas_reg_nnet.compile(
  optimizer=keras.optimizers.Adam(learning_rate=0.004),
  # Loss function to minimize
  loss='mean_squared_error',
)

In [None]:
# Train the regression network on the training partition; the object returned
# by fit() is kept in history_reg.
history_reg = lasvegas_reg_nnet.fit(x=X_train, y=y_train, batch_size=50, epochs=850)

In [None]:
# Score the held-out rows and flatten the single-column network output to 1-D.
# reshape(-1) infers the length, so the cell no longer breaks when the test
# split has a different size than the previously hard-coded 152 rows.
lasvegas_nnet_reg_preds = lasvegas_reg_nnet.predict(X_test).reshape(-1)

# Squared Pearson correlation between predictions and observed scores (an
# R^2-style measure of fit); the correlation is computed only once.
reg_pred_corr = np.corrcoef(lasvegas_nnet_reg_preds, y_test)[0, 1]
reg_pred_corr * reg_pred_corr

In [None]:
# Show the raw (continuous) score predictions of the regression network.
lasvegas_nnet_reg_preds

In [None]:
# Turn the continuous predictions into score classes by rounding to the nearest
# integer. The result is clipped to the range of scores seen in training so a
# prediction that overshoots the scale cannot create a class that does not
# exist, and cast to int so the labels have the same type as y_test.
lasvegas_nnet_reg_preds_clss = np.clip(
    np.round(lasvegas_nnet_reg_preds), y_train.min(), y_train.max()
).astype(int)
lasvegas_nnet_reg_preds_clss

In [None]:
# Confusion matrix for the rounded regression predictions
# (rows: true scores, columns: predicted scores).
confusion_matrix(y_test, lasvegas_nnet_reg_preds_clss)

## Multinomial Regression

In [None]:
# Multinomial (softmax) logistic regression on the same features.
# The `multi_class` argument is deprecated in recent scikit-learn releases;
# with the default lbfgs solver a target with more than two classes is fitted
# as a multinomial model anyway, so the argument is dropped.
# fit_intercept=True is the default and is kept only for explicitness.
multin_model = LogisticRegression(fit_intercept=True, max_iter=10000).fit(X_train, y_train)

In [None]:
# Predicted score class for every held-out row from the multinomial model.
y_mnmod_pred = multin_model.predict(X_test)
y_mnmod_pred

In [None]:
# True held-out scores, shown for visual comparison with the predictions above.
y_test

In [None]:
# Squared Pearson correlation between the multinomial predictions and the true
# scores.
mn_pred_corr = np.corrcoef(y_mnmod_pred, y_test)[0, 1]
mn_pred_corr * mn_pred_corr

In [None]:
# Confusion matrix for the multinomial model
# (rows: true scores, columns: predicted scores).
confusion_matrix(y_test, y_mnmod_pred)

In [None]:
# One 2x2 (one-vs-rest) confusion matrix per score class for the multinomial model.
multilabel_confusion_matrix(y_test, y_mnmod_pred)

In [None]:
# Share of held-out reviews whose score the multinomial model predicts exactly.
accuracy_score(y_test, y_mnmod_pred)

## Neural Network Classification

In [None]:
# One-hot encode the training scores for the softmax classifier.
# to_categorical is taken from the tensorflow.keras namespace that also builds
# the models in this notebook: the standalone `keras.utils.np_utils` module is
# not available in newer Keras releases.
dummy_y_train = keras.utils.to_categorical(y_train)

In [None]:
# Column names, dtypes and non-null counts of the training partition.
train.info()

In [None]:
# Feed-forward classifier with the same hidden layers as the regression
# network, but a softmax output with one unit per one-hot column.
# Input and output widths are read from the training arrays instead of being
# hard-coded (17 and 6 in the original), so the model still builds if the
# feature set or the score range changes.
inputs = keras.Input(shape=(X_train.shape[1],))
x = layers.Dense(3, activation="relu", name="dense_1")(inputs)
x = layers.Dense(2, activation="relu", name="dense_2")(x)
outputs = layers.Dense(dummy_y_train.shape[1], activation='softmax', name="predictions")(x)
lasvegas_nnet = keras.Model(inputs=inputs, outputs=outputs)
lasvegas_nnet.compile(
  optimizer=keras.optimizers.Adam(learning_rate=0.005),
  # Loss function to minimize
  loss='categorical_crossentropy',
  # List of metrics to monitor
  metrics=['accuracy'],
)

In [None]:
# Train the classifier against the one-hot encoded scores; the object returned
# by fit() is kept in history.
history = lasvegas_nnet.fit(x=X_train, y=dummy_y_train, batch_size=50, epochs=1000)

In [None]:
# Softmax outputs for the held-out rows; the predicted class is the index of
# the largest output in each row.
lasvegas_nnet_val_preds = lasvegas_nnet.predict(X_test)
lasvegas_nnet_val_preds_class = lasvegas_nnet_val_preds.argmax(axis=1)
lasvegas_nnet_val_preds_class

In [None]:
# True held-out scores, shown for visual comparison with the predicted classes above.
y_test

In [None]:
# Share of held-out reviews whose score the softmax classifier predicts exactly.
accuracy_score(y_test, lasvegas_nnet_val_preds_class)

In [None]:
# Confusion matrix for the softmax classifier
# (rows: true scores, columns: predicted scores).
confusion_matrix(y_test, lasvegas_nnet_val_preds_class)

In [None]:
# Squared Pearson correlation between the classifier's predicted classes and
# the true scores.
nnet_pred_corr = np.corrcoef(lasvegas_nnet_val_preds_class, y_test)[0, 1]
nnet_pred_corr * nnet_pred_corr

In [None]:
# For comparison: accuracy of the regression network from the earlier section
# once its predictions are rounded to score classes.
accuracy_score(y_test, lasvegas_nnet_reg_preds_clss)