# Random Forest Classifier

## Highest Model Score: 92.80%

In [55]:
from data import Database
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load data
db = Database()
# db.reset()
# db.seed(3000)

df = db.dataframe()

# Find Baseline
baseline = df['Rarity'].value_counts(normalize=True).max()

# Create features and target
target = "Rarity"
X = df.drop(columns=[target, 'Name', 'Type', 'Damage', 'Roll', 'Level'])
y = df[target]
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_jobs=-1,
                               random_state=42)

model.fit(x_train, y_train)

# Evaluate model
y_pred = np.array(model.predict(x_test))

print(y_test.shape)
print(y_pred.shape)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}\nBaseline: {baseline}")


(600,)
(600,)
Accuracy: 0.9283333333333333
Baseline: 0.30133333333333334


In [56]:
# Tune hyperparameters
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 25, 40, 55, 70, 85, 100],
    'min_samples_split': [2, 5, 10]
}

grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(x_train, y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


In [57]:
print(grid_search.best_estimator_.score(x_test, y_test))
best_params = grid_search.best_params_
print(best_params)

0.9333333333333333
{'max_depth': 25, 'min_samples_split': 2, 'n_estimators': 200}


In [42]:
from Fortuna import random_float, random_int
from MonsterLab.monster_data import Random
import pandas as pd
from damage_parser import parse_damage

rand = Random()
# Retrain the model with the best_params.
model_trained = RandomForestClassifier(n_estimators=best_params['n_estimators'],
                                       max_depth=best_params['max_depth'],
                                       min_samples_split=best_params['min_samples_split'],
                                       n_jobs=-1,
                                       random_state=42)

model_trained.fit(x_train, y_train)

options = ["Health", "Energy", "Sanity", "Low", "High", "Rarity"]
stats = [round(random_float(1, 250), 2) for _ in range(3)]
level = random_int(1, 84)
health = stats.pop()
energy = stats.pop()
sanity = stats.pop()
damage = f"{level}d{rand.dice[rand.random_rank()]}{rand.bonus()}"
low, high, _ = parse_damage(damage)

data = pd.DataFrame([dict(zip(options, (health, energy, sanity, low, high)))])

prediction = model.predict(data)

proba_pred = model.predict_proba(data).max()

In [119]:
y_pred = np.array(model.predict(x_test))

print(f"Accuracy: {accuracy_score(y_test, y_pred)}\nBaseline: {baseline}")

Accuracy: 0.9283333333333333
Baseline: 0.30133333333333334


In [70]:
print(f"Prediction: Rank {prediction[0]}\nConfidence: {proba_pred*100}%")

Prediction: Rank 1
Confidence: 40.0%


In [76]:
x_train.shape, y_train.shape

((2400, 5), (2400,))

# Neural Network

## Highest Model Score: 94.83%

In [114]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

tf.keras.backend.clear_session()

model_tf = Sequential([
    Dense(10, activation='relu'),
    Dense(20, activation='relu'),
    Dense(12, activation='relu'),
    Dense(6, activation='softmax')
])

callback = EarlyStopping(monitor='val_loss', patience=3)

model_tf.compile(loss='sparse_categorical_crossentropy',
                 optimizer=Adam(learning_rate=0.001),
                 metrics=['accuracy'])

history = model_tf.fit(x_train, y_train, batch_size=200,
                       epochs=200,
                       callbacks=[callback],
                       validation_data=(x_test, y_test))


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [115]:
accuracy = max(history.history['accuracy'])
val_accuracy = max(history.history['val_accuracy'])
print(f"Accuracy: {accuracy}\nValidation Accuracy: {val_accuracy}")

Accuracy: 0.949999988079071
Validation Accuracy: 0.9549999833106995


In [117]:
loss, accuracy = model_tf.evaluate(x_test, y_test, batch_size=50)



In [118]:
print(f"Evaluation Accuracy: {accuracy}\nEvaluation Loss: {loss}")

Evaluation Accuracy: 0.9483333230018616
Evaluation Loss: 0.15672776103019714
