In [1]:
""" Monster Database Interface """
from os import getenv
import pandas as pd
from certifi import where
from dotenv import load_dotenv
from MonsterLab import Monster
from pandas import DataFrame
from pymongo import MongoClient
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline


In [2]:
class MongoDB:
    """Wrapper around one collection of the 'BandersnatchStarter' MongoDB database.

    The database handle is created once, when this class body is executed,
    and is shared by every instance; an instance only selects which
    collection it operates on.
    """
    # Executed at class-definition time: load variables from a .env file so
    # DB_URL can be read from the environment on the next line.
    load_dotenv()
    # Shared handle; the CA bundle path returned by certifi's where() is
    # passed to the client as its TLS CA file.
    database = MongoClient(getenv("DB_URL"), tlsCAFile=where())['BandersnatchStarter']

    def __init__(self, collection: str):
        """Bind this instance to a collection of the shared database.

        Args:
            collection: Name of the collection to operate on.
        """
        self.collection = self.database[collection]

    def seed(self, amount: int) -> None:
        """Generate `amount` Monster documents and insert them in one batch.

        Prints how many documents the server reported as inserted.

        Args:
            amount: Number of monsters to generate. Zero or a negative number
                inserts nothing.
        """
        documents = []
        for _ in range(amount):
            monster = Monster()
            documents.append({
                "Name": monster.name,
                "Type": monster.type,
                "Level": monster.level,
                "Rarity": monster.rarity,
                "Damage": monster.damage,
                "Health": monster.health,
                "Energy": monster.energy,
                "Sanity": monster.sanity,
                "Timestamp": monster.timestamp
            })

        # insert_many() refuses an empty document list, so only call it when
        # there is something to insert.
        if documents:
            result = self.collection.insert_many(documents)
            inserted = len(result.inserted_ids)
        else:
            inserted = 0
        print(f"There were {inserted} documents inserted.")

    def reset(self) -> None:
        """Drop the whole collection, removing every document in it."""
        self.collection.drop()
        print(f"Collection '{self.collection.name}' has been reset.")

    def count(self) -> int:
        """Return the number of documents in the collection."""
        # An empty filter matches every document.
        return self.collection.count_documents({})

    def dataframe(self) -> DataFrame:
        """Return every document in the collection as a pandas DataFrame.

        The frame is built from the raw documents, so the `_id` field is
        included as a column.
        """
        documents = list(self.collection.find())
        return pd.DataFrame(documents)

    def html_table(self) -> str:
        """Return the collection rendered as an HTML table.

        Returns:
            The HTML produced by `DataFrame.to_html` with the `_id` column
            removed, or the literal string "None" when the collection yields
            an empty DataFrame.
        """
        df = self.dataframe()

        if df.empty:
            return "None"

        # errors='ignore' keeps this safe if the frame has no `_id` column.
        df = df.drop(columns=['_id'], errors='ignore')
        return df.to_html(index=True)

In [3]:
# Bind the wrapper to the collection named 'Collection'.
db = MongoDB('Collection')

In [4]:
# Load every document of the collection into a pandas DataFrame.
df = db.dataframe()

In [5]:
# Preview the first rows; the Mongo `_id` column is still present here.
df.head()


Unnamed: 0,_id,Name,Type,Level,Rarity,Damage,Health,Energy,Sanity,Timestamp
0,65ceb12383855ca60223cc86,Pit Lord,Devilkin,14,Rank 1,14d4+2,54.57,57.59,55.12,2024-02-15 16:49:39
1,65ceb12383855ca60223cc87,Diamond Archfey,Fey,10,Rank 1,10d4+2,38.25,39.21,39.54,2024-02-15 16:49:39
2,65ceb12383855ca60223cc88,Shadow Elemental,Elemental,1,Rank 0,1d2+4,1.61,1.82,1.73,2024-02-15 16:49:39
3,65ceb12383855ca60223cc89,Hell Hound,Demonic,11,Rank 2,11d6,65.03,67.99,65.18,2024-02-15 16:49:39
4,65ceb12383855ca60223cc8a,Bronze Faerie,Fey,3,Rank 4,3d10+1,32.69,30.54,30.11,2024-02-15 16:49:39


In [6]:
# Render the whole DataFrame (still including `_id`) as an HTML table string.
html_df = df.to_html()

In [12]:
# Show only the start of the markup: the full string covers every row of the
# table and would bloat the notebook output.
html_df[:500]

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>_id</th>\n      <th>Name</th>\n      <th>Type</th>\n      <th>Level</th>\n      <th>Rarity</th>\n      <th>Damage</th>\n      <th>Health</th>\n      <th>Energy</th>\n      <th>Sanity</th>\n      <th>Timestamp</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>65ceb12383855ca60223cc86</td>\n      <td>Pit Lord</td>\n      <td>Devilkin</td>\n      <td>14</td>\n      <td>Rank 1</td>\n      <td>14d4+2</td>\n      <td>54.57</td>\n      <td>57.59</td>\n      <td>55.12</td>\n      <td>2024-02-15 16:49:39</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>65ceb12383855ca60223cc87</td>\n      <td>Diamond Archfey</td>\n      <td>Fey</td>\n      <td>10</td>\n      <td>Rank 1</td>\n      <td>10d4+2</td>\n      <td>38.25</td>\n      <td>39.21</td>\n      <td>39.54</td>\n      <td>2024-02-15 16:49:39</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>65c

In [27]:
# Remove the Mongo `_id` column before plotting and modelling.
# errors='ignore' makes the cell safe to re-run once `_id` is already gone.
df = df.drop(columns=['_id'], errors='ignore')

In [13]:
import altair as alt
from altair import Chart, Tooltip

# Switch Altair to its 'vegafusion' data transformer for every chart below.
# NOTE(review): presumably chosen to get past Altair's default row limit;
# confirm, and confirm the vegafusion package is installed in this environment.
alt.data_transformers.enable('vegafusion')

def chart(df: pd.DataFrame, x: str, y:  str, target: str) -> alt.Chart:
    """Build a scatter plot of `y` against `x`, coloured by `target`.

    Args:
        df: Data to plot.
        x: Column encoded on the x axis.
        y: Column encoded on the y axis.
        target: Column encoded as the point colour.

    Returns:
        An Altair chart of size-100 circle marks, titled
        "<y> by <x> for <target>", whose tooltip is built from every
        column of `df`.
    """
    heading = f"{y} by {x} for {target}"
    base = Chart(df, title=heading)
    # The tooltip is built from all columns of the frame.
    hover_fields = Tooltip(df.columns.to_list())
    return base.mark_circle(size=100).encode(
        x=x,
        y=y,
        color=target,
        tooltip=hover_fields,
    )

In [14]:
# Plot Energy against Level, with each point coloured by Rarity.
chart(df, 'Level', 'Energy', 'Rarity')


In [15]:
# Re-check the frame after dropping `_id`: it should no longer be a column.
df.head()

Unnamed: 0,Name,Type,Level,Rarity,Damage,Health,Energy,Sanity,Timestamp
0,Pit Lord,Devilkin,14,Rank 1,14d4+2,54.57,57.59,55.12,2024-02-15 16:49:39
1,Diamond Archfey,Fey,10,Rank 1,10d4+2,38.25,39.21,39.54,2024-02-15 16:49:39
2,Shadow Elemental,Elemental,1,Rank 0,1d2+4,1.61,1.82,1.73,2024-02-15 16:49:39
3,Hell Hound,Demonic,11,Rank 2,11d6,65.03,67.99,65.18,2024-02-15 16:49:39
4,Bronze Faerie,Fey,3,Rank 4,3d10+1,32.69,30.54,30.11,2024-02-15 16:49:39


In [28]:
# Feature matrix: the four numeric columns. Target: the Rarity label.
X, y = df[['Level', 'Energy', 'Health', 'Sanity']], df['Rarity']

In [29]:
# Sanity check: X and y must have the same number of rows.
print(X.shape, y.shape, sep="\n")

(2500, 4)
(2500,)


In [30]:
# Hold out 20% of the rows for validation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
# (rows, features) of the training partition.
X_train.shape

(2000, 4)

In [32]:
# (rows, features) of the validation partition.
X_val.shape

(500, 4)

In [51]:
from sklearn.preprocessing import OrdinalEncoder
# Random forest: trees capped at depth 7, all CPU cores used for fitting,
# and a fixed seed so the result is reproducible. fit() returns the
# estimator itself, so construction and training are chained.
model_rf = RandomForestClassifier(
    max_depth=7,
    n_jobs=-1,
    random_state=42,
).fit(X_train, y_train)

# Mean accuracy on the data the model was fitted on vs. the held-out rows.
print('Forest: Training Accuracy:', model_rf.score(X_train, y_train))
print('Forest: Validation Accuracy', model_rf.score(X_val, y_val))

Forest: Training Accuracy: 0.975
Forest: Validation Accuracy 0.944


In [60]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

# Encode the string Rarity labels as integers for XGBoost. The encoder is
# fitted on the training labels only and then reused for the validation
# labels, so both partitions share one label-to-integer mapping.
label_encoder = LabelEncoder()

y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

# Small boosted ensemble: 10 trees of depth <= 7, scored with the
# multiclass error rate ('merror') during training.
xg_classifier = XGBClassifier(n_estimators=10, max_depth=7, random_state=42, eval_metric='merror')
xg_classifier.fit(X_train, y_train_encoded)

# The first score is computed on the training partition, so it is labelled
# as training accuracy (it was previously printed as "Validation").
print('Training Accuracy: XGBoost', xg_classifier.score(X_train, y_train_encoded))
print('Validation Accuracy: XGBoost', xg_classifier.score(X_val, y_val_encoded))

Validation Accuracy: XGBoost 0.9945
Validation Accuracy: XGBoost 0.958


In [45]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 50 estimators and a fixed seed; fit() returns the estimator,
# so construction and training are chained.
# NOTE(review): learning_rate=2 is above scikit-learn's default of 1.0, and
# the accuracy recorded for this cell (~0.50) is far below the other models
# in this notebook. Confirm the value is intentional.
ada_classifier = AdaBoostClassifier(
    learning_rate=2,
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)

# Mean accuracy on the training rows and on the held-out rows.
print('Training Accuracy: Adaboost', ada_classifier.score(X_train, y_train))
print('Validation Accuracy: Adaboost', ada_classifier.score(X_val, y_val))

Training Accuracy: Adaboost 0.5065
Validation Accuracy: Adaboost 0.502
