In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor


In [2]:
# Load the monster dataset; the relative path is resolved against the current working directory.
df = pd.read_csv('monsters.csv')
# List every available column so the features for CR prediction can be chosen.
print(df.columns)

Index(['name', 'size', 'type', 'alignment', 'languages', 'ac', 'hp', 'cr',
       'speed', 'swim', 'fly', 'climb', 'burrow', 'passive_perception',
       'darkvision', 'truesight', 'tremorsense', 'blindsight', 'strength',
       'str_mod', 'dex', 'dex_mod', 'con', 'con_mod', 'intel', 'int_mod',
       'wis', 'wis_mod', 'cha', 'cha_mod', 'str_save', 'dex_save', 'con_save',
       'int_save', 'wis_save', 'cha_save', 'history', 'perception', 'stealth',
       'persuasion', 'insight', 'deception', 'arcana', 'religion',
       'acrobatics', 'athletics', 'intimidation', 'senses', 'attributes',
       'actions', 'legendary_actions', 'legendary', 'source'],
      dtype='object')


Columns of importance for CR (monster difficulty) prediction:
`ac`, `hp`, `cr` and all six `_mod` columns will be kept; the rest will be removed.


Here I make a dataframe with only the `_mod` columns. My assumption is that we only need the highest mod value.

In [3]:
# Isolate the six ability-modifier columns; they are reduced to a single feature further down.
mod_df = df.loc[:, [
    'str_mod', 'dex_mod', 'con_mod',
    'int_mod', 'wis_mod', 'cha_mod',
]]
mod_df.columns

Index(['str_mod', 'dex_mod', 'con_mod', 'int_mod', 'wis_mod', 'cha_mod'], dtype='object')

All the `_mod` columns need to be merged into one column called `highest_mod`.

In [4]:
# Check the dtype of each modifier column before taking a row-wise maximum over them.
print(mod_df.dtypes)

str_mod    int64
dex_mod    int64
con_mod    int64
int_mod    int64
wis_mod    int64
cha_mod    int64
dtype: object


In [5]:
# Preview the first five rows of the modifier-only frame.
mod_df.head(n=5)

Unnamed: 0,str_mod,dex_mod,con_mod,int_mod,wis_mod,cha_mod
0,0,2,0,0,1,0
1,-1,2,2,4,1,0
2,5,-1,2,4,2,4
3,7,0,6,-1,1,-1
4,1,3,5,8,5,5


In [6]:
# Highest of the six ability modifiers for each monster (one value per row).
# DataFrame.max(axis=1) is the vectorized built-in for a row-wise maximum and
# avoids calling a Python lambda once per row via apply().
max_vals = mod_df.max(axis=1)
print(max_vals)

0      2
1      4
2      5
3      7
4      8
      ..
757    2
758    5
759    9
760    3
761    7
Length: 762, dtype: int64


In [7]:
# Start the modelling frame from the two raw stats that are used unchanged.
new_df = df.loc[:, ['ac', 'hp']]
new_df.head(n=5)

Unnamed: 0,ac,hp
0,12.0,13
1,12.0,84
2,17.0,135
3,15.0,137
4,21.0,285


In [8]:
# Append the engineered feature and then the target in one assign call;
# keyword order is preserved, so 'cr' ends up as the last column.
new_df = new_df.assign(highest_mod=max_vals, cr=df['cr'])


`new_df` is now ready to be used to train a model.

In [9]:
# Preview the final modelling frame: three features followed by the target.
new_df.head(n=5)

Unnamed: 0,ac,hp,highest_mod,cr
0,12.0,13,2,0.25
1,12.0,84,4,9.0
2,17.0,135,5,10.0
3,15.0,137,7,9.0
4,21.0,285,8,23.0


Linear Regression model

In [10]:
# Select features and target by name rather than by position, so the model inputs
# stay correct even if new_df later gains or reorders columns. The feature order
# here is the order any frame passed to predict() must use.
X = new_df[['ac', 'hp', 'highest_mod']]  # Features
y = new_df['cr']                         # Target

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Fit a linear regression on the training split (fit() returns the estimator itself).
lr = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score them with mean squared error.
y_pred = lr.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean squared error: ", mse)

Mean squared error:  5.399397047729747


Decision Tree model

In [13]:
# Decision tree regressor; the seed fixes the estimator's internal randomness.
tree = DecisionTreeRegressor(random_state=42)
tree.fit(X=X_train, y=y_train)

# Score the tree on the same held-out rows as the linear model.
y_pred = tree.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean squared error: ", mse)

Mean squared error:  6.34813144007603


Random forest model

In [14]:

# Reuse X_train / X_test / y_train / y_test from the split cell earlier in the
# notebook, so all three models are trained and scored on exactly the same rows.
# Repeating train_test_split with the same arguments and seed would only
# recreate that identical split.

# Random forest with 100 trees; the seed fixes bootstrap and feature sampling.
forest = RandomForestRegressor(n_estimators=100, random_state=42)

# Fit the model to the training data
forest.fit(X_train, y_train)

# Generate predictions for the testing data
y_pred = forest.predict(X_test)

# Calculate the mean squared error of the predictions
mse = mean_squared_error(y_test, y_pred)
print("Mean squared error: ", mse)


Mean squared error:  4.883041655541767


Here I made my own test DataFrame from source material that was not in the original df.
Real CR values : 21 , 8 , 3 , 1/8 , 5 , 12 , 9

In [15]:
# Hand-made monsters, one per row; columns follow the training feature order.
my_df = pd.DataFrame(
    [
        (18, 425, 9),  # real CR 21
        (16, 103, 5),  # real CR 8
        (16, 71, 3),   # real CR 3
        (12, 9, 2),    # real CR 1/8
        (14, 114, 5),  # real CR 5
        (14, 217, 7),  # real CR 12
        (14, 157, 4),  # real CR 9
    ],
    columns=['ac', 'hp', 'highest_mod'],
)

Linear model 

In [16]:
# Linear-regression CR estimates for the hand-made monsters, in row order.
y_pred = lr.predict(X=my_df)
print(y_pred)

[24.71236332  7.18843647  4.6400878   0.32143756  7.24394197 13.08699022
  8.70761658]


Decision Tree

In [17]:
# Decision-tree CR estimates for the hand-made monsters, in row order.
y_pred = tree.predict(X=my_df)
print(y_pred)

[25.         13.          6.33333333  0.          5.33333333 14.
  8.        ]


Random Forest

In [18]:
# Random-forest CR estimates for the hand-made monsters, in row order.
y_pred = forest.predict(X=my_df)
print(y_pred)


[24.04        9.47        6.93184921  0.07586648  5.96229762 12.05
  9.26583333]
