In [1]:
#@title Import Libraries & Set Options

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import OneHotEncoder
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including any pandas chained-assignment or deprecation warnings that later
# cells would otherwise surface.
warnings.filterwarnings("ignore")

In [2]:
#@title Declare Data Path

# Relative path to the Excel workbook that is read with pd.read_excel below.
data_path = '../Data/Mapping Tables.xlsx'

In [4]:
#@title Load Data To Model

# sheet_name=None makes read_excel return every sheet, keyed by sheet name
data = pd.read_excel(data_path, sheet_name=None)

# Bind each sheet the notebook uses to its own DataFrame variable
(
    Instance_df,
    Zone_df,
    Class_df,
    Race_df,
    NPC_df,
    NPC_Data_df,
    Entries_df,
) = [
    data[sheet]
    for sheet in (
        'Instance-ID',
        'Zone-ID',
        'Class-ID',
        'Race-ID',
        'NPC-ID',
        'NPC-Data',
        'Entries_Clean',
    )
]

FileNotFoundError: ignored

In [None]:
#@title Format Collected Entries to Dataframe

# Columns that identify one character entry
entry_key_columns = ['Server', 'Character Name', 'Character ID']

# Long -> wide: one row per character, one column per distinct 'Field' value,
# then turn the key columns back into ordinary columns
pivot_df = Entries_df.pivot(
    index=entry_key_columns,
    columns='Field',
    values='Field Value',
).reset_index()

# Keep the key columns plus the collected fields used downstream
entry_field_columns = [
    'class_id', 'date', 'guild', 'last_words', 'level',
    'map_id', 'map_pos', 'race_id', 'source_id',
]
sub_df = pivot_df[entry_key_columns + entry_field_columns]


In [None]:
#@title Drop Missing Records

# Keep only the entries that recorded a map_id. The explicit copy makes the
# result an independent frame, so the column assignments performed on
# sub_df_dropna in later cells do not write into a frame derived from sub_df
# (chained assignment).
sub_df_dropna = sub_df.dropna(subset=['map_id']).copy()

In [None]:
#@title Merge Dataframes

def _lookup(ids, mapping, default=None):
    """Translate a column of numeric IDs through ``mapping``.

    Each value is cast to ``int`` before the dictionary lookup.

    Parameters:
        ids: pandas Series holding the raw ID values.
        mapping: dict keyed by integer ID.
        default: value used when an ID has no entry in ``mapping``. When it is
            None, the original (raw) value is kept instead.

    Returns:
        A Series with the same index as ``ids`` holding the translated values.
        Values that are missing or cannot be cast to ``int`` get the same
        fallback as unmapped IDs instead of raising.
    """
    def translate(raw):
        fallback = raw if default is None else default
        try:
            key = int(raw)
        except (TypeError, ValueError, OverflowError):
            # NaN / None / non-numeric text: nothing to look up
            return fallback
        return mapping.get(key, fallback)

    return ids.apply(translate)


# Map zone IDs to zone names (unmapped IDs keep their original value)
zone_map = Zone_df.set_index('Zone ID')['Zone'].to_dict()
sub_df_dropna['map_name'] = _lookup(sub_df_dropna['map_id'], zone_map)

# Map race IDs to race names
race_map = Race_df.set_index('Race ID')['Race'].to_dict()
sub_df_dropna['race_name'] = _lookup(sub_df_dropna['race_id'], race_map)

# Map class IDs to class names
class_map = Class_df.set_index('Class ID')['Class'].to_dict()
sub_df_dropna['class_name'] = _lookup(sub_df_dropna['class_id'], class_map)

# NPC-ID sheet lookup. It is kept available, but npc_name is filled from the
# NPC-Data sheet below: the earlier assignment from this map was overwritten
# straight away, so it never reached the final frame.
npc_map = NPC_df.set_index('NPC ID')['NPC'].to_dict()

# Index the NPC-Data sheet once and derive every per-field lookup from it
npc_data_by_id = NPC_Data_df.set_index('ID')
npc_data_map_name = npc_data_by_id['Name'].to_dict()
npc_data_map_start_level = npc_data_by_id['Start Level'].to_dict()
npc_data_map_end_level = npc_data_by_id['End Level'].to_dict()
npc_data_map_elite = npc_data_by_id['Elite'].to_dict()
npc_data_map_rare = npc_data_by_id['Rare'].to_dict()
npc_data_map_boss = npc_data_by_id['Boss'].to_dict()
npc_data_map_type = npc_data_by_id['Type'].to_dict()

# Target column -> lookup; source IDs without NPC data get an empty string
npc_column_maps = {
    'npc_name': npc_data_map_name,
    'npc_start_level': npc_data_map_start_level,
    'npc_end_level': npc_data_map_end_level,
    'npc_elite_status': npc_data_map_elite,
    'npc_rare_status': npc_data_map_rare,
    'npc_boss_status': npc_data_map_boss,
    'npc_type': npc_data_map_type,
}
for npc_column, npc_lookup in npc_column_maps.items():
    sub_df_dropna[npc_column] = _lookup(sub_df_dropna['source_id'], npc_lookup, default="")

In [None]:
#@title Perform Data Cleaning

# Coerce the level columns to numbers; values that cannot be parsed become NaN
for level_col in ('level', 'npc_start_level', 'npc_end_level'):
    sub_df_dropna[level_col] = pd.to_numeric(sub_df_dropna[level_col], errors='coerce')

# Rounded midpoint of the NPC level range, and its difference to the player level
npc_level_midpoint = (sub_df_dropna['npc_start_level'] + sub_df_dropna['npc_end_level']) / 2
sub_df_dropna['npc_avg_level'] = npc_level_midpoint.round()
sub_df_dropna['npc_less_player_level'] = sub_df_dropna['npc_avg_level'] - sub_df_dropna['level']

# 'date' is interpreted as a timestamp in seconds (unit='s')
sub_df_dropna['date'] = pd.to_datetime(sub_df_dropna['date'], unit='s')

# map_pos: strip the surrounding double quotes, split on the comma into
# float x / y columns, then flip y to 1 - y
sub_df_dropna['map_pos'] = sub_df_dropna['map_pos'].str.strip('"')
map_xy = sub_df_dropna['map_pos'].str.split(',', expand=True).astype(float)
sub_df_dropna[['x', 'y']] = map_xy
sub_df_dropna['y'] = 1 - sub_df_dropna['y']

# Remove double quotes from the free-text columns
for text_col in ('last_words', 'guild'):
    sub_df_dropna[text_col] = sub_df_dropna[text_col].str.replace('"', '')

In [None]:
#@title Show Dataframe Head

# Preview the first five rows of the merged and cleaned frame.
sub_df_dropna.head(5)

In [None]:
# Columns handed to the clustering step
cluster_columns = [
    'class_id', 'level', 'map_id', 'race_id', 'source_id',
    'npc_start_level', 'npc_end_level', 'npc_avg_level',
    'npc_less_player_level', 'x', 'y',
]

# Work on a copy so sub_df_dropna itself is left untouched from here on
data = sub_df_dropna.copy()
sub_data = data[cluster_columns]
print(sub_data.head())

In [None]:
# Inspect the column dtypes of the clustering input.
sub_data.dtypes

In [None]:
# Same columns as sub_data plus 'Character ID', so cluster rows can be traced
# back to a character later on.
data2 = sub_df_dropna.copy()
# Slice from data2 (the copy made on the line above); the original sliced from
# `data`, leaving data2 unused.
sub_data2 = data2[['class_id', 'level', 'map_id', 'race_id', 'source_id', 'npc_start_level', 'npc_end_level', 'npc_avg_level', 'npc_less_player_level', 'x','y', 'Character ID']]
# Number of rows with no missing value in any selected column
print(len(sub_data2.dropna()))
print(sub_data2.head())

In [None]:
# Agglomerative clustering into 5 groups
hierarchical_cluster = AgglomerativeClustering(n_clusters = 5)

# Cast the ID columns to int in a single astype call. Rebinding sub_data to the
# returned frame avoids assigning column-by-column into a column slice of `data`.
sub_data = sub_data.astype({
    'class_id': int,
    'map_id': int,
    'race_id': int,
    'source_id': int,
})

# Fit on the rows that have no missing values; labels has one entry per such row
labels = hierarchical_cluster.fit_predict(sub_data.dropna())

In [None]:
# Sanity checks: the labels, their container type, and that there is one label
# per row of sub_data.dropna() (the frame the model was fit on)
clustered_rows = sub_data.dropna()
for item in (labels, type(labels), len(labels), len(clustered_rows)):
    print(item)

In [None]:
# Rows of sub_data without missing values; cluster labels are attached to this
# frame in the following cells.
deaths = sub_data.dropna()
print(type(deaths))

In [None]:
# labels was produced from sub_data.dropna(), the same rows as deaths, so the
# list lines up with deaths row by row.
deaths['5clusters'] = labels.tolist()

In [None]:
# Three-cluster solution. The original passed n_clusters = 5 here, so the
# labels stored under '3clusters' merely duplicated the 5-cluster run.
hc3 = AgglomerativeClustering(n_clusters = 3)
l3 = hc3.fit_predict(sub_data.dropna())
# One label per row without missing values
print(len(l3))

In [None]:
# Seven-cluster solution, fit on the rows of sub_data without missing values.
hc7 = AgglomerativeClustering(n_clusters = 7)
l7 = hc7.fit_predict(sub_data.dropna())
print(len(l7))

In [None]:
# Attach the labels from the hc3 and hc7 runs; both were fit on
# sub_data.dropna(), the same rows deaths was built from.
deaths['3clusters'] = l3.tolist()
deaths['7clusters'] = l7.tolist()

In [None]:
print(type(sub_data2.dropna()['Character ID']))
# Assigning a Series aligns on the row index, so each death row receives the
# 'Character ID' stored under the same index label in sub_data2.
deaths['CharacterID'] = sub_data2.dropna()['Character ID']

In [None]:
# Preview the clustered rows together with their label columns.
deaths.head()

In [None]:
# Write the clustered rows to a CSV file; the relative path resolves against
# the current working directory.
deaths.to_csv('Clustered_deaths.csv')

In [None]:
from sklearn import tree

# Decision tree used below to predict the NPC type from character attributes.
# A fixed random_state makes the fitted tree, and therefore the predictions
# shown later in the notebook, reproducible from run to run.
clf = tree.DecisionTreeClassifier(random_state=0)

In [None]:
def _strip_text(value):
    """Return ``value`` with surrounding whitespace removed when it is a str."""
    return value.strip() if isinstance(value, str) else value


# DataFrame.applymap is deprecated in favour of DataFrame.map in newer pandas;
# use whichever element-wise method this pandas version provides.
_elementwise = data.map if hasattr(data, 'map') else data.applymap
data = _elementwise(_strip_text)

# Distinct NPC types after stripping
print(data.npc_type.unique())

In [None]:
# Keep only rows whose npc_type is not the empty string
has_npc_type = data['npc_type'] != ''
data_c = data[has_npc_type]
l = data_c['npc_type'].unique()

In [None]:
# Drop every row that has a missing value in any column.
data_c = data_c.dropna()

In [None]:
# NPC types that remain after the filtering above.
data_c.npc_type.unique()

In [None]:
# Classifier features in the order class, level, race, map. The ID columns are
# cast to int with one astype call on the selected columns, instead of being
# assigned one at a time into a column slice of data_c.
clf_data = data_c[['class_id', 'level', 'race_id', 'map_id']].astype(
    {'class_id': int, 'race_id': int, 'map_id': int}
)

In [None]:
# Integer codes 0 through 9
nums = list(range(10))

In [None]:
# Give every distinct NPC type an integer code in order of first appearance.
# enumerate yields 0, 1, 2, ... for any number of types, whereas indexing into
# a fixed ten-element list raises IndexError once there are more than ten.
type_keys = data_c.npc_type.unique()
type_dict = {npc_type: code for code, npc_type in enumerate(type_keys)}
print(type_dict)

In [None]:
# Encode each NPC type as its integer code from type_dict (the classifier target).
data_c['npc_type_num'] = data_c['npc_type'].map(type_dict)

In [None]:
# Preview the rows used for training, including the encoded target column.
data_c.head()

In [None]:
# Train the tree: features are clf_data, target is the encoded NPC type.
clf = clf.fit(clf_data,data_c['npc_type_num'])

In [None]:
# Single prediction; values follow the clf_data column order
# (class_id, level, race_id, map_id). The result is an integer code from type_dict.
clf.predict([[4,8,1,1429]])

In [None]:
# Batch prediction for six hand-written characters. Each row follows the
# clf_data column order; the results are integer codes from type_dict.
# Class, level, race, map
clf.predict([[4,3,2,1413] # Rogue, 3, Orc, The Barrens
              , [1, 10, 7, 1416] # Warrior, 10, Gnome, Alterac Mountains
              , [3, 5, 1, 1424] # Hunter, 5, Human, Hillsbrad Foothills
              , [11, 20, 8, 1444] # Druid, 20, Troll, Feralas
              , [8, 8, 4, 1447] # Mage, 8, Night Elf, Azshara
              , [1, 1, 6, 1412] # Warrior, 1, Tauren, Mulgore
             ])

In [None]:
# Humanoid, Humanoid, Beast, Humanoid, Demon, Beast