## Tables to create

Key tables
* alignment
* death
* roles
* settings

Data tables
* player (just player name & id)
* game (general stats about each game)
* gms (gms for each game)
* playergame (each player's stats in each game)
* player_roles (roles for a row in playergame)

In [1]:
import pandas as pd 

In [54]:
# KEY TABLES

alignment = pd.read_csv("data/keys/alignment.csv").reset_index().rename(columns={"index": "alignment_id"})

print(alignment.head())

settings = pd.read_csv("data/keys/settings.csv").reset_index().rename(columns={"index": "setting_id"})

print(settings.head())

death = pd.read_csv("data/keys/death.csv").reset_index().rename(columns={"index": "death_id"})

death.head()

   alignment_id alignment_char                     alignment_desc  is_elim  \
0             0              G                               Good    False   
1             1              E  Evil (Prim) (team kill / no conv)     True   
2             2              S             Secondary Evil Faction     True   
3             3              T              Tertiary Evil Faction     True   
4             4              ?            Evil Converter (w/kill)     True   

   is_evil  has_kill  can_convert  was_converted  
0    False     False        False          False  
1     True      True        False          False  
2     True      True        False          False  
3     True      True        False          False  
4     True      True         True          False  
   setting_id          world  is_sanderson  is_cosmere
0           0  Cross-Cosmere          True        True
1           1            Sel          True        True
2           2       Scadrial          True        True
3    

Unnamed: 0,death_id,death_char,death_desc,dying_alignment,killer_alignment,non-kills
0,0,S,Survived,none,none,
1,1,E,Killed by Eliminators,not E,E,
2,2,X,Exed,any,none,
3,3,V,Elim killed by Village,E,V,
4,4,F,Villager Killed by Village (Friendly Fire),V,V,


In [41]:
# IMPORTING DATA TAB
data = pd.read_csv("data/original_data/original_data.csv")

data.rename(inplace=True, columns={
    "Player": "player_name",
    "Alignment": "alignment_char",
    "Faction Outcome": "win", 
    "First Hit": "first_hit", 
    "Last Hit": "last_hit",
    "# of Hits": "num_hits",
    "Death/\r\n Survival": "death_char", # ???
    "Inactive": "inactive",
    "Mod": "IM",
    "Game Type": "format",
    "Game #": "game_num",
    "Unnamed: 14": "game_string",
    "Role": "role",
    "Unnamed: 17": "secondary_role"
    })

data.head()

# more cleaning here? 

Unnamed: 0,ID,player_name,alignment_char,win,first_hit,last_hit,num_hits,death_char,inactive,GM,IM,Spec,format,game_num,game_string,Broken,role,secondary_role
0,1,Meta,,,,,,,,Y,,,LG,1.0,LG1,,GM,
1,2,Cracknut,G,L,1.0,1.0,1.0,E,,,,,LG,1.0,LG1,,Tineye,
2,3,Edgedancer,G,L,2.0,2.0,1.0,X,,,,,LG,1.0,LG1,,Cop,
3,4,Mailliw73,G,L,4.0,4.0,1.0,X,,,,,LG,1.0,LG1,,Regular,
4,5,Aether,E,W,4.0,4.0,1.0,V,,,,,LG,1.0,LG1,,Smoker,


In [8]:
# IMPORTING GAME TAB

# header = 1 because there's an extra header row on top w categories
games = pd.read_csv("data/original_data/original_game_3-28.csv", header=1)

# cols wanted: id, format, number, string, anon num,
# mech balance, dist balance, IM id, start date, end date
# num cycles, num posts, setting id
# complexity, fundamentals, role madness

# cols still needed: format, number, mech balance, dist balance, 
# IM id, start date, end date, setting id

games.reset_index(inplace=True)

# ! Need to be careful about sheet renaming cols - some kind of check?
games.rename(inplace=True, columns={
    "index": "game_id",
    "auto?": "game_string",
    # GAME FORMAT
    # GAME NUM
    "# Players": "num_players", # optional
    "# Cycles": "num_cycles",
    "# Posts": "num_posts",
    "Fundamentals": "fundamentals", 
    "Role Madness": "role_madness",
    "Winner": "winner", # optional
    "Complexity": "complexity",
    "Broken": "broken", # TODO make into mech/dist once that's set in sheet
    # MECH
    # DIST
    "World": "world", # need to make setting table and replace this w setting_id
    "Anon?": "anon_num", # currently still boolean
    "Title/Link": "title" # need to get link separately. hidden col? 
    # LINK
    })

print(games.columns)

# remaining columns are just calculated from data test
games = games.filter([
    "game_id",
    "game_string", 
    "num_players",
    #"IM", # currently blank - pulling from Data instead
    "num_cycles", 
    "num_posts", 
    "fundamentals", 
    "role_madness",
    "winner", 
    "complexity", 
    "broken", 
    "world",
    "anon_num",
    "title"
])


games.tail()
# check for almost all NaN rows! extra checkboxes cause this

Index(['game_id', 'game_string', 'num_players', 'num_cycles', 'num_posts',
       'fundamentals', 'winner', 'complexity', 'role_madness', 'anon_num',
       'Mech', 'Dist', 'broken', '# Elims', 'Elim %', '# Xed', '# Vig'd',
       '# Deaths', '# Survivors', '# E killed', '# M killed', '# N killed',
       '# V killed', '# FF', '# Xed.1', '# I killed', '# pinchhitters',
       '# Spectators', 'GM(s)', 'IM', 'Start/End date?', 'title',
       'Sanderson? ', 'world'],
      dtype='object')


Unnamed: 0,game_id,game_string,num_players,num_cycles,num_posts,fundamentals,role_madness,winner,complexity,broken,world,anon_num,title
207,207,MR56,21.0,8.0,,V/E,False,F,,,Reckoners,False,Day of Reckoning
208,208,LG83,28.0,6.0,,V/E,False,E,,,,False,The Survivor
209,209,BT1,12.0,4.0,,V/E,False,E,,,Cross-Cosmere,False,Auction of Stories
210,210,QF59,15.0,7.0,,V/E,False,E,,,Roshar,False,Bachelor - Roshar Edition!
211,211,MR57,,,,V/E,False,,Semi Standard,,,False,


In [28]:
# PLAYER
unique_players = data["player_name"].unique()

player = pd.DataFrame({"player_id": range(len(unique_players)), "player_name": unique_players})

player.to_csv("data/player1.csv", index=False)

player.head()


Unnamed: 0,player_id,player_name
0,0,Meta
1,1,Cracknut
2,2,Edgedancer
3,3,Mailliw73
4,4,Aether


In [19]:
# SETTINGS, DEATH, ALIGNMENT

unique_worlds = games["world"].unique()

new_settings = []

for world in unique_worlds:
    if world not in settings["world_name"].unique():
        new_settings.append({"world_name": world, "is_sanderson": False, "is_cosmere": False})
        print(f"inserted {world}")

new_settings_df = pd.DataFrame(new_settings)

settings = pd.concat([settings, new_settings_df], ignore_index=True)

settings.tail()

# TODO: check this against key table settings for worlds not in it
# for each loc not there, put down as non-Sanderson and print that you're doing so

# do the same check with unique death and alignment tables, but don't insert by default - just print thing

inserted Hunger Games
inserted nan
inserted Game of Thrones
inserted KKC
inserted Death Note
inserted Codex Alera
inserted Pokemon
inserted Black Prism
inserted Red Rising
inserted Discworld
inserted Elder Scrolls
inserted My Little Pony
inserted Les Miserables
inserted Gunnerkrigg Court
inserted Lord of the Rings
inserted Kingkiller Chronicles
inserted Princess Bride
inserted Zelda
inserted Firefly


Unnamed: 0,world_name,is_sanderson,is_cosmere
30,Lord of the Rings,False,False
31,Kingkiller Chronicles,False,False
32,Princess Bride,False,False
33,Zelda,False,False
34,Firefly,False,False


In [42]:
# checking death / alignment

al = data["alignment_char"].unique()

for a in al:
    if a not in alignment["alignment_char"].unique():
        print(a)



de = data["death_char"].unique()

for d in de:
    if d not in death["death_char"].unique():
        print(d)




nan
nan
L?
?
V/M
L
V/E


In [43]:
data.dtypes

ID                  int64
player_name        object
alignment_char     object
win                object
first_hit         float64
last_hit          float64
num_hits          float64
death_char         object
inactive           object
GM                 object
IM                 object
Spec               object
format             object
game_num          float64
game_string        object
Broken             object
role               object
secondary_role     object
dtype: object

In [53]:
settings.dtypes

setting_id       int64
world_name      object
is_sanderson      bool
is_cosmere        bool
dtype: object

In [55]:
# TODO id replacements:
# in PG - player_id, game_id, alignment_id, death_id

merged_data = data.merge(player, on="player_name", how="left")

merged_data = merged_data.merge(games.filter(["game_id", "game_string"]), on="game_string", how="left")

merged_data = merged_data.merge(alignment.filter(["alignment_id", "alignment_char"]), on="alignment_char", how="left")

merged_data = merged_data.merge(death.filter(["death_id", "death_char"]), on="death_char", how="left")

merged_data
# in game - setting_id

games = games.merge(settings, on="world", how="left")
games

Unnamed: 0,game_id,game_string,num_players,num_cycles,num_posts,fundamentals,role_madness,winner,complexity,broken,world,anon_num,title,setting_id,is_sanderson,is_cosmere
0,0,LG1,16.0,4,462.0,V/E,False,E,Standard,Well Balanced,Scadrial,False,In the Wake of Koloss,2.0,True,True
1,1,LG2,29.0,11,1232.0,CON,False,E,Standard,Balanced,Scadrial,False,Devil's Den,2.0,True,True
2,2,LG3,22.0,7,739.0,V/E,False,V,Standard,Broken Distribution,Scadrial,False,Blackwater Village,2.0,True,True
3,3,LG4,30.0,9,1029.0,V/E,False,V,Semi Standard,Broken Mechanics,Nalthis,False,Colours of War,3.0,True,True
4,4,QF1,12.0,4,118.0,V/E,False,E,Basic,Well Balanced,Sel,False,Jeskeri Mysteries,1.0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207,207,MR56,21.0,8,,V/E,False,F,,,Reckoners,False,Day of Reckoning,8.0,True,False
208,208,LG83,28.0,6,,V/E,False,E,,,,False,The Survivor,,,
209,209,BT1,12.0,4,,V/E,False,E,,,Cross-Cosmere,False,Auction of Stories,0.0,True,True
210,210,QF59,15.0,7,,V/E,False,E,,,Roshar,False,Bachelor - Roshar Edition!,4.0,True,True


In [None]:
# Pull rest of stuff into game table from PG: IM id, format/number(?)

# export games to csv (optionally)

In [None]:
# make GMs table from PG
gms = data.stuff()

# optionally export gms table

In [None]:
# if roles are in, make roles table

In [None]:
# clean PG - take out GM, spec, IM, ??

# optionally export playergame table

At this point should have the tables:
* data (playergame)
* games (game)
* player
* gms
* alignment
* death
* settings

time for...
## SQL Insertion


In [None]:
# ???