## Tables to create

Key tables
* alignment
* death
* roles
* settings

Data tables
* player (just player name & id)
* game (general stats about each game)
* gms (gms for each game)
* playergame (each player's stats in each game)
* player_roles (roles for a row in playergame)

In [None]:
import pandas as pd 

In [1]:
# IMPORTING DATA TAB
# Load the raw Data tab export and map the sheet's headers onto the
# snake_case column names used by the rest of this notebook.
data = pd.read_csv("data/original_data/original_data.csv").rename(
    columns={
        "Player": "player_name",
        "Alignment": "alignment_type",
        "Faction Outcome": "win",
        "First Hit": "first_hit",
        "Last Hit": "last_hit",
        "# of Hits": "num_hits",
        # The sheet header contains a literal CR/LF line break, hence the odd key.
        "Death/\r\n Survival": "death",
        "Inactive": "inactive",
        "Mod": "IM",
        "Game Type": "format",
        "Game #": "game_num",
        # "Unnamed: N" keys are positional: presumably pandas' auto-generated
        # names for header-less columns, so they shift if columns are
        # inserted or removed in the sheet.
        "Unnamed: 14": "game_string",
        "Role": "role",
        "Unnamed: 17": "secondary_role",
    }
)

data.head()

# more cleaning here? 

Unnamed: 0,ID,player_name,alignment_type,win,first_hit,last_hit,num_hits,death,inactive,GM,IM,Spec,format,game_num,game_string,Broken,role,secondary_role
0,1,Meta,,,,,,,,Y,,,LG,1.0,LG1,,GM,
1,2,Cracknut,G,L,1.0,1.0,1.0,E,,,,,LG,1.0,LG1,,Tineye,
2,3,Edgedancer,G,L,2.0,2.0,1.0,X,,,,,LG,1.0,LG1,,Cop,
3,4,Mailliw73,G,L,4.0,4.0,1.0,X,,,,,LG,1.0,LG1,,Regular,
4,5,Aether,E,W,4.0,4.0,1.0,V,,,,,LG,1.0,LG1,,Smoker,


In [17]:
# IMPORTING GAME TAB

# cols wanted: id, format, number, string, anon num,
# mech balance, dist balance, IM id, start date, end date
# num cycles, num posts, setting id
# complexity, fundamentals, role madness

# cols still needed: format, number, mech balance, dist balance, 
# IM id, start date, end date, setting id

# Sheet header -> game table column. This mapping is also the single source
# of truth for which columns are kept below, so the rename and the column
# selection cannot drift apart.
game_column_renames = {
    "index": "game_id",  # created by reset_index() below
    "auto?": "game_string",
    "# Players": "num_players", # optional
    "# Cycles": "num_cycles",
    "# Posts": "num_posts",
    "Fundamentals": "fundamentals", 
    "Role Madness": "role_madness",
    "Winner": "winner", # optional
    "Complexity": "complexity",
    "Broken": "broken", # TODO make into mech/dist once that's set in sheet
    "World": "world", # needs to make setting table and replace this w setting_id
    "Anon?": "anon_num", # currently still boolean
    "Title/Link": "title" # need to get link separately. hidden col? 
}
# "IM" is deliberately not pulled from this tab: currently blank - pulling from Data instead

games = (
    # header=1 because there's an extra header row on top w categories
    pd.read_csv("data/original_data/original_game_3-28.csv", header=1)
    # move the default row index into a column; it becomes game_id
    .reset_index()
    # errors="raise" is the check against the sheet renaming columns: a
    # missing header now fails here with a KeyError naming it, instead of
    # being silently skipped and then silently dropped by the filter below.
    .rename(columns=game_column_renames, errors="raise")
)

# ideally add to game tab hidden cols for: raw link, format, num

print(games.columns)

# remaining columns are just calculated from the data tab (presumably; original note said "data test")
games = games.filter(items=list(game_column_renames.values()))

games.tail()
# check for almost all NaN rows! extra checkboxes cause this

Index(['game_id', 'game_string', 'num_players', 'num_cycles', 'num_posts',
       'fundamentals', 'winner', 'complexity', 'role_madness', 'anon_num',
       'Mech', 'Dist', 'broken', '# Elims', 'Elim %', '# Xed', '# Vig'd',
       '# Deaths', '# Survivors', '# E killed', '# M killed', '# N killed',
       '# V killed', '# FF', '# Xed.1', '# I killed', '# pinchhitters',
       '# Spectators', 'GM(s)', 'IM', 'Start/End date?', 'title',
       'Sanderson? ', 'world'],
      dtype='object')


Unnamed: 0,game_id,game_string,num_players,num_cycles,num_posts,fundamentals,role_madness,winner,complexity,broken,world,anon_num,title
207,207,MR56,21.0,8.0,,V/E,False,F,,,Reckoners,False,Day of Reckoning
208,208,LG83,28.0,6.0,,V/E,False,E,,,,False,The Survivor
209,209,BT1,12.0,4.0,,V/E,False,E,,,Cross-Cosmere,False,Auction of Stories
210,210,QF59,15.0,7.0,,V/E,False,E,,,Roshar,False,Bachelor - Roshar Edition!
211,211,MR57,,,,V/E,False,,Semi Standard,,,False,


In [3]:
# PLAYER
# One row per distinct player name, in order of first appearance in `data`;
# player_id is simply that row's 0-based position.
unique_players = data["player_name"].unique()

player = (
    pd.DataFrame({"player_name": unique_players})
    .rename_axis("player_id")
    .reset_index()
)

# Persist the lookup table (without the DataFrame index).
player.to_csv("data/player1.csv", index=False)

player.head()


Unnamed: 0,player_id,player_name
0,0,Meta
1,1,Cracknut
2,2,Edgedancer
3,3,Mailliw73
4,4,Aether


In [None]:
# Assume key tables for death/alignment are done from key_tables notebook
# pull them in here



In [18]:
# SETTINGS, DEATH, ALIGNMENT

# Distinct values of the games "world" column, in order of first appearance.
# NOTE(review): the captured output includes NaN as well as both 'KKC' and
# 'Kingkiller Chronicles' - these look like the same setting; confirm and
# normalise before building the settings table.
unique_worlds = pd.unique(games["world"])

unique_worlds

# TODO: check this against key table settings for worlds not in it
# for each loc not there, put down as non-Sanderson and print that you're doing so

# do the same check with unique death and alignment tables, but don't insert by default - just print thing

array(['Scadrial', 'Nalthis', 'Sel', 'Roshar', 'Wheel of Time',
       'Threnody', 'Rithmatist', 'Reckoners', 'Cross-Cosmere',
       'Hunger Games', 'Legion', nan, 'First of the Sun',
       'Game of Thrones', 'KKC', 'Death Note', 'Codex Alera', 'Pokemon',
       'Black Prism', 'Red Rising', 'Discworld', 'Elder Scrolls',
       'Alcatraz', 'My Little Pony', 'Other Sanderson', 'Les Miserables',
       'Gunnerkrigg Court', 'Lord of the Rings', 'Taldain',
       'Kingkiller Chronicles', 'Princess Bride', 'Zelda', 'Firefly'],
      dtype=object)

In [4]:
# TODO id replacements:
# in PG - player_id, game_id, alignment_id, death_id
# in game - setting_id

In [None]:
# Pull rest of stuff into game table from PG: IM id, format/number(?)

# export games to csv (optionally)

In [None]:
# make GMs table from PG: one row per (game, GM) pair.
# Replaces the `data.stuff()` placeholder, which raised AttributeError.
# NOTE(review): assumes GM rows are the ones flagged "Y" in the GM column, as
# in the data.head() sample - confirm against the sheet. Games and players are
# still identified by game_string / player_name here; swap in game_id /
# player_id once the id replacement step exists.
gms = (
    data.loc[data["GM"] == "Y", ["game_string", "player_name"]]
    .drop_duplicates()
    .reset_index(drop=True)
)

gms.head()

# optionally export gms table

In [None]:
# if roles are in, make roles table

In [None]:
# clean PG - take out GM, spec, IM, ??

# optionally export playergame table

At this point we should have the following tables:
* data (playergame)
* games (game)
* player
* gms
* alignment
* death
* settings

time for...
## SQL Insertion


In [None]:
# ???