In [163]:
# Dependencies
import pymongo
import datetime
import pandas as pd
from bson.json_util import dumps

# The default port used by MongoDB is 27017
# https://docs.mongodb.com/manual/reference/default-mongodb-port/
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Declare the database
db = client.se_db

# Declare the collections
players = db.players_db
nonplayers = db.nonplayers_db
games = db.games_db

### Pull data

In [None]:
player_path = "datacsvs/CleanPlayerData.csv"
playerdata = pd.read_csv(player_path)

nonplayer_path = "datacsvs/NonPlayerData.csv"
npdata = pd.read_csv(nonplayer_path)

gamepath = 'datacsvs/CleanGameData.csv'
game_data = pd.read_csv(gamepath)

### Set up player & game IDs

In [92]:
realdata = playerdata[playerdata['Alignment'] != '0'] # eventually change to playerdata
playerlist = realdata['person'].unique()
player_ids = {p:(i+1) for i,p in enumerate(playerlist)}

gamelist = realdata['Game'].unique()
game_ids = []
for i, g in enumerate(gamelist):
    if type(g) == str:
        curr_game = {
            'str':g,
            'id':i,
            'format':g[0:2],
            'num':g[2:]
        }
        game_ids.append(curr_game)

game_ids_df = pd.DataFrame(game_ids)

Unnamed: 0,person,Alignment,fOutcome,Hit1,HitL,HitNum,Death,Inactive,gtype,GameNum,Game,Broken,Role
0,Cracknut,G,L,1.0,1.0,1.0,E,N,LG,1.0,LG1,N,Tineye
1,Edgedancer,G,L,2.0,2.0,1.0,L,N,LG,1.0,LG1,N,Cop
2,Mailliw73,G,L,4.0,4.0,1.0,L,N,LG,1.0,LG1,N,Regular
3,Aether,E,W,4.0,4.0,1.0,V,N,LG,1.0,LG1,N,Smoker
4,Claincy,E,W,-,-,0.0,S,N,LG,1.0,LG1,N,Vote Cancel


### Players (data test) into mongo

In [29]:
players.delete_many({})

stats_list = []
for p in realdata.iterrows():
    row = p[1]


    try: 
        curr_id = int(game_ids_df[game_ids_df['str'] == row['Game']].index[0])
    except:
        print(row)
        
    pgstat = {
        'player_id': player_ids.get(row['person'], 0),
        'player_name': row['person'],
        'game_id': curr_id,
        'game_str': row['Game'],
        'alignment': row['Alignment'],
        'fOutcome': row['fOutcome'],       
    }
    if type(row['Death']) != str:
        pgstat['death'] = '0'
    else:
        pgstat['death'] = row['Death']
    
    if row['HitNum'] != '-':
        pgstat['HitNum'] = row['HitNum']
        
        if row['Hit1'] != '-' :
            pgstat['Hit1'] = row['Hit1']
            pgstat['HitL'] = row['HitL']
            
    if row['Inactive'] == 'Y':
        pgstat['inactive'] = True
    else:
        pgstat['inactive'] = False
        
    if row['Role'] != '-':
        pgstat['role'] = row['Role']
    
    
    stats_list.append(pgstat)
    players.insert_one(pgstat)

In [31]:
json_player_data = dumps(list(players.find()))
 
with open('datajsons/playerdata.json', 'w') as file: 
    file.write(json_player_data) 

In [206]:
# # Verify results:
# results = players.find()
# for result in results:
#     print(result)

### Nonplayers (data test) into mongo

In [10]:
nonplayers.delete_many({})
# @TODO: rename row2 and currid2 to more descriptive vars
np_stats = []
for n in npdata.iterrows():
    row2 = n[1]

    try: 
        curr_id2 = int(game_ids_df[game_ids_df['str'] == row2['Game']].index[0])
    except:
        print(row)
        
    npgstat = {
        'player_id': player_ids.get(row2['person'], 0),
        'player_name': row2['person'],
        'game_id': curr_id2,
        'game_str': row2['Game'],
                
    }
            
    if row2['Spec'] == 'Y':
        npgstat['GM'] = False
        npgstat['spec'] = True
        npgstat['IM'] = False
    elif row2['GM'] == 'Y':
        npgstat['GM'] = True
        npgstat['spec'] = False
        npgstat['IM'] = False
    else:
        npgstat['GM'] = False
        npgstat['spec'] = False
        npgstat['IM'] = True

    
    np_stats.append(npgstat)
    nonplayers.insert_one(npgstat)
    

In [None]:
# results = nonplayers.find()
# for result in results:
#     print(result)

In [11]:
json_nonplayer_data = dumps(list(nonplayers.find()))
with open('datajsons/nonplayerdata.json', 'w') as file2: 
    file2.write(json_nonplayer_data) 

### Game data (game stats) into mongo

In [191]:
players_with_ids = realdata.merge(game_ids_df, left_on='Game', right_on='str')
grouped_pd = players_with_ids.groupby('id', sort = False)

In [192]:
# Broken
broken_games = grouped_pd['Broken'].value_counts().index
broken_list = list(zip(*broken_games))[1]
game_ids_df['Broken'] = [True if b == 'Y' else False for b in broken_list_yn]

broken_games = grouped_pd['Broken'].value_counts()
game_ids_df['num_players'] = list(broken_games)

game_ids_df.head()

Unnamed: 0,str,id,format,num,Broken,num_players
0,LG1,0,LG,1,False,16
1,LG2,1,LG,2,False,29
2,LG3,2,LG,3,True,22
3,LG4,3,LG,4,True,30
4,QF1,4,QF,1,False,12


In [202]:
relevant_gd = game_data[['Setting', 'Game Complexity', '# of Cycles']]

alignment_counts = grouped_pd['Alignment'].value_counts()
foutcome_counts = grouped_pd['fOutcome'].value_counts()
death_counts = grouped_pd['Death'].value_counts()
inactive_counts = grouped_pd['Inactive'].value_counts()
#inactive_counts = players_with_ids[players_with_ids['Inactive'] == "Y"].groupby('id', sort=False)['Inactive'].value_counts(sort = False)
counts = [alignment_counts, foutcome_counts, death_counts, inactive_counts, relevant_gd]


In [204]:
inactive_counts

id   Inactive
0    N           16
1    N           29
2    N           22
3    N           30
4    N           12
                 ..
170  N           15
171  N           16
172  N           23
     Y            2
173  N           18
Name: Inactive, Length: 228, dtype: int64

In [205]:

count_names = ['_align', '_win', '_death', '_inactive', '']

for i, count in enumerate(counts): 
    count_df = count.unstack().fillna(0)
    game_ids_df = game_ids_df.merge(count_df, how='left', left_index=True, right_index=True, suffixes = ('', count_names[i]))

game_ids_df.head()

ValueError: Cannot merge a Series without a name

Unnamed: 0,Setting,Game Complexity,# of Cycles
0,Cosmere,Basic,4
1,Cosmere,Standard,11
2,Cosmere,Basic,7
3,Cosmere,Standard,9
4,Cosmere,Basic,4


In [174]:
games.delete_many({})

game_stats = []

align_list = ['G', 'E', 'B', 'F', 'D', 'M', 'N', 'C', 'S']
outcome_list = ['L', 'W', 'D_win']
death_list = ['E_death', 'L_death', 'V', 'S_death', 'F_death', 'N_death', 'M_death', 'I', 'D_death', 'O']

for n in game_ids_df.iterrows():
    item = n[1]
 
    
    aligns = {k:item[k] for k in align_list if item[k] > 0}
    outcomes = {k:item[k] for k in outcome_list if item[k] > 0}
    deaths = {k:item[k] for k in death_list if item[k] > 0}
        
        
    gstat = {

        'game_id': item['id'],
        'game_str': item['str'],
        'format': item['format'],
        'game_num': item['num'],
        'broken': item['Broken'],
        'num_players': item['num_players'],
        'alignment_counts': aligns,
        'outcome_counts': outcomes,
        'status_counts': deaths,
        'inactives': item['Y']
            
        }
                
    game_stats.append(gstat)
    
    games.insert_one(gstat)
    
            

In [176]:
game_stats[0]

{'game_id': 0,
 'game_str': 'LG1',
 'format': 'LG',
 'game_num': '1',
 'broken': False,
 'num_players': 16,
 'alignment_counts': {'G': 12.0, 'E': 4.0},
 'outcome_counts': {'L': 12.0, 'W': 4.0},
 'status_counts': {'E_death': 5.0, 'L_death': 4.0, 'V': 1.0, 'S_death': 6.0},
 'inactives': nan,
 '_id': ObjectId('5ffe6d7f95bedd8f2649a021')}

In [177]:
game_data.columns
# 'Eliminators Lynched', 'Eliminators Vig'd' - breakdown by alignment + death type?

['Setting', 'Game Complexity', '# of Cycles'] # from game data
['mod', 'GM(s)', 'Spectator(s)'] # from nonplayer data
# start and end dates

Index(['Unnamed: 0', 'Game', '# of Players', 'Total Rank', '# of Spectators',
       'Rank', '# of E. Kills', '# of Neutral Evil Kils', '# of V. Kills',
       '# of Lynches', '# of FF Deaths', '# of Neutral Deaths',
       '# of Inactive Deaths', '# of Deaths', 'Rank.1', '# of Eliminators',
       'Eliminator Percentage', 'Eliminators Lynched', 'Eliminators Vig'd',
       'Thread Reply Count', 'Thread Reply Rank', 'Setting', 'Game Complexity',
       '# of Cycles', 'Winning Team', '# of Survivors', 'Total Rank.1',
       'Survivors'],
      dtype='object')