In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import ast
from sqlalchemy import create_engine
%matplotlib inline

In [2]:
engine = create_engine('sqlite:///hs_sims1.db')

df = pd.read_sql('SELECT * FROM EVENTS', con = engine)

In [3]:
del df['index']

In [4]:
df.dtypes

event_key      object
event_value    object
game_id         int64
player         object
timestamp      object
dtype: object

In [5]:
df.head()

Unnamed: 0,event_key,event_value,game_id,player,timestamp
0,deck,"['FP1_027', 'EX1_509', 'NEW1_029', 'OG_134', '...",1,player1,2016-06-08 18:47:53:334438
1,deck,"['GVG_081', 'AT_106', 'OG_090', 'AT_070', 'EX1...",1,player1,2016-06-08 18:47:53:370128
2,deck,"['FP1_015', 'OG_133', 'EX1_572', 'EX1_558', 'N...",1,player1,2016-06-08 18:47:53:374504
3,deck,"['CS2_226', 'NEW1_019', 'CS2_186', 'BRM_033', ...",1,player1,2016-06-08 18:47:53:381442
4,deck,"['EX1_556', 'EX1_170', 'EX1_011', 'OG_254', 'E...",1,player1,2016-06-08 18:47:53:411713


In [6]:
# converting unicode values to strings
df = df.astype(str)

# cleaning the event_values column
df.event_value.replace(to_replace = ['<', '>', '\(', '\)', 'Minion ', 'Spell ', 'Weapon ', 'Secret '],
           value = '', regex = True, inplace = True)


In [7]:
# converting strings to lists
for i in range(len(df.event_key)):
    if df.event_key[i] in ['deck', 'cards_mulliganed', 'cards_kept']:
        df.event_value[i] = ast.literal_eval(df.event_value[i])

In [8]:
df.head(100)

Unnamed: 0,event_key,event_value,game_id,player,timestamp
0,deck,"[FP1_027, EX1_509, NEW1_029, OG_134, GVG_071, ...",1,player1,2016-06-08 18:47:53:334438
1,deck,"[GVG_081, AT_106, OG_090, AT_070, EX1_007, CS2...",1,player1,2016-06-08 18:47:53:370128
2,deck,"[FP1_015, OG_133, EX1_572, EX1_558, NEW1_041, ...",1,player1,2016-06-08 18:47:53:374504
3,deck,"[CS2_226, NEW1_019, CS2_186, BRM_033, FP1_030,...",1,player1,2016-06-08 18:47:53:381442
4,deck,"[EX1_556, EX1_170, EX1_011, OG_254, EX1_044, G...",1,player1,2016-06-08 18:47:53:411713
5,deck,"[EX1_009, OG_122, EX1_050, EX1_062, OG_321, GV...",1,player1,2016-06-08 18:47:53:405278
6,deck,"[AT_101, EX1_341, EX1_284, EX1_085, LOE_111, G...",1,player1,2016-06-08 18:47:53:388548
7,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:486023
8,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:545314
9,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:538533


In [17]:
type(df.ix[98, 1])

str

In [36]:
def expand_decks(df):
    if len(df.event_value.values[0]) == 0:
        df['event_value'] = 'NONE'
        df['event_iter'] = 0.
        return df
    else:
        ndf = pd.concat([df]*len(df.event_value.values[0]), ignore_index=True)
        ndf['event_value'] = df.event_value.values[0]
        ndf['event_iter'] = range(ndf.shape[0])
        return ndf


In [37]:
decks_times = df.loc[df.event_key.isin(['deck','cards_mulliganed','cards_kept'])]
decks = decks_times.groupby('timestamp').apply(expand_decks).reset_index(drop=True)

In [38]:
decks[decks.event_key == 'cards_mulliganed']

Unnamed: 0,event_key,event_value,game_id,player,timestamp,event_iter
480,cards_mulliganed,Master Jouster,1,Player1,2016-06-08 18:47:54:499830,0.0
484,cards_mulliganed,NONE,1,Player1,2016-06-08 18:47:54:602784,0.0
485,cards_mulliganed,NONE,1,Player1,2016-06-08 18:47:54:641989,0.0
489,cards_mulliganed,Drakonid Crusher,1,Player2,2016-06-08 18:47:54:726264,0.0
494,cards_mulliganed,NONE,1,Player1,2016-06-08 18:47:54:748374,0.0
497,cards_mulliganed,Ice Rager,1,Player1,2016-06-08 18:47:54:812724,0.0
498,cards_mulliganed,Mini-Mage,1,Player1,2016-06-08 18:47:54:812724,1.0
504,cards_mulliganed,Animated Armor,1,Player2,2016-06-08 18:47:54:968796,0.0
505,cards_mulliganed,Light's Champion,1,Player1,2016-06-08 18:47:54:985587,0.0
506,cards_mulliganed,Grimscale Oracle,1,Player1,2016-06-08 18:47:54:985587,1.0


In [25]:
df.event_value.values

array([ ['FP1_027', 'EX1_509', 'NEW1_029', 'OG_134', 'GVG_071', 'EX1_583', 'EX1_620', 'EX1_009', 'GVG_108', 'OG_300', 'AT_096', 'CS2_181', 'NEW1_025', 'GVG_084', 'OG_286', 'CS2_092', 'GVG_117', 'NEW1_018', 'OG_284', 'CS1_042', 'AT_127', 'EX1_029', 'GVG_064', 'EX1_363', 'EX1_284', 'EX1_097', 'OG_152', 'EX1_382', 'AT_113', 'LOE_038'],
       ['GVG_081', 'AT_106', 'OG_090', 'AT_070', 'EX1_007', 'CS2_221', 'AT_109', 'EX1_614', 'CS2_168', 'LOE_089', 'EX1_560', 'CS2_221', 'EX1_011', 'GVG_118', 'EX1_287', 'AT_113', 'AT_118', 'AT_093', 'CS2_117', 'EX1_044', 'NEW1_023', 'AT_113', 'FP1_028', 'FP1_027', 'EX1_393', 'AT_082', 'CS2_181', 'GVG_069', 'EX1_508', 'LOE_047'],
       ['FP1_015', 'OG_133', 'EX1_572', 'EX1_558', 'NEW1_041', 'FP1_007', 'GVG_025', 'EX1_105', 'AT_123', 'OG_340', 'EX1_043', 'CS2_121', 'EX1_577', 'AT_034', 'EX1_131', 'CS2_179', 'PRO_001', 'LOEA10_3', 'EX1_284', 'EX1_011', 'OG_102', 'AT_131', 'CS2_200', 'AT_115', 'EX1_508', 'EX1_557', 'AT_114', 'OG_267', 'LOE_010', 'CS2_186'],
  

In [26]:
ndf = pd.concat([df]*len(df.event_value.values[0]), ignore_index=True)

for i in range(len(ndf.event_key)):
    if type(df.event_value[i]) == list:
        ndf['event_value'][i] = df.event_value.values[i][0]
    else: 

In [27]:
ndf

Unnamed: 0,event_key,event_value,game_id,player,timestamp
0,deck,3.95333333333,1,player1,2016-06-08 18:47:53:334438
1,deck,3.95333333333,1,player1,2016-06-08 18:47:53:370128
2,deck,3.95333333333,1,player1,2016-06-08 18:47:53:374504
3,deck,3.95333333333,1,player1,2016-06-08 18:47:53:381442
4,deck,3.95333333333,1,player1,2016-06-08 18:47:53:411713
5,deck,3.95333333333,1,player1,2016-06-08 18:47:53:405278
6,deck,3.95333333333,1,player1,2016-06-08 18:47:53:388548
7,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:486023
8,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:545314
9,deck_cost,3.95333333333,1,player1,2016-06-08 18:47:53:538533


In [30]:
nodecks = df[~df.event_key.isin(['deck','cards_mulliganed','cards_kept'])]
nodecks['event_iter'] = 0
expanded = pd.concat([decks, nodecks], ignore_index=True)
expanded.sort_values(['event_key','event_iter','player','timestamp'], inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [None]:
df.event_key.unique()

In [None]:
df.ix[6, 0]

In [None]:
df.ix[7]

In [None]:
turns = pd.DataFrame([])

In [None]:
for x in df.event_key.unique():
    if x in ['turn_begins', 'max_mana', 'avg_hand_cost', 'mana_used', 'turn_end', 'hero_health', 'hero_power', 'overdraw']:
        idx = df.event_key[df.event_key == x].index.tolist()
        turns = turns.append(df.ix[idx])

In [None]:
turns = turns[turns['game_id'] != 1]

In [None]:
turns.sort_values(by = ['game_id', 'event_value'], ascending = True, inplace = True)

In [None]:
for x in turns.event_value[900:1000]:
    try:
        turns.event_value[turns.event_value == x] = turns.event_value[turns.event_value == x].map(lambda y: float(y))
    except ValueError:
        pass

In [None]:
turns

In [None]:
df_pivot = df.pivot_table(index = 'game_id', columns=['event_key', 'player'], values='event_value', aggfunc=lambda x: ' '.join(x))
df_pivot.head()

In [None]:
def normalize(row):
    print eval(row.event_value)
    row['dave'] = 2
    return row

df.head(2).apply(normalize, axis = 1)