In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import ast
from sqlalchemy import create_engine
%matplotlib inline

In [2]:
engine = create_engine('sqlite:///hs_sims1.db')

df = pd.read_sql('SELECT * FROM EVENTS', con = engine)

In [3]:
del df['index']

In [4]:
df.dtypes

event_key      object
event_value    object
game_id         int64
player         object
timestamp      object
dtype: object

In [5]:
df.head()

Unnamed: 0,event_key,event_value,game_id,player,timestamp
0,deck,"['FP1_027', 'EX1_509', 'NEW1_029', 'OG_134', '...",1,player1,2016-06-08 18:47:53:334438
1,deck,"['GVG_081', 'AT_106', 'OG_090', 'AT_070', 'EX1...",1,player1,2016-06-08 18:47:53:370128
2,deck,"['FP1_015', 'OG_133', 'EX1_572', 'EX1_558', 'N...",1,player1,2016-06-08 18:47:53:374504
3,deck,"['CS2_226', 'NEW1_019', 'CS2_186', 'BRM_033', ...",1,player1,2016-06-08 18:47:53:381442
4,deck,"['EX1_556', 'EX1_170', 'EX1_011', 'OG_254', 'E...",1,player1,2016-06-08 18:47:53:411713


In [7]:
# converting unicode values to strings
df.event_value = df.event_value.astype(str)
df.replace(to_replace = ['<', '>'], value = '', regex = True, inplace = True)

#, '\(', '\)', 'Minion ', 'Spell ', 'Weapon ', 'Secret '], 

In [8]:
# converting strings to lists
for i in range(len(df.event_key)):
    if df.event_key[i] in ['deck', 'cards_mulliganed', 'cards_kept']:
        df.event_value[i] = ast.literal_eval(df.event_value[i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [8]:
df.shape

(190291, 5)

In [9]:
def expand_decks(df):
    ndf = pd.concat([df]*len(df.event_value.values[0]), ignore_index=True)
    ndf['event_value'] = df.event_value.values[0]
    ndf['event_iter'] = range(ndf.shape[0])
    return ndf

In [10]:
df.event_key.unique()

array([u'deck', u'deck_cost', u'num_minions', u'num_spells',
       u'num_weapons', u'player1_class', u'player2_class', u'first_player',
       u'cards_mulliganed', u'cards_kept', u'turn_begins', u'max_mana',
       u'avg_hand_cost', u'mana_used', u'turn_end', u'hero_health',
       u'hero_power', u'lost', u'overdraw'], dtype=object)

In [17]:
decks_times = df.loc[df.event_key.isin(['deck','cards_mulliganed','cards_kept'])]

In [21]:
decks_times

Unnamed: 0,event_key,event_value,game_id,player,timestamp
0,deck,"[FP1_027, EX1_509, NEW1_029, OG_134, GVG_071, ...",1,player1,2016-06-08 18:47:53:334438
1,deck,"[GVG_081, AT_106, OG_090, AT_070, EX1_007, CS2...",1,player1,2016-06-08 18:47:53:370128
2,deck,"[FP1_015, OG_133, EX1_572, EX1_558, NEW1_041, ...",1,player1,2016-06-08 18:47:53:374504
3,deck,"[CS2_226, NEW1_019, CS2_186, BRM_033, FP1_030,...",1,player1,2016-06-08 18:47:53:381442
4,deck,"[EX1_556, EX1_170, EX1_011, OG_254, EX1_044, G...",1,player1,2016-06-08 18:47:53:411713
5,deck,"[EX1_009, OG_122, EX1_050, EX1_062, OG_321, GV...",1,player1,2016-06-08 18:47:53:405278
6,deck,"[AT_101, EX1_341, EX1_284, EX1_085, LOE_111, G...",1,player1,2016-06-08 18:47:53:388548
13,deck,"[AT_113, OG_174, CS1_069, GVG_095, EX1_045, AT...",1,player1,2016-06-08 18:47:53:392971
36,deck,"[OG_303, AT_092, CS2_120, EX1_067, BRM_033, OG...",1,player2,2016-06-08 18:47:53:868333
37,deck,"[LOE_107, LOE_110, CS2_181, GVG_064, LOE_038, ...",1,player2,2016-06-08 18:47:53:890836


In [20]:
decks_times.groupby('timestamp')

ValueError: No objects to concatenate

In [11]:
decks_times = df.loc[df.event_key.isin(['deck','cards_mulliganed','cards_kept'])]
decks = decks_times.groupby('timestamp').apply(expand_decks).reset_index(drop=True)

ValueError: No objects to concatenate

In [None]:
decks.head(10)

In [None]:
nodecks = df[~df.event_key.isin(['deck','cards_mulliganed','cards_kept'])]
nodecks['event_iter'] = 0
expanded = pd.concat([decks, nodecks], ignore_index=True)
expanded.sort_values(['event_key','event_iter','player','timestamp'], inplace=True)


In [None]:
df.event_key.unique()

In [None]:
df.ix[6, 0]

In [None]:
df.ix[7]

In [None]:
turns = pd.DataFrame([])

In [None]:
for x in df.event_key.unique():
    if x in ['turn_begins', 'max_mana', 'avg_hand_cost', 'mana_used', 'turn_end', 'hero_health', 'hero_power', 'overdraw']:
        idx = df.event_key[df.event_key == x].index.tolist()
        turns = turns.append(df.ix[idx])

In [None]:
turns = turns[turns['game_id'] != 1]

In [None]:
turns.sort_values(by = ['game_id', 'event_value'], ascending = True, inplace = True)

In [None]:
for x in turns.event_value[900:1000]:
    try:
        turns.event_value[turns.event_value == x] = turns.event_value[turns.event_value == x].map(lambda y: float(y))
    except ValueError:
        pass

In [None]:
turns

In [None]:
df_pivot = df.pivot_table(index = 'game_id', columns=['event_key', 'player'], values='event_value', aggfunc=lambda x: ' '.join(x))
df_pivot.head()

In [None]:
def normalize(row):
    print eval(row.event_value)
    row['dave'] = 2
    return row

df.head(2).apply(normalize, axis = 1)