In [1]:
import pandas as pd 
import numpy as np

In [2]:
from io import StringIO
from datetime import date as Date

In [3]:
import os
import re

In [4]:
def parse_game_data(fname):
    """
    Load the season game statistics table for the Toronto Maple Leafs.

    The first line of the file is skipped, the next line supplies the column
    names, and the first column is used as the row index.

    Parameters
    ----------
    fname : str or file-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, indexed by the file's first column.
    """
    season_games = pd.read_csv(fname, index_col=0, skiprows=1)
    return season_games


def parse_player_level_stats(fpath, pattern=r'game(\d+).txt'):
    """
    Parse the CSV file containing the player-level statistics for one game.

    The first line of the file is skipped, the first column becomes the row
    index, rows whose index label is missing are discarded, the last column
    is removed, and the index is cast to int.

    Parameters
    ----------
    fpath : str or file-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    pattern : str, optional
        Not used by this function. It is kept only so that existing callers
        which pass it keep working.

    Returns
    -------
    pandas.DataFrame
        The cleaned player-level table. Only the dataframe is returned; the
        game number is not derived here and must be obtained by the caller
        (for example from the file name).

    Raises
    ------
    ValueError
        If a remaining index label cannot be converted to an integer.
    """
    df = pd.read_csv(fpath, skiprows=1, index_col=0)
    # Keep only rows that have an index label. A boolean filter is used
    # instead of ``drop(np.nan)`` so that a file without any such row does
    # not raise KeyError.
    df = df.loc[df.index.notna()]
    # The last column is not kept.
    # NOTE(review): presumably an empty/artifact column in the export - confirm.
    df = df.drop(columns=df.columns[-1])
    df.index = df.index.astype(int)
    return df

In [5]:
# Parse the season-level game table; as the last expression of the cell,
# the resulting dataframe is shown as the cell output.
parse_game_data(fname='data_files/season-stat-table1.csv')

Unnamed: 0_level_0,Date,Unnamed: 2,Opponent,GF,GA,Unnamed: 6,Unnamed: 7,Unnamed: 8,S,PIM,...,CA,CF%,FF,FA,FF%,FOW,FOL,FO%,oZS%,PDO
GP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2018-10-03,,Montreal Canadiens,3,2,W,OT,,26,8,...,63,43.8,35,42,45.5,32,18,64.0,35.3,105.8
2,2018-10-06,,Ottawa Senators,3,5,L,,,37,4,...,36,67.9,51,30,63.0,34,33,50.7,65.2,84.9
3,2018-10-07,@,Chicago Blackhawks,7,6,W,OT,,34,6,...,64,44.8,38,38,50.0,27,26,50.9,43.3,100.0
4,2018-10-09,@,Dallas Stars,7,4,W,,,30,2,...,54,52.6,45,44,50.6,26,29,47.3,50.0,109.4
5,2018-10-11,@,Detroit Red Wings,5,3,W,,,35,13,...,50,42.5,34,41,45.3,27,26,50.9,53.1,100.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,2019-03-30,@,Ottawa Senators,2,4,L,,,44,16,...,36,66.7,51,26,66.2,28,14,66.7,73.1,85.8
79,2019-04-01,@,New York Islanders,2,1,W,,,38,8,...,48,51.5,40,30,57.1,25,23,52.1,58.6,107.7
80,2019-04-02,,Carolina Hurricanes,1,4,L,,,24,6,...,69,42.5,37,54,40.7,32,23,58.2,50.5,91.4
81,2019-04-04,,Tampa Bay Lightning,1,3,L,,,30,2,...,49,52.0,41,41,50.0,19,21,47.5,51.4,96.4


In [6]:
# Player-level files are named "game<N>.txt". The pattern is a raw string so
# the digit escape reaches the regex engine untouched, and the dot is escaped
# so that only a literal "." before "txt" is accepted.
pattern = re.compile(r'^game(\d+)\.txt$')
fdir = './data_files/player-level-stats/'
# os.listdir returns entries in arbitrary order; sorting makes the listing
# reproducible across machines and runs.
fnames = sorted(fname for fname in os.listdir(fdir) if pattern.search(fname))
fnames

['game1.txt',
 'game10.txt',
 'game11.txt',
 'game12.txt',
 'game13.txt',
 'game14.txt',
 'game15.txt',
 'game16.txt',
 'game17.txt',
 'game18.txt',
 'game19.txt',
 'game2.txt',
 'game20.txt',
 'game21.txt',
 'game22.txt',
 'game23.txt',
 'game24.txt',
 'game25.txt',
 'game26.txt',
 'game27.txt',
 'game28.txt',
 'game29.txt',
 'game3.txt',
 'game30.txt',
 'game31.txt',
 'game32.txt',
 'game33.txt',
 'game34.txt',
 'game35.txt',
 'game36.txt',
 'game37.txt',
 'game38.txt',
 'game39.txt',
 'game4.txt',
 'game40.txt',
 'game41.txt',
 'game42.txt',
 'game43.txt',
 'game44.txt',
 'game45.txt',
 'game46.txt',
 'game47.txt',
 'game48.txt',
 'game49.txt',
 'game5.txt',
 'game50.txt',
 'game51.txt',
 'game52.txt',
 'game53.txt',
 'game54.txt',
 'game55.txt',
 'game56.txt',
 'game57.txt',
 'game58.txt',
 'game59.txt',
 'game6.txt',
 'game60.txt',
 'game61.txt',
 'game62.txt',
 'game63.txt',
 'game64.txt',
 'game65.txt',
 'game66.txt',
 'game67.txt',
 'game68.txt',
 'game69.txt',
 'game7.txt',
 '

In [7]:
# Build a mapping from game number (taken from the file name) to the
# player-level dataframe parsed from that file.
player_level_stats_map = {}
for fname in fnames:
    # os.path.join gives the right path whether or not fdir ends with a separator.
    df = parse_player_level_stats(os.path.join(fdir, fname))
    # fnames was filtered with this same pattern, so the match always exists
    # and group(1) holds the digits of the game number.
    game_index = int(pattern.search(fname).group(1))
    player_level_stats_map[game_index] = df
player_level_stats_map

{1:                Player  G  A  PTS  +/-  PIM  EV  PP  SH   GW  EV.1  PP.1  SH.1  \
 Rk                                                                              
 1        Connor Brown  0  0    0  0.0    0   0   0   0  0.0   0.0   0.0   0.0   
 2      Travis Dermott  0  1    1  1.0    0   0   0   0  0.0   1.0   0.0   0.0   
 3         Tyler Ennis  0  0    0 -1.0    0   0   0   0  0.0   0.0   0.0   0.0   
 4       Jake Gardiner  0  1    1  1.0    0   0   0   0  0.0   1.0   0.0   0.0   
 5         Ron Hainsey  0  0    0  0.0    2   0   0   0  0.0   0.0   0.0   0.0   
 6          Zach Hyman  0  0    0  0.0    2   0   0   0  0.0   0.0   0.0   0.0   
 7    Andreas Johnsson  0  0    0  0.0    0   0   0   0  0.0   0.0   0.0   0.0   
 8         Nazem Kadri  0  2    2  0.0    0   0   0   0  0.0   1.0   1.0   0.0   
 9     Kasperi Kapanen  0  0    0  0.0    0   0   0   0  0.0   0.0   0.0   0.0   
 10         Josh Leivo  0  0    0  1.0    0   0   0   0  0.0   0.0   0.0   0.0   
 11       Pär

In [8]:
# Completion marker: prints 'done' once execution reaches this cell.
print('done')

done
