In [18]:
import pandas as pd
import numpy as np

In [2]:
# read raw data
col_names = ['date', 'game_type', 'home_t', 'ht_id', 'game_result', 'game_id', 'away_t', 'at_id']
df = pd.read_csv('raw_data/HA_season_games_calendar.csv',names=col_names)

In [6]:
df[:4]

Unnamed: 0,date,game_type,home_t,ht_id,game_result,game_id,away_t,at_id
0,23.06 - 21:00,Friendly,HC Jelgava Stars,['team_id=42152'],4:1,['match_id=897060599'],FireStorm,['team_id=10056']
1,24.06 - 22:00,Friendly,Lakeside Hoppers,['team_id=39343'],5:2,['match_id=897060558'],HC Jelgava Stars,['team_id=42152']
2,25.06 - 21:00,League 1.round,HC #Dinamo Riga#,['team_id=7903'],3:5,['match_id=897112194'],HC Jelgava Stars,['team_id=42152']
3,26.06 - 21:00,League 2.round,HC Jelgava Stars,['team_id=42152'],4:2,['match_id=897112204'],Rozetes,['team_id=20386']


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67 entries, 0 to 66
Data columns (total 8 columns):
date           67 non-null object
game_type      67 non-null object
home_t         67 non-null object
ht_id          67 non-null object
game_result    67 non-null object
game_id        67 non-null object
away_t         67 non-null object
at_id          67 non-null object
dtypes: object(8)
memory usage: 4.3+ KB


#### split game_result into columns home goals and away goals and set  int type

In [8]:
game_scores = pd.DataFrame(df.game_result.str.split(':').tolist(),
                                   columns = ['home_g','away_g'])

In [9]:
game_scores[['home_g','away_g']] = game_scores[['home_g','away_g']].astype(int)

#### new df with necessary columns

In [10]:
new_df = pd.concat([df,game_scores],axis=1)
new_df = new_df[['game_type', 'home_t','home_g','away_g', 'away_t']]

In [12]:
new_df[:4]

Unnamed: 0,game_type,home_t,home_g,away_g,away_t
0,Friendly,HC Jelgava Stars,4,1,FireStorm
1,Friendly,Lakeside Hoppers,5,2,HC Jelgava Stars
2,League 1.round,HC #Dinamo Riga#,3,5,HC Jelgava Stars
3,League 2.round,HC Jelgava Stars,4,2,Rozetes


## separating games by home games and away games and game type aswell

    - Friendly
    - League
    - Playoff

In [13]:
f_home = new_df.loc[(df['home_t'] == 'HC Jelgava Stars') & (new_df['game_type'] == 'Friendly')]
f_away = new_df.loc[(df['home_t'] != 'HC Jelgava Stars') & (new_df['game_type'] == 'Friendly')]
l_home = new_df.loc[(df['home_t'] == 'HC Jelgava Stars') & (new_df.game_type.str.startswith('Le'))]
l_away = new_df.loc[(df['home_t'] != 'HC Jelgava Stars') & (new_df.game_type.str.startswith('Le'))]
pl_home = new_df.loc[(df['home_t'] == 'HC Jelgava Stars') & (new_df.game_type.str.startswith('Pl'))]
pl_away = new_df.loc[(df['home_t'] != 'HC Jelgava Stars') & (new_df.game_type.str.startswith('Pl'))]

### resulting game calculation 
    count :
        - games played
        - home / away team goals
        - wins/loss/draws

In [15]:
game_table = {'friendly':[f_home,f_away],
                  'league' : [l_home,l_away],
                  'playoff' :[pl_home,pl_away]}

##### Home games


In [16]:
home_game_scores = {'friendly':{},
               'league':{},
               'playoff':{}}

In [19]:
for i in game_table:
    home_team_goals = game_table[i][0]['home_g'].sum()
    away_team_goals = game_table[i][0]['away_g'].sum()
    games_played = len(game_table[i][0]['home_g'])
    wins = np.greater(game_table[i][0]['home_g'],game_table[i][0]['away_g']).sum()
    loss = np.less(game_table[i][0]['home_g'],game_table[i][0]['away_g']).sum()
    draw = np.equal(game_table[i][0]['home_g'],game_table[i][0]['away_g']).sum()
    
    game_res = {'GamesPlayed':games_played,
                'HomeGoals':home_team_goals,
                'AwayGoals':away_team_goals,
                'Wins': wins,
                'Loss': loss,
                'Draw': draw}    
    home_game_scores[i].update(game_res)
    print('GamesPlayed', games_played, 'HomeGoals', home_team_goals,'AwayGoals', away_team_goals)

GamesPlayed 12 HomeGoals 44 AwayGoals 29
GamesPlayed 15 HomeGoals 54 AwayGoals 33
GamesPlayed 3 HomeGoals 16 AwayGoals 10


#### Away games

In [21]:
away_game_scores = {'friendly':{},
               'league':{},
               'playoff':{}}

In [22]:
for i in game_table:
    home_team_goals = game_table[i][1]['home_g'].sum()
    away_team_goals = game_table[i][1]['away_g'].sum()
    games_played = len(game_table[i][1]['home_g'])
    loss = np.greater(game_table[i][1]['home_g'],game_table[i][1]['away_g']).sum()
    wins = np.less(game_table[i][1]['home_g'],game_table[i][1]['away_g']).sum()
    draw = np.equal(game_table[i][1]['home_g'],game_table[i][1]['away_g']).sum()
    
    game_res = {'GamesPlayed':games_played,
                'HomeGoals':home_team_goals,
                'AwayGoals':away_team_goals,
                'Wins': wins,
                'Loss': loss,
                'Draw': draw}    
    away_game_scores[i].update(game_res)

#### create new df from dicts 
    - home_game_scores   - score_df
    - away_game_scores

In [29]:
# Home games

In [23]:
score_df = pd.DataFrame.from_dict(home_game_scores,orient='index')

In [26]:
score_df = score_df.reset_index()
score_df

Unnamed: 0,index,GamesPlayed,HomeGoals,AwayGoals,Wins,Loss,Draw
0,friendly,12,44,29,7,4,1
1,league,15,54,33,12,3,0
2,playoff,3,16,10,2,1,0


In [28]:
# apply _home to index end.

score_df['index'] = score_df['index'].apply(lambda x: f"{x}_home")

In [30]:
# away games

In [31]:
scores = pd.DataFrame.from_dict(away_game_scores,orient='index')

In [32]:
scores = scores.reset_index()
scores['index'] = scores['index'].apply(lambda x: f"{x}_away")
scores

Unnamed: 0,index,GamesPlayed,HomeGoals,AwayGoals,Wins,Loss,Draw
0,friendly_away,17,41,73,11,5,1
1,league_away,15,43,49,8,7,0
2,playoff_away,4,16,14,3,1,0


In [34]:
game_score = pd.concat([score_df,scores])
game_score = game_score.sort_values(by=['index'])

In [35]:
game_score

Unnamed: 0,index,GamesPlayed,HomeGoals,AwayGoals,Wins,Loss,Draw
0,friendly_away,17,41,73,11,5,1
0,friendly_home_home,12,44,29,7,4,1
1,league_away,15,43,49,8,7,0
1,league_home_home,15,54,33,12,3,0
2,playoff_away,4,16,14,3,1,0
2,playoff_home_home,3,16,10,2,1,0


In [37]:
game_score.sum()

index          friendly_awayfriendly_home_homeleague_awayleag...
GamesPlayed                                                   66
HomeGoals                                                    214
AwayGoals                                                    208
Wins                                                          43
Loss                                                          21
Draw                                                           2
dtype: object