# Matchups
Analyse the performance of each team against the others

In [1]:
import pandas as pd
import numpy as np

## Read and convert data

In [2]:
scores = pd.read_pickle('data/scores.pkl')
scores.head()

Unnamed: 0,Date,Visitor,Visitor Goals,Home,Home Goals,Overtime,Attendance,Length,Notes,Season,Type
0,2019-04-10,Pittsburgh Penguins,3,New York Islanders,4,OT,13917,0 days 03:06:00,at Nassau Veterans Memorial Coliseum (Uniondal...,2018-2019,playoffs
1,2019-04-10,Dallas Stars,3,Nashville Predators,2,,17458,0 days 02:30:00,,2018-2019,playoffs
2,2019-04-10,St. Louis Blues,2,Winnipeg Jets,1,,15321,0 days 02:31:00,,2018-2019,playoffs
3,2019-04-10,Vegas Golden Knights,2,San Jose Sharks,5,,17562,0 days 02:39:00,,2018-2019,playoffs
4,2019-04-10,Columbus Blue Jackets,4,Tampa Bay Lightning,3,,19092,0 days 02:28:00,,2018-2019,playoffs


In [3]:
df = pd.DataFrame({'Team': pd.Series([], dtype='category'),
                   'Opponent': pd.Series([], dtype='category'),
                   'Goals': pd.Series([], dtype='int'),    # Goals scored
                   'GA': pd.Series([], dtype='int'),       # Goals against
                   'Home': pd.Series([], dtype='bool'),
                   'Overtime': pd.Series([], dtype='bool'),
                   'Date': pd.Series([], dtype='datetime64[ns]'),
                   'Season': pd.Series([], dtype='category'),
                   'Type': pd.Series([], dtype='category')})

In [4]:
home = scores.drop(columns=['Attendance', 'Length', 'Notes'])
away = home.copy()

In [5]:
home = home.rename(columns={'Home':'Team', 'Visitor':'Opponent', 'Home Goals':'Goals', 'Visitor Goals':'GA'}, inplace=False)
home['Home'] = True
home['Overtime'] = home['Overtime'].notnull()

In [6]:
df = df.append(home, ignore_index=True)

In [7]:
away = away.rename(columns={'Visitor':'Team', 'Home':'Opponent', 'Home Goals':'GA', 'Visitor Goals':'Goals'}, inplace=False)
away['Home'] = False
away['Overtime'] = away['Overtime'].notnull()

In [8]:
df = pd.concat([df, home, away], ignore_index=True)
display(df)

Unnamed: 0,Team,Opponent,Goals,GA,Home,Overtime,Date,Season,Type
0,New York Islanders,Pittsburgh Penguins,4,3,True,True,2019-04-10,2018-2019,playoffs
1,Nashville Predators,Dallas Stars,2,3,True,False,2019-04-10,2018-2019,playoffs
2,Winnipeg Jets,St. Louis Blues,1,2,True,False,2019-04-10,2018-2019,playoffs
3,San Jose Sharks,Vegas Golden Knights,5,2,True,False,2019-04-10,2018-2019,playoffs
4,Tampa Bay Lightning,Columbus Blue Jackets,3,4,True,False,2019-04-10,2018-2019,playoffs
...,...,...,...,...,...,...,...,...,...
9781,Dallas Stars,Tampa Bay Lightning,2,3,False,False,2020-09-21,2019-2020,playoffs
9782,Tampa Bay Lightning,Dallas Stars,5,2,False,False,2020-09-23,2019-2020,playoffs
9783,Tampa Bay Lightning,Dallas Stars,5,4,False,True,2020-09-25,2019-2020,playoffs
9784,Dallas Stars,Tampa Bay Lightning,3,2,False,True,2020-09-26,2019-2020,playoffs


## Team analysis

In [9]:
def record_per_team(data, team, season=None, playoffs=False):
    record = pd.DataFrame({'Opponent': pd.Series([], dtype='category'),
                           'Games': pd.Series([], dtype='int'),
                           'Wins': pd.Series([], dtype='int'),
                           'Losses': pd.Series([], dtype='int'),
                           'OT Wins': pd.Series([], dtype='int'),
                           'OT Losses': pd.Series([], dtype='int'),
                           'ROW': pd.Series([], dtype='int'),
                           'Goals': pd.Series([], dtype='int'),     # Goals scored
                           'GA': pd.Series([], dtype='int')})       # Goals against
    
    df = data.loc[data['Team'] == team].copy()
    record['Opponent'] = df['Opponent'].unique()
    record = record.apply(lambda x: x.fillna(0) if x.dtype.kind in 'biufc' else x)

    for _, row in df.iterrows():
        idx = record.index[record['Opponent'] == row['Opponent']].tolist()[0]

        record.at[idx, 'Games'] += 1

        if row['Goals'] > row['GA']:
            record.at[idx, 'Wins'] += 1
            if row['Overtime']:
                record.at[idx, 'OT Wins'] += 1
        elif row['Overtime']:
            record.at[idx, 'OT Losses'] += 1
        else:
            record.at[idx, 'Losses'] += 1

        record.at[idx, 'Goals'] += row['Goals']
        record.at[idx, 'GA'] += row['GA']

    record['Diff'] = record['Goals'] - record['GA']
    record['ROW'] = record['Wins'] - record['OT Wins']  # Regulation Wins
    display(record)

In [10]:
# for team, data in df.groupby('Team'):
record_per_team(df, 'Tampa Bay Lightning')

Unnamed: 0,Opponent,Games,Wins,Losses,OT Wins,OT Losses,ROW,Goals,GA,Diff
0,Columbus Blue Jackets,28.0,16.0,12.0,6.0,0.0,10.0,87.0,80.0,7.0
1,Chicago Blackhawks,17.0,14.0,2.0,1.0,1.0,13.0,70.0,40.0,30.0
2,Nashville Predators,18.0,10.0,6.0,1.0,2.0,9.0,58.0,52.0,6.0
3,Detroit Red Wings,19.0,15.0,3.0,2.0,1.0,13.0,61.0,47.0,14.0
4,Florida Panthers,22.0,16.0,6.0,5.0,0.0,11.0,93.0,63.0,30.0
5,Carolina Hurricanes,19.0,13.0,4.0,2.0,2.0,11.0,55.0,37.0,18.0
6,Dallas Stars,21.0,13.0,3.0,2.0,5.0,11.0,67.0,43.0,24.0
7,Vancouver Canucks,5.0,3.0,2.0,0.0,0.0,3.0,25.0,14.0,11.0
8,New Jersey Devils,7.0,6.0,1.0,1.0,0.0,5.0,39.0,20.0,19.0
9,Edmonton Oilers,5.0,5.0,0.0,0.0,0.0,5.0,22.0,9.0,13.0
