In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
%matplotlib inline

In [2]:
# Load the NHL stats CSV, using the file's first column as the row index.
stats_csv = 'nhl_stats.csv'
df = pd.read_csv(stats_csv, index_col=0)
df.head(n=5)

Unnamed: 0,team,date,points,RW,ROW,SOW,goals,goals_against,power_play,penalty_kill,net_ppp,net_pkp,shots,shots_against,FOWp
0,New Jersey Devils,2005/10/05vs PIT,2,1,1,0,5,1,25.0,90.9,25.0,90.9,37.0,37.0,57.1
1,New York Islanders,2005/10/05@ BUF,0,0,0,0,4,6,14.3,81.8,14.3,81.8,26.0,39.0,51.5
2,New York Rangers,2005/10/05@ PHI,2,1,1,0,5,3,50.0,90.0,50.0,90.0,27.0,27.0,51.6
3,Philadelphia Flyers,2005/10/05vs NYR,0,0,0,0,3,5,10.0,50.0,10.0,50.0,27.0,27.0,48.4
4,Pittsburgh Penguins,2005/10/05@ NJD,0,0,0,0,1,5,9.1,75.0,9.1,75.0,37.0,37.0,42.9


In [3]:
# Check how each column was parsed. In the recorded output power_play,
# penalty_kill, net_ppp and net_pkp are listed as object, unlike the other
# numeric-looking columns, which are int64/float64.
df.dtypes

team              object
date              object
points             int64
RW                 int64
ROW                int64
SOW                int64
goals              int64
goals_against      int64
power_play        object
penalty_kill      object
net_ppp           object
net_pkp           object
shots            float64
shots_against    float64
FOWp             float64
dtype: object

In [4]:
# Discard every row that has at least one missing value (dropna's defaults,
# spelled out explicitly).
df = df.dropna(axis=0, how='any')

In [5]:
# Peek at the last five rows of the frame.
df.tail(n=5)

Unnamed: 0,team,date,points,RW,ROW,SOW,goals,goals_against,power_play,penalty_kill,net_ppp,net_pkp,shots,shots_against,FOWp
40129,Arizona Coyotes,2022/04/29vs NSH,2,1,1,0,5,4,0.0,100.0,0.0,100.0,27.0,33.0,50.8
40130,Vegas Golden Knights,2022/04/29@ STL,2,1,1,0,7,4,50.0,33.3,50.0,33.3,45.0,18.0,53.7
40131,Seattle Kraken,2022/04/29vs SJS,2,1,1,0,3,0,0.0,100.0,0.0,100.0,31.0,24.0,58.3
40132,Winnipeg Jets,2022/05/01vs SEA,2,1,1,0,4,3,0.0,0.0,0.0,0.0,27.0,30.0,59.1
40133,Seattle Kraken,2022/05/01@ WPG,0,0,0,0,3,4,100.0,100.0,100.0,100.0,30.0,27.0,40.9


In [6]:
# Each raw `date` entry looks like '2005/10/05vs PIT' or '2005/10/05@ BUF':
# the second whitespace-separated token is the opponent's abbreviation.
# The vectorized .str accessor replaces a per-row Python lambda; an entry with
# no second token becomes NaN here instead of raising IndexError.
df['opponent'] = df['date'].str.split().str[1]

In [7]:
# List the distinct opponent abbreviations present in the data.
df['opponent'].unique()

array(['PIT', 'BUF', 'PHI', 'NYR', 'NJD', 'MTL', 'NYI', 'BOS', 'TOR',
       'OTT', 'FLA', 'TBL', 'ATL', 'CAR', 'CBJ', 'ANA', 'STL', 'SJS',
       'DET', 'MIN', 'EDM', 'COL', 'PHX', 'CHI', 'LAK', 'DAL', 'VAN',
       'NSH', 'WSH', 'CGY', 'WPG', 'ARI', 'VGK', 'SEA'], dtype=object)

In [8]:
# List the distinct full team names present in the data.
df['team'].unique()

array(['New Jersey Devils', 'New York Islanders', 'New York Rangers',
       'Philadelphia Flyers', 'Pittsburgh Penguins', 'Boston Bruins',
       'Buffalo Sabres', 'Montréal Canadiens', 'Ottawa Senators',
       'Toronto Maple Leafs', 'Atlanta Thrashers', 'Carolina Hurricanes',
       'Florida Panthers', 'Tampa Bay Lightning', 'Washington Capitals',
       'Chicago Blackhawks', 'Detroit Red Wings', 'Nashville Predators',
       'St. Louis Blues', 'Calgary Flames', 'Colorado Avalanche',
       'Edmonton Oilers', 'Vancouver Canucks', 'Anaheim Ducks',
       'Dallas Stars', 'Los Angeles Kings', 'Phoenix Coyotes',
       'San Jose Sharks', 'Columbus Blue Jackets', 'Minnesota Wild',
       'Winnipeg Jets', 'Arizona Coyotes', 'Vegas Golden Knights',
       'Seattle Kraken'], dtype=object)

In [9]:
# Lookup table: three-letter team abbreviation -> full team name.
# Both 'PHX' (Phoenix Coyotes) and 'ARI' (Arizona Coyotes) are present, as is
# 'ATL' (Atlanta Thrashers), matching the names that occur in the `team` column.
abb_to_team = {
    'PIT': 'Pittsburgh Penguins',
    'PHI': 'Philadelphia Flyers',
    'TOR': 'Toronto Maple Leafs',
    'MTL': 'Montréal Canadiens',
    'MIN': 'Minnesota Wild',
    'COL': 'Colorado Avalanche',
    'EDM': 'Edmonton Oilers',
    'CHI': 'Chicago Blackhawks',
    'CGY': 'Calgary Flames',
    'CAR': 'Carolina Hurricanes',
    'DAL': 'Dallas Stars',
    'OTT': 'Ottawa Senators',
    'BUF': 'Buffalo Sabres',
    'WSH': 'Washington Capitals',
    'ATL': 'Atlanta Thrashers',
    'ANA': 'Anaheim Ducks',
    'DET': 'Detroit Red Wings',
    'NJD': 'New Jersey Devils',
    'CBJ': 'Columbus Blue Jackets',
    'SJS': 'San Jose Sharks',
    'STL': 'St. Louis Blues',
    'PHX': 'Phoenix Coyotes',
    'NYR': 'New York Rangers',
    'TBL': 'Tampa Bay Lightning',
    'LAK': 'Los Angeles Kings',
    'NSH': 'Nashville Predators',
    'NYI': 'New York Islanders',
    'VAN': 'Vancouver Canucks',
    'BOS': 'Boston Bruins',
    'FLA': 'Florida Panthers',
    'WPG': 'Winnipeg Jets',
    'ARI': 'Arizona Coyotes',
    'VGK': 'Vegas Golden Knights',
    'SEA': 'Seattle Kraken',
}

In [10]:
# Keep only the leading 'YYYY/MM/DD' of each entry (what follows is the
# 'vs'/'@' marker and the opponent code) and parse the whole column in one
# vectorized call instead of invoking pd.to_datetime once per row.
df['date'] = pd.to_datetime(df['date'].str[:10], format='%Y/%m/%d')

# Translate opponent abbreviations into full team names. Series.map yields NaN
# for keys missing from the dict, so check explicitly and fail loudly with a
# KeyError, as a direct dict lookup would.
opponent_names = df['opponent'].map(abb_to_team)
unmapped = df.loc[opponent_names.isna(), 'opponent'].unique()
if len(unmapped) > 0:
    raise KeyError(f'no team name for abbreviation(s): {unmapped.tolist()}')
df['opponent'] = opponent_names
df.head()

Unnamed: 0,team,date,points,RW,ROW,SOW,goals,goals_against,power_play,penalty_kill,net_ppp,net_pkp,shots,shots_against,FOWp,opponent
0,New Jersey Devils,2005-10-05,2,1,1,0,5,1,25.0,90.9,25.0,90.9,37.0,37.0,57.1,Pittsburgh Penguins
1,New York Islanders,2005-10-05,0,0,0,0,4,6,14.3,81.8,14.3,81.8,26.0,39.0,51.5,Buffalo Sabres
2,New York Rangers,2005-10-05,2,1,1,0,5,3,50.0,90.0,50.0,90.0,27.0,27.0,51.6,Philadelphia Flyers
3,Philadelphia Flyers,2005-10-05,0,0,0,0,3,5,10.0,50.0,10.0,50.0,27.0,27.0,48.4,New York Rangers
4,Pittsburgh Penguins,2005-10-05,0,0,0,0,1,5,9.1,75.0,9.1,75.0,37.0,37.0,42.9,New Jersey Devils


In [11]:
# Cast the three win-indicator columns to int.
# NOTE(review): the earlier dtypes listing already shows RW/ROW/SOW as int64,
# so this cast may be redundant - confirm before relying on it.
win_columns = ['RW', 'ROW', 'SOW']
df = df.astype({column: int for column in win_columns})
df.head(n=5)

Unnamed: 0,team,date,points,RW,ROW,SOW,goals,goals_against,power_play,penalty_kill,net_ppp,net_pkp,shots,shots_against,FOWp,opponent
0,New Jersey Devils,2005-10-05,2,1,1,0,5,1,25.0,90.9,25.0,90.9,37.0,37.0,57.1,Pittsburgh Penguins
1,New York Islanders,2005-10-05,0,0,0,0,4,6,14.3,81.8,14.3,81.8,26.0,39.0,51.5,Buffalo Sabres
2,New York Rangers,2005-10-05,2,1,1,0,5,3,50.0,90.0,50.0,90.0,27.0,27.0,51.6,Philadelphia Flyers
3,Philadelphia Flyers,2005-10-05,0,0,0,0,3,5,10.0,50.0,10.0,50.0,27.0,27.0,48.4,New York Rangers
4,Pittsburgh Penguins,2005-10-05,0,0,0,0,1,5,9.1,75.0,9.1,75.0,37.0,37.0,42.9,New Jersey Devils
