In [4]:
#import modules
import csv
import pandas as pd

In [78]:
# Data source: basketball-reference.com, "http://www.basketball-reference.com/playoffs/series.html"
# Load the CSV, skipping the first row so that the second row supplies the column headers,
# then give the winner/loser columns descriptive names.
column_names = {
    'Team': 'Winner',
    'Team.1': 'Loser',
    'W': 'Winner #wins',
    'W.1': 'Loser #wins',
}
df = pd.read_csv('NBA Playoffs data.csv', skiprows=1).rename(columns=column_names)

In [79]:
# Preview the first 15 rows to check the load and the renamed columns
df.head(15)

Unnamed: 0,Yr,Lg,Series,Unnamed: 3,Unnamed: 4,Winner,Winner #wins,Unnamed: 7,Loser,Loser #wins
0,2016,NBA,Eastern Conf First Round,Apr 17 - Apr 24 2016,,Cleveland Cavaliers (1),4,,Detroit Pistons (8),0
1,2016,NBA,Eastern Conf First Round,Apr 16 - May 1 2016,,Toronto Raptors (2),4,,Indiana Pacers (7),3
2,2016,NBA,Eastern Conf First Round,Apr 17 - May 1 2016,,Miami Heat (3),4,,Charlotte Hornets (6),3
3,2016,NBA,Eastern Conf First Round,Apr 16 - Apr 28 2016,,Atlanta Hawks (4),4,,Boston Celtics (5),2
4,2016,NBA,Western Conf First Round,Apr 16 - Apr 27 2016,,Golden State Warriors (1),4,,Houston Rockets (8),1
5,2016,NBA,Western Conf First Round,Apr 17 - Apr 24 2016,,San Antonio Spurs (2),4,,Memphis Grizzlies (7),0
6,2016,NBA,Western Conf First Round,Apr 16 - Apr 25 2016,,Oklahoma City Thunder (3),4,,Dallas Mavericks (6),1
7,2016,NBA,Western Conf First Round,Apr 17 - Apr 29 2016,,Portland Trail Blazers (5),4,,Los Angeles Clippers (4),2
8,2016,NBA,Eastern Conf Semifinals,May 2 - May 8 2016,,Cleveland Cavaliers (1),4,,Atlanta Hawks (4),0
9,2016,NBA,Eastern Conf Semifinals,May 3 - May 15 2016,,Toronto Raptors (2),4,,Miami Heat (3),3


### Let's look at only the NBA Finals series and the most important columns

In [80]:
# Keep only the rows whose Series label is exactly 'Finals', and only the columns needed below.
# NOTE(review): there is no filter on the 'Lg' column, so rows from any other league that are
# also labelled 'Finals' would be included -- confirm against the CSV.
finals_columns = ['Winner', 'Winner #wins', 'Loser', 'Loser #wins']
finals_df = (
    df.loc[df['Series'] == 'Finals', finals_columns]
    .reset_index(drop=True)  # fresh 0..n-1 index without hard-coding the number of series
)
# Total games played in each series = games won by both sides.
# reset_index returned a new frame, so this assignment does not write into a slice of df.
finals_df['Total_Games'] = finals_df['Winner #wins'] + finals_df['Loser #wins']
finals_df.head()

Unnamed: 0,Winner,Winner #wins,Loser,Loser #wins,Total_Games
0,Cleveland Cavaliers (1),4,Golden State Warriors (1),3,7
1,Golden State Warriors (1),4,Cleveland Cavaliers (2),2,6
2,San Antonio Spurs (1),4,Miami Heat (2),1,5
3,Miami Heat (1),4,San Antonio Spurs (2),3,7
4,Miami Heat (2),4,Oklahoma City Thunder (2),1,5


### Let's calculate the percentage of series that ended at each possible length

In [81]:
# Share of finals series that ended in 4, 5, 6 and 7 games.
# The denominator is taken from the data rather than hard-coded, so the percentages
# stay correct if the CSV gains or loses rows.
total_series = len(finals_df)
for i in range(4, 8):  # the 4 possibilities (4-game, 5-game, 6-game and 7-game series)
    # count the series of this length and divide by the total number of finals series
    no_of_games = float((finals_df['Total_Games'] == i).sum()) / total_series
    # single-argument print(...) produces the same text on Python 2 and Python 3
    print("NBA finals series went to %i-games, " % (i) + '{percent:.1%}'.format(percent=no_of_games) + " of the time")

NBA finals series went to 4-games, 10.5% of the time
NBA finals series went to 5-games, 25.0% of the time
NBA finals series went to 6-games, 35.5% of the time
NBA finals series went to 7-games, 28.9% of the time


### Let's calculate the number of games each team has won in NBA finals matchups (as series winner or loser).
### Did the team with the most game wins also win the most championships?

In [91]:
# Games won by each series winner (team labels still carry the seed suffix), top five.
(
    finals_df
    .groupby('Winner')[['Winner #wins']]
    .sum()
    .sort_values('Winner #wins', ascending=False)
    .head()
)

Unnamed: 0_level_0,Winner #wins
Winner,Unnamed: 1_level_1
Boston Celtics (1),56
Los Angeles Lakers (1),36
Chicago Bulls (1),20
San Antonio Spurs (1),12
Minneapolis Lakers (2),8


### This shows only the games won by the series winner. We also need to remove the number in parentheses after each team name, which represents the team's conference standing at the end of the regular season, so that the data rolls up by team properly.

In [92]:
# Work on a copy so finals_df keeps the original seeded team labels.
finals_df_no_seed = finals_df.copy()
for col_name in ['Winner', 'Loser']:
    # Drop the trailing " (seed)" suffix by splitting on the last " (" instead of slicing a
    # fixed 4 characters: this also handles multi-digit seeds and leaves names without a
    # suffix untouched.
    finals_df_no_seed[col_name] = finals_df_no_seed[col_name].map(
        lambda team: team.rsplit(' (', 1)[0]
    )
finals_df_no_seed.head()

Unnamed: 0,Winner,Winner #wins,Loser,Loser #wins,Total_Games
0,Cleveland Cavaliers,4,Golden State Warriors,3,7
1,Golden State Warriors,4,Cleveland Cavaliers,2,6
2,San Antonio Spurs,4,Miami Heat,1,5
3,Miami Heat,4,San Antonio Spurs,3,7
4,Miami Heat,4,Oklahoma City Thunder,1,5


### Let's now calculate the total number of wins for each team

In [115]:
# Group the games won by team, once for series winners and once for series losers.
winner_grouped = finals_df_no_seed[['Winner', 'Winner #wins']].groupby('Winner')
loser_grouped = finals_df_no_seed[['Loser', 'Loser #wins']].groupby('Loser')

# Add the two per-team sums; fill_value=0 keeps teams that appear on only one side.
(
    loser_grouped['Loser #wins'].sum()
    .add(winner_grouped['Winner #wins'].sum(), fill_value=0)
    .to_frame('Total Wins')
    .sort_values('Total Wins', ascending=False)
    .head()
)
    

Unnamed: 0,Total Wins
Boston Celtics,77.0
Los Angeles Lakers,69.0
Chicago Bulls,24.0
San Antonio Spurs,23.0
New York Knicks,20.0


### So the Celtics have won the most games in finals matchups

In [127]:
# Number of finals series won by each team (row count per winner), top five.
(
    finals_df_no_seed
    .groupby('Winner')[['Winner #wins']]
    .count()
    .sort_values('Winner #wins', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,Winner #wins
Winner,Unnamed: 1_level_1
Boston Celtics,17
Los Angeles Lakers,11
Chicago Bulls,6
San Antonio Spurs,5
Minneapolis Lakers,4


### They have also won the most NBA finals series

In [128]:
# Number of finals series lost by each team (row count per loser), top five.
(
    finals_df_no_seed
    .groupby('Loser')[['Loser #wins']]
    .count()
    .sort_values('Loser #wins', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,Loser #wins
Loser,Unnamed: 1_level_1
Los Angeles Lakers,14
New York Knicks,6
Boston Celtics,4
Philadelphia 76ers,4
St. Louis Hawks,3


### It's interesting to note that the LA Lakers (previously Minneapolis Lakers) have lost more finals series than they've won since they moved from Minneapolis to LA (11/25 or 44% winning percentage). 

![](http://media.celebremix.com/5/celebrity/431/1333657330.jpg)