In [2]:
%matplotlib notebook

In [3]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from datetime import date

In [4]:
# Load csv
tourney_results = pd.read_csv('ncaa_tourney_results.csv')
tourney_results.head()

Unnamed: 0,Date,Round,Region,Winning Seed,Winner,Winning Score,Losing Seed,Loser,Losing Score,Overtime
0,3/14/1985,Round of 64,East,1,Georgetown,68,16,Lehigh,43,
1,3/14/1985,Round of 64,East,4,"Loyola, Illinois",59,13,Iona,58,
2,3/14/1985,Round of 64,East,5,Southern Methodist,85,12,Old Dominion,68,
3,3/14/1985,Round of 64,East,8,Temple,60,9,Virginia Tech,57,
4,3/14/1985,Round of 64,Midwest,1,Oklahoma,96,16,North Carolina A&T,83,


In [5]:
# UNC results
only_UNC_wins = tourney_results.loc[tourney_results['Winner'] == "North Carolina", :]
print(only_UNC_wins)

           Date                Round     Region  Winning Seed          Winner  \
8     3/14/1985          Round of 64  Southeast             2  North Carolina   
36    3/16/1985          Round of 32  Southeast             2  North Carolina   
52    3/22/1985        Sweet Sixteen  Southeast             2  North Carolina   
76    3/13/1986          Round of 64       West             3  North Carolina   
102   3/15/1986          Round of 32       West             3  North Carolina   
...         ...                  ...        ...           ...             ...   
1987  3/17/2016          Round of 64       East             1  North Carolina   
2019  3/19/2016          Round of 32       East             1  North Carolina   
2039  3/25/2016        Sweet Sixteen       East             1  North Carolina   
2045  3/27/2016          Elite Eight       East             1  North Carolina   
2047   4/2/2016  National Semifinals        NaN             1  North Carolina   

      Winning Score  Losing

In [6]:
UNC_wins = pd.DataFrame(only_UNC_wins)
UNC_wins.head()

Unnamed: 0,Date,Round,Region,Winning Seed,Winner,Winning Score,Losing Seed,Loser,Losing Score,Overtime
8,3/14/1985,Round of 64,Southeast,2,North Carolina,76,15,Middle Tennessee State,57,
36,3/16/1985,Round of 32,Southeast,2,North Carolina,60,7,Notre Dame,58,
52,3/22/1985,Sweet Sixteen,Southeast,2,North Carolina,62,11,Auburn,56,
76,3/13/1986,Round of 64,West,3,North Carolina,84,14,Utah,72,
102,3/15/1986,Round of 32,West,3,North Carolina,77,6,UAB,59,


In [7]:
# Average UNC seed when UNC wins
UNC_win_seed = UNC_wins['Winning Seed'].mean()
UNC_win_seed

2.1951219512195124

In [8]:
# Average seed defeated
avg_seed_defeated = UNC_wins['Losing Seed'].mean()
avg_seed_defeated

8.74390243902439

In [9]:
# Average UNC score
avg_win_score = UNC_wins['Winning Score'].mean()
avg_win_score

82.48780487804878

In [10]:
# Average defeated team score
avg_loser_score = UNC_wins['Losing Score'].mean()
avg_loser_score

67.86585365853658

In [11]:
# Average margin of victory
avg_MOV = ((avg_win_score) - (avg_loser_score))
avg_MOV

14.621951219512198

In [12]:
# Wins by region
wins_by_region = UNC_wins['Region'].value_counts()
wins_by_region

East                     41
Southeast                11
South                    11
West                      7
Midwest                   5
National Semifinals       3
National Championship     3
Name: Region, dtype: int64

In [13]:
# Wins by round
wins_by_round = UNC_wins['Round'].value_counts()
wins_by_round

Round of 64              28
Round of 32              21
Sweet Sixteen            16
Elite Eight              10
National Semifinals       4
National Championship     3
Name: Round, dtype: int64

In [14]:
# Overtime wins
UNC_wins['Overtime'].value_counts()

1 OT    3
Name: Overtime, dtype: int64

In [17]:
UNC_Ls_by_year = UNC_losses.sort_values(by=['Date'])
UNC_Ls_by_year.head()

Unnamed: 0,Date,Round,Region,Winning Seed,Winner,Winning Score,Losing Seed,Loser,Losing Score,Overtime
897,3/11/1999,Round of 64,West,14,Weber State,76,3,North Carolina,74,
734,3/17/1996,Round of 32,East,3,Texas Tech,92,6,North Carolina,73,
1054,3/18/2001,Round of 32,South,7,Pennsylvania State,82,2,North Carolina,74,
1370,3/19/2006,Round of 32,East,11,George Mason,65,3,North Carolina,60,
113,3/20/1986,Sweet Sixteen,West,2,Louisville,94,3,North Carolina,79,


In [18]:
UNC_Ls_by_year.head()

Unnamed: 0,Date,Round,Region,Winning Seed,Winner,Winning Score,Losing Seed,Loser,Losing Score,Overtime
897,3/11/1999,Round of 64,West,14,Weber State,76,3,North Carolina,74,
734,3/17/1996,Round of 32,East,3,Texas Tech,92,6,North Carolina,73,
1054,3/18/2001,Round of 32,South,7,Pennsylvania State,82,2,North Carolina,74,
1370,3/19/2006,Round of 32,East,11,George Mason,65,3,North Carolina,60,
113,3/20/1986,Sweet Sixteen,West,2,Louisville,94,3,North Carolina,79,


In [19]:
# How many different dates has UNC lost on
UNC_Ls_by_year['Date'].value_counts()

3/24/2013    1
3/23/2014    1
3/25/2007    1
3/20/1986    1
3/25/2012    1
3/20/1994    1
3/26/2015    1
3/24/1985    1
3/30/1991    1
4/5/2008     1
3/27/1992    1
3/17/1996    1
3/28/1998    1
3/11/1999    1
4/4/2016     1
3/21/1987    1
3/27/1988    1
3/27/2011    1
4/1/1995     1
3/22/1990    1
3/20/2004    1
3/23/1989    1
4/1/2000     1
3/18/2001    1
3/29/1997    1
3/19/2006    1
Name: Date, dtype: int64

In [20]:
# Can iterate through dataframe to obtain individual regions
for region in UNC_Ls_by_year.index:
    print(UNC_Ls_by_year['Region'][region], UNC_Ls_by_year['Losing Seed'][region])

West 3
East 6
South 2
East 3
West 3
East 1
South 6
East 1
Midwest 8
Southeast 2
East 6
Southeast 2
South 8
East 1
Midwest 1
West 4
West 2
Southeast 4
East 2
National Semifinals 1
National Semifinals 1
National Semifinals 1
National Semifinals 2
National Semifinals 8
nan 1
National Semifinals 1
