In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from bs4 import BeautifulSoup

# Question 1

In [2]:
# loading NHL csv to a Dataframe
nhl_df = pd.read_csv("datasets/nhl.csv")

# uploading DBs
cities = pd.read_html("datasets/List of American and Canadian cities by number of major professional sports franchises - Wikipedia.html")[1]
# deleting unnecesary columns from cities
cities = cities.loc[:,["Metropolitan area","Population (2016 est.)[8]","NFL","MLB","NBA","NHL"]]

# filtering data per year. we only want 2018
nhl_df2 = nhl_df[nhl_df["year"] == 2018]

# deleting delinquent rows...
nhl_df2.drop([0,9,18,26], axis=0, inplace=True)

# changing columns to correct dtype
nhl_df2 = nhl_df2.astype({"W":"int32","L":"int32"})

# calculating win/loss ratio
    ## defining function
def w_l_ratio(frame):
    frame["W/L Ratio"] = frame["W"] / (frame["W"]+frame["L"])
    
    return frame
    
    ## applying
nhl_df2 = nhl_df2.apply(w_l_ratio, axis=1)
nhl_df2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,ROW,year,League,W/L Ratio
1,Tampa Bay Lightning*,82,54,23,5,113,0.689,296,236,0.66,-0.07,0.634,48,2018,NHL,0.701299
2,Boston Bruins*,82,50,20,12,112,0.683,270,214,0.62,-0.07,0.61,47,2018,NHL,0.714286
3,Toronto Maple Leafs*,82,49,26,7,105,0.64,277,232,0.49,-0.06,0.567,42,2018,NHL,0.653333
4,Florida Panthers,82,44,30,8,96,0.585,248,246,-0.01,-0.04,0.537,41,2018,NHL,0.594595
5,Detroit Red Wings,82,30,39,13,73,0.445,217,255,-0.48,-0.01,0.341,25,2018,NHL,0.434783
6,Montreal Canadiens,82,29,40,13,71,0.433,209,264,-0.68,0.0,0.378,27,2018,NHL,0.42029
7,Ottawa Senators,82,28,43,11,67,0.409,221,291,-0.85,0.0,0.372,26,2018,NHL,0.394366
8,Buffalo Sabres,82,25,45,12,62,0.378,199,280,-0.98,0.01,0.311,24,2018,NHL,0.357143
10,Washington Capitals*,82,49,26,7,105,0.64,259,239,0.21,-0.04,0.585,46,2018,NHL,0.653333
11,Pittsburgh Penguins*,82,47,29,6,100,0.61,272,250,0.23,-0.04,0.573,45,2018,NHL,0.618421


In [3]:
# creating mapping between teams and metropolitan areas
lista = list(nhl_df2["team"])
team_to_area = {item:"0" for item in lista}
team_to_area['Tampa Bay Lightning*'] = 'Tampa Bay Area'
team_to_area['Boston Bruins*'] = 'Boston'
team_to_area['Toronto Maple Leafs*'] = 'Toronto'
team_to_area['Florida Panthers'] = 'Miami–Fort Lauderdale'
team_to_area['Detroit Red Wings'] = 'Detroit'
team_to_area['Montreal Canadiens'] = 'Montreal'
team_to_area['Ottawa Senators'] = "Ottawa"
team_to_area['Buffalo Sabres'] = "Buffalo"
team_to_area['Washington Capitals*'] = "Washington, D.C."
team_to_area['Pittsburgh Penguins*'] = "Pittsburgh"
team_to_area['Philadelphia Flyers*'] = "Philadelphia"
team_to_area['Columbus Blue Jackets*'] = "Columbus"
team_to_area['New Jersey Devils*'] = "New York City"
team_to_area['Carolina Hurricanes'] = "Raleigh"
team_to_area['New York Islanders'] = "New York City"
team_to_area['Nashville Predators*'] = "Nashville"
team_to_area['New York Rangers'] = "New York City"
team_to_area['Winnipeg Jets*'] = "Winnipeg"
team_to_area['Minnesota Wild*'] = "Minneapolis–Saint Paul"
team_to_area['Colorado Avalanche*'] = "Denver"
team_to_area['St. Louis Blues'] = "St. Louis"
team_to_area['Dallas Stars'] = "Dallas–Fort Worth"
team_to_area['Chicago Blackhawks'] = "Chicago"
team_to_area['Vegas Golden Knights*'] = "Las Vegas"
team_to_area['Anaheim Ducks*'] = "Los Angeles"
team_to_area['San Jose Sharks*'] = "San Francisco Bay Area"
team_to_area['Los Angeles Kings*'] = "Los Angeles"
team_to_area['Calgary Flames'] = "Calgary"
team_to_area['Edmonton Oilers'] = "Edmonton"
team_to_area['Vancouver Canucks'] = "Vancouver"
team_to_area['Arizona Coyotes'] = "Phoenix"

# creating a new column for metropolitan area
nhl_df2["Metropolitan area"] = nhl_df2["team"].apply(lambda x: team_to_area[x] if x in team_to_area else np.nan)
nhl_df2

Unnamed: 0,team,GP,W,L,OL,PTS,PTS%,GF,GA,SRS,SOS,RPt%,ROW,year,League,W/L Ratio,Metropolitan area
1,Tampa Bay Lightning*,82,54,23,5,113,0.689,296,236,0.66,-0.07,0.634,48,2018,NHL,0.701299,Tampa Bay Area
2,Boston Bruins*,82,50,20,12,112,0.683,270,214,0.62,-0.07,0.61,47,2018,NHL,0.714286,Boston
3,Toronto Maple Leafs*,82,49,26,7,105,0.64,277,232,0.49,-0.06,0.567,42,2018,NHL,0.653333,Toronto
4,Florida Panthers,82,44,30,8,96,0.585,248,246,-0.01,-0.04,0.537,41,2018,NHL,0.594595,Miami–Fort Lauderdale
5,Detroit Red Wings,82,30,39,13,73,0.445,217,255,-0.48,-0.01,0.341,25,2018,NHL,0.434783,Detroit
6,Montreal Canadiens,82,29,40,13,71,0.433,209,264,-0.68,0.0,0.378,27,2018,NHL,0.42029,Montreal
7,Ottawa Senators,82,28,43,11,67,0.409,221,291,-0.85,0.0,0.372,26,2018,NHL,0.394366,Ottawa
8,Buffalo Sabres,82,25,45,12,62,0.378,199,280,-0.98,0.01,0.311,24,2018,NHL,0.357143,Buffalo
10,Washington Capitals*,82,49,26,7,105,0.64,259,239,0.21,-0.04,0.585,46,2018,NHL,0.653333,"Washington, D.C."
11,Pittsburgh Penguins*,82,47,29,6,100,0.61,272,250,0.23,-0.04,0.573,45,2018,NHL,0.618421,Pittsburgh


In [4]:
# creating relevant through an inner join.
t1 = pd.merge(cities, nhl_df2, how="inner", on="Metropolitan area")

# defining dummy function
def keep(val):
    if len(val) > 1:
        return val.iloc[0]
    return val


In [5]:
from scipy import stats
# fixing data type
t1 = t1.astype({"Population (2016 est.)[8]":"int64"})
t1 = t1.pivot_table(index="Metropolitan area", aggfunc={"W/L Ratio":np.average,"Population (2016 est.)[8]":keep})


In [6]:
results = stats.pearsonr(t1["Population (2016 est.)[8]"], t1["W/L Ratio"])
results

(0.012486162921209923, 0.9497182859911781)

In [7]:
%%timeit -n 100
results = stats.pearsonr(t1["Population (2016 est.)[8]"], t1["W/L Ratio"])

92 µs ± 22.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
%%timeit -n 100
area = list(t1["Population (2016 est.)[8]"])
pop = list(t1["W/L Ratio"])

results = stats.pearsonr(area,pop)

86.4 µs ± 23.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Question 2

In [9]:
# upload DF
nba_df = pd.read_csv("datasets/nba.csv")

# create mask tso only 2018 results are relevant
nba_df = nba_df[(nba_df["year"] == 2018)]

# convert w/l % to float
nba_df = nba_df.astype({"W/L%":"float"})

In [10]:
# clean column via apply
    #explanation
        ## separate each team name via ssplit through whitespaces. then, join everything back but omit the last element since it is a paranthesis and a number
nba_df["team"] = nba_df["team"].apply(lambda x: " ".join(x.split()[0:2]) if len(x.split()) == 3 else " ".join(x.split()[0:3]))
# fix this one for the dictionary
nba_df["team"].loc[25] = 'Los Angeles2'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [11]:
# creating dictionaries with link from team to area
lista = list(nba_df["team"])
name_to_team = {name:"0" for name in lista}
name_to_team['Toronto Raptors*'] = "Toronto"
name_to_team['Boston Celtics*'] = "Boston"
name_to_team['Philadelphia 76ers*'] = "Philadelphia"
name_to_team['Cleveland Cavaliers*'] = "Cleveland"
name_to_team['Indiana Pacers*'] = "Indianapolis"
name_to_team['Miami Heat*'] = "Miami–Fort Lauderdale"
name_to_team['Milwaukee Bucks*'] = "Milwaukee"
name_to_team['Washington Wizards*'] = "Washington, D.C."
name_to_team['Detroit Pistons'] = "Detroit"
name_to_team['Charlotte Hornets'] = "Charlotte"
name_to_team['New York'] = "New York City"
name_to_team['Brooklyn Nets'] = "New York City"
name_to_team['Chicago Bulls'] = "Chicago"
name_to_team['Orlando Magic'] = "Orlando"
name_to_team['Atlanta Hawks'] = "Atlanta"
name_to_team['Houston Rockets*'] = "Houston"
name_to_team['Golden State'] = "San Francisco Bay Area"
name_to_team['Portland Trail'] = "Portland"
name_to_team['Oklahoma City'] = "Oklahoma City"
name_to_team['Utah Jazz*'] = "Salt Lake City"
name_to_team['New Orleans'] = "New Orleans"
name_to_team['San Antonio'] = "San Antonio"
name_to_team['Minnesota Timberwolves*'] = "Minneapolis–Saint Paul"
name_to_team['Denver Nuggets'] = "Denver"
name_to_team['Los Angeles'] = "Los Angeles"
name_to_team['Los Angeles2'] = "Los Angeles"
name_to_team['Sacramento Kings'] = "Sacramento"
name_to_team['Dallas Mavericks'] = "Dallas–Fort Worth"
name_to_team['Memphis Grizzlies'] = "Memphis"
name_to_team['Phoenix Suns'] = "Phoenix"
nba_df["Metropolitan area"] = nba_df["team"].apply(lambda x: name_to_team[x])

In [12]:
# creating new column
nba_df["Metropolitan area"] = nba_df["team"].apply(lambda x: name_to_team[x])


In [13]:
# merging via inner join
t2 = pd.merge(nba_df, cities, how="inner", on="Metropolitan area")

# keeping only relevant columns
t2 = t2.loc[:,["team","W/L%","Metropolitan area","Population (2016 est.)[8]"]]

# creating pivot table for grouping
t2 = t2.pivot_table(index="Metropolitan area", aggfunc={"Population (2016 est.)[8]":keep, "W/L%":np.average})
t2 = t2.astype({"Population (2016 est.)[8]":"int64"})

results = stats.pearsonr(t2["Population (2016 est.)[8]"], t2["W/L%"])
results

(-0.17208033774089868, 0.4323775858417461)

# Question 3

In [14]:
#open the df
mlb_df = pd.read_csv("datasets/mlb.csv")

#create a mask and apply so that we only keep the 2018 data
mlb_df = mlb_df[mlb_df["year"] == 2018]

#convert the relevant data type so we are safe
mlb_df.astype({"W-L%":"float"})

Unnamed: 0,team,W,L,W-L%,GB,year,League
0,Boston Red Sox,108,54,0.667,--,2018,MLB
1,New York Yankees,100,62,0.617,8.0,2018,MLB
2,Tampa Bay Rays,90,72,0.556,18.0,2018,MLB
3,Toronto Blue Jays,73,89,0.451,35.0,2018,MLB
4,Baltimore Orioles,47,115,0.29,61.0,2018,MLB
5,Cleveland Indians,91,71,0.562,--,2018,MLB
6,Minnesota Twins,78,84,0.481,13.0,2018,MLB
7,Detroit Tigers,64,98,0.395,27.0,2018,MLB
8,Chicago White Sox,62,100,0.383,29.0,2018,MLB
9,Kansas City Royals,58,104,0.358,33.0,2018,MLB


In [15]:
#retrieving team names to create a suitable dictionary
lista = list(mlb_df["team"])

#creating a dictionary from it
name_to_team = {name:np.nan for name in lista}
name_to_team['Boston Red Sox'] = "Boston"
name_to_team['New York Yankees'] = "New York City"
name_to_team['Tampa Bay Rays'] = "Tampa Bay Area"
name_to_team['Toronto Blue Jays'] = "Toronto"
name_to_team['Baltimore Orioles'] = "Baltimore"
name_to_team['Cleveland Indians'] = "Cleveland"
name_to_team['Minnesota Twins'] = "Minneapolis–Saint Paul"
name_to_team['Detroit Tigers'] = "Detroit"
name_to_team['Chicago White Sox'] = "Chicago"
name_to_team['Kansas City Royals'] = "Kansas City"
name_to_team['Houston Astros'] = "Houston"
name_to_team['Oakland Athletics'] = "San Francisco Bay Area"
name_to_team['Seattle Mariners'] = "Seattle"
name_to_team['Los Angeles Angels'] = "Los Angeles"
name_to_team['Texas Rangers'] = "Dallas–Fort Worth"
name_to_team['Atlanta Braves'] = "Atlanta"
name_to_team['Washington Nationals'] = "Washington, D.C."
name_to_team['Philadelphia Phillies'] = "Philadelphia"
name_to_team['New York Mets'] = "New York City"
name_to_team['Miami Marlins'] = "Miami–Fort Lauderdale"
name_to_team['Milwaukee Brewers'] = "Milwaukee"
name_to_team['Chicago Cubs'] = "Chicago"
name_to_team['St. Louis Cardinals'] = "St. Louis"
name_to_team['Pittsburgh Pirates'] = "Pittsburgh"
name_to_team['Cincinnati Reds'] = "Cincinnati"
name_to_team['Los Angeles Dodgers'] = "Los Angeles"
name_to_team['Colorado Rockies'] = "Denver"
name_to_team['Arizona Diamondbacks'] = "Phoenix"
name_to_team['San Francisco Giants'] = "San Francisco Bay Area"
name_to_team['San Diego Padres'] = "San Diego"
name_to_team

{'Boston Red Sox': 'Boston',
 'New York Yankees': 'New York City',
 'Tampa Bay Rays': 'Tampa Bay Area',
 'Toronto Blue Jays': 'Toronto',
 'Baltimore Orioles': 'Baltimore',
 'Cleveland Indians': 'Cleveland',
 'Minnesota Twins': 'Minneapolis–Saint Paul',
 'Detroit Tigers': 'Detroit',
 'Chicago White Sox': 'Chicago',
 'Kansas City Royals': 'Kansas City',
 'Houston Astros': 'Houston',
 'Oakland Athletics': 'San Francisco Bay Area',
 'Seattle Mariners': 'Seattle',
 'Los Angeles Angels': 'Los Angeles',
 'Texas Rangers': 'Dallas–Fort Worth',
 'Atlanta Braves': 'Atlanta',
 'Washington Nationals': 'Washington, D.C.',
 'Philadelphia Phillies': 'Philadelphia',
 'New York Mets': 'New York City',
 'Miami Marlins': 'Miami–Fort Lauderdale',
 'Milwaukee Brewers': 'Milwaukee',
 'Chicago Cubs': 'Chicago',
 'St. Louis Cardinals': 'St. Louis',
 'Pittsburgh Pirates': 'Pittsburgh',
 'Cincinnati Reds': 'Cincinnati',
 'Los Angeles Dodgers': 'Los Angeles',
 'Colorado Rockies': 'Denver',
 'Arizona Diamondbacks'

In [16]:
# creating the new columnb in the MLB table
mlb_df["Metropolitan area"] = mlb_df["team"].apply(lambda x: name_to_team[x])


In [17]:
#merging tables

t3 = pd.merge(mlb_df, cities, how="inner", on="Metropolitan area")

# creating joint table via groupby and fix data type
t3 = t3.groupby("Metropolitan area").agg({"Population (2016 est.)[8]":keep, "W-L%":np.average})
t3 = t3.astype({"Population (2016 est.)[8]":"int64"})

#calculating resulting value
results = stats.pearsonr(t3["Population (2016 est.)[8]"], t3["W-L%"])
results[0]

0.15003737475409498

# Question 4

In [18]:
#creating df
nfl_df = pd.read_csv("datasets/nfl.csv")

#masking so only 2018 is relevant
nfl_df = nfl_df[nfl_df["year"] == 2018]

#deleting delinquent rows
nfl_df = nfl_df.drop([0,5,10,15,20,25,30,35])

# correcting data types
nfl_df = nfl_df.astype({"W-L%":"float"})


In [19]:
#creating list for mappings
lista = list(nfl_df["team"])

# creating dictionary via dic comp
name_to_team = {name:np.nan for name in lista}
name_to_team["New England Patriots*"] = "Boston"
name_to_team["Miami Dolphins"] = "Miami–Fort Lauderdale"
name_to_team["Buffalo Bills"] = "Buffalo"
name_to_team["New York Jets"] = "New York City"
name_to_team["Baltimore Ravens*"] = "Baltimore"
name_to_team["Pittsburgh Steelers"] = "Pittsburgh"
name_to_team["Cleveland Browns"] = "Cleveland"
name_to_team["Cincinnati Bengals"] = "Cincinnati"
name_to_team["Houston Texans*"] = "Houston"
name_to_team["Indianapolis Colts+"] = "Indianapolis"
name_to_team["Tennessee Titans"] = "Nashville"
name_to_team["Jacksonville Jaguars"] = "Jacksonville"
name_to_team["Kansas City Chiefs*"] = "Kansas City"
name_to_team["Los Angeles Chargers+"] = "Los Angeles"
name_to_team["Denver Broncos"] = "Denver"
name_to_team["Oakland Raiders"] = "San Francisco Bay Area"
name_to_team["Dallas Cowboys*"] = "Dallas–Fort Worth"
name_to_team["Philadelphia Eagles+"] = "Philadelphia"
name_to_team["Washington Redskins"] = "Washington, D.C."
name_to_team["New York Giants"] = "New York City"
name_to_team["Chicago Bears*"] = "Chicago"
name_to_team["Minnesota Vikings"] = "Minneapolis–Saint Paul"
name_to_team["Green Bay Packers"] = "Green Bay"
name_to_team["Detroit Lions"] = "Detroit"
name_to_team["New Orleans Saints*"] = "New Orleans"
name_to_team["Carolina Panthers"] = "Charlotte"
name_to_team["Atlanta Falcons"] = "Atlanta"
name_to_team["Tampa Bay Buccaneers"] = "Tampa Bay Area"
name_to_team["Los Angeles Rams*"] = "Los Angeles"
name_to_team["Seattle Seahawks+"] = "Seattle"
name_to_team["San Francisco 49ers"] = "San Francisco Bay Area"
name_to_team["Arizona Cardinals"] = "Phoenix"
name_to_team

{'New England Patriots*': 'Boston',
 'Miami Dolphins': 'Miami–Fort Lauderdale',
 'Buffalo Bills': 'Buffalo',
 'New York Jets': 'New York City',
 'Baltimore Ravens*': 'Baltimore',
 'Pittsburgh Steelers': 'Pittsburgh',
 'Cleveland Browns': 'Cleveland',
 'Cincinnati Bengals': 'Cincinnati',
 'Houston Texans*': 'Houston',
 'Indianapolis Colts+': 'Indianapolis',
 'Tennessee Titans': 'Nashville',
 'Jacksonville Jaguars': 'Jacksonville',
 'Kansas City Chiefs*': 'Kansas City',
 'Los Angeles Chargers+': 'Los Angeles',
 'Denver Broncos': 'Denver',
 'Oakland Raiders': 'San Francisco Bay Area',
 'Dallas Cowboys*': 'Dallas–Fort Worth',
 'Philadelphia Eagles+': 'Philadelphia',
 'Washington Redskins': 'Washington, D.C.',
 'New York Giants': 'New York City',
 'Chicago Bears*': 'Chicago',
 'Minnesota Vikings': 'Minneapolis–Saint Paul',
 'Green Bay Packers': 'Green Bay',
 'Detroit Lions': 'Detroit',
 'New Orleans Saints*': 'New Orleans',
 'Carolina Panthers': 'Charlotte',
 'Atlanta Falcons': 'Atlanta',
 

In [20]:
# creating new column
nfl_df["Metropolitan area"] = nfl_df["team"].apply(lambda x: name_to_team[x])


In [21]:
# creating joined table
t4 = pd.merge(nfl_df, cities, how="inner", on="Metropolitan area")

# summarizing via groupby and cleaning data type
t4 = t4.groupby(["Metropolitan area","team"]).agg({"Population (2016 est.)[8]":keep, "W-L%":np.average})
t4 = t4.astype({"Population (2016 est.)[8]":"int64"})

#calculating results
results = stats.pearsonr(t4["Population (2016 est.)[8]"], t4["W-L%"])
results

(-0.0489446577965297, 0.790226506674019)

# Question 5

In [29]:
# recreating tables with proper formatting
t1 = pd.merge(cities, nhl_df2, how="inner", on="Metropolitan area")
t1 = t1.pivot_table(index=["Metropolitan area"], aggfunc={"W/L Ratio":np.average,"team":keep})
t1 = t1.rename(columns={"team":"nhl_team"})


nba_df = pd.read_csv("datasets/nba.csv")
nba_df = nba_df[(nba_df["year"] == 2018)]
nba_df = nba_df.astype({"W/L%":"float"})
nba_df["team"] = nba_df["team"].apply(lambda x: " ".join(x.split()[0:2]) if len(x.split()) == 3 else " ".join(x.split()[0:3]))
nba_df["team"].loc[25] = 'Los Angeles2'
lista = list(nba_df["team"])
name_to_team = {name:"0" for name in lista}
name_to_team['Toronto Raptors*'] = "Toronto"
name_to_team['Boston Celtics*'] = "Boston"
name_to_team['Philadelphia 76ers*'] = "Philadelphia"
name_to_team['Cleveland Cavaliers*'] = "Cleveland"
name_to_team['Indiana Pacers*'] = "Indianapolis"
name_to_team['Miami Heat*'] = "Miami–Fort Lauderdale"
name_to_team['Milwaukee Bucks*'] = "Milwaukee"
name_to_team['Washington Wizards*'] = "Washington, D.C."
name_to_team['Detroit Pistons'] = "Detroit"
name_to_team['Charlotte Hornets'] = "Charlotte"
name_to_team['New York'] = "New York City"
name_to_team['Brooklyn Nets'] = "New York City"
name_to_team['Chicago Bulls'] = "Chicago"
name_to_team['Orlando Magic'] = "Orlando"
name_to_team['Atlanta Hawks'] = "Atlanta"
name_to_team['Houston Rockets*'] = "Houston"
name_to_team['Golden State'] = "San Francisco Bay Area"
name_to_team['Portland Trail'] = "Portland"
name_to_team['Oklahoma City'] = "Oklahoma City"
name_to_team['Utah Jazz*'] = "Salt Lake City"
name_to_team['New Orleans'] = "New Orleans"
name_to_team['San Antonio'] = "San Antonio"
name_to_team['Minnesota Timberwolves*'] = "Minneapolis–Saint Paul"
name_to_team['Denver Nuggets'] = "Denver"
name_to_team['Los Angeles'] = "Los Angeles"
name_to_team['Los Angeles2'] = "Los Angeles"
name_to_team['Sacramento Kings'] = "Sacramento"
name_to_team['Dallas Mavericks'] = "Dallas–Fort Worth"
name_to_team['Memphis Grizzlies'] = "Memphis"
name_to_team['Phoenix Suns'] = "Phoenix"
nba_df["Metropolitan area"] = nba_df["team"].apply(lambda x: name_to_team[x])
t2 = pd.merge(nba_df, cities, how="inner", on="Metropolitan area")
t2 = t2.pivot_table(index=["Metropolitan area"], aggfunc={"W/L%":np.average,"team":keep})
t2 = t2.rename(columns={"team":"nba_team"})

t3 = pd.merge(mlb_df, cities, how="inner", on="Metropolitan area")
t3 = t3.groupby(["Metropolitan area"]).agg({"W-L%":np.average,"team":keep})
t3 = t3.rename(columns={"team":"mlb_team"})

t4 = pd.merge(nfl_df, cities, how="inner", on="Metropolitan area")
t4 = t4.groupby(["Metropolitan area"]).agg({"W-L%":np.average,"team":keep})
t4 = t4.rename(columns={"team":"nfl_team"})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0_level_0,W/L%,nba_team
Metropolitan area,Unnamed: 1_level_1,Unnamed: 2_level_1
Atlanta,0.293,Atlanta Hawks
Boston,0.671,Boston Celtics*
Charlotte,0.439,Charlotte Hornets
Chicago,0.329,Chicago Bulls
Cleveland,0.61,Cleveland Cavaliers*
Dallas–Fort Worth,0.293,Dallas Mavericks
Denver,0.561,Denver Nuggets
Detroit,0.476,Detroit Pistons
Houston,0.793,Houston Rockets*
Indianapolis,0.585,Indiana Pacers*


In [43]:
# this creates the right table but i need to fix the NAs and do somemore adjustments
pd.concat([t1,t2,t3,t4],axis=1)

Unnamed: 0_level_0,W/L Ratio,nhl_team,W/L%,nba_team,W-L%,team,W-L%,team
Metropolitan area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Boston,0.714286,Boston Bruins*,0.671,Boston Celtics*,0.667,Boston Red Sox,0.688,New England Patriots*
Buffalo,0.357143,Buffalo Sabres,,,,,0.375,Buffalo Bills
Calgary,0.513889,Calgary Flames,,,,,,
Chicago,0.458333,Chicago Blackhawks,0.329,Chicago Bulls,0.483,Chicago White Sox,0.75,Chicago Bears*
Columbus,0.6,Columbus Blue Jackets*,,,,,,
Dallas–Fort Worth,0.567568,Dallas Stars,0.293,Dallas Mavericks,0.414,Texas Rangers,0.625,Dallas Cowboys*
Denver,0.589041,Colorado Avalanche*,0.561,Denver Nuggets,0.558,Colorado Rockies,0.375,Denver Broncos
Detroit,0.434783,Detroit Red Wings,0.476,Detroit Pistons,0.395,Detroit Tigers,0.375,Detroit Lions
Edmonton,0.473684,Edmonton Oilers,,,,,,
Las Vegas,0.68,Vegas Golden Knights*,,,,,,
