# Geographical Visualizations with Python

### 1. Import Data and Libraries

In [3]:
import pandas as pd
import numpy as np 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import geopandas as gpd
import geoplot
import plotly.express as px
import json
from sklearn.impute import KNNImputer
import sklearn
from sklearn.cluster import KMeans
from scipy.interpolate import UnivariateSpline
import geojson
import requests

In [4]:
# This magic command makes matplotlib figures render inline in the notebook

%matplotlib inline

In [5]:
# Location of the stadium GeoJSON file used as the geographic layer for the map.
# NOTE(review): this is an absolute, machine-specific path, so the notebook only
# runs on this machine as written.

nfl_geo = r'C:\Users\gre60\OneDrive\CareerFoundry Data Analytics Program\Data Immersion\Achievement 6\NFL Final Project 2\stadiums.geojson'

In [6]:
# Optional: look at the contents of the stadium GeoJSON file here in the notebook.
# The `with` block closes the file handle as soon as parsing is done (a bare
# open() leaves it open for the life of the kernel), and the explicit UTF-8
# encoding avoids depending on the Windows locale default.
with open(nfl_geo, encoding='utf-8') as f:
    # Parsed GeoJSON document; indexable like a dictionary
    data = geojson.load(f)

# Print each feature of the collection on its own line
for feature in data['features']:
    print(feature)

{"geometry": {"coordinates": [-74.074444, 40.813611], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 40.813611, "League": "NFL", "Long": -74.074444, "Stadium": "MetLife Stadium", "Team": "New York Giants"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.074444, 40.813611], "type": "Point"}, "properties": {"Conference": "AFC", "Lat": 40.813611, "League": "NFL", "Long": -74.074444, "Stadium": "MetLife Stadium", "Team": "New York Jets"}, "type": "Feature"}
{"geometry": {"coordinates": [-88.062222, 44.501389], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 44.501389, "League": "NFL", "Long": -88.062222, "Stadium": "Lambeau Field", "Team": "Green Bay Packers"}, "type": "Feature"}
{"geometry": {"coordinates": [-97.092778, 32.747778], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 32.747778, "League": "NFL", "Long": -97.092778, "Stadium": "AT&T Stadium", "Team": "Dallas Cowboys"}, "type": "Feature"}
{"geometry": {"coordinates": [-76.864444, 38.9

In [7]:
# Project folder that the CSV input is read from and the CSV export is written to.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path = r'C:\Users\gre60\OneDrive\CareerFoundry Data Analytics Program\Data Immersion\Achievement 6\NFL Final Project 2'

In [8]:
# Load the NFL spread-analysis dataset from the project folder
spread_csv = os.path.join(path, 'updated_nfl_spread_analysis.csv')
df = pd.read_csv(spread_csv)

In [9]:
# Preview the first five rows of the freshly loaded data
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,...,temp,wind,stadium_id,stadium,home_market,home_name,away_market,away_name,home_division,away_division
0,0,1999_01_MIN_ATL,1999,REG,1,9/12/1999,Sunday,,MIN,17.0,...,,,ATL00,Georgia Dome,Atlanta,Falcons,Minnesota,Vikings,NFC South,NFC North
1,1,1999_01_KC_CHI,1999,REG,1,9/12/1999,Sunday,,KC,17.0,...,80.0,12.0,CHI98,Soldier Field,Chicago,Bears,Kansas City,Chiefs,NFC North,AFC West
2,2,1999_01_PIT_CLE,1999,REG,1,9/12/1999,Sunday,,PIT,43.0,...,78.0,12.0,CLE00,Cleveland Browns Stadium,Cleaveland,Browns,Pittsburgh,Steelers,AFC North,AFC North
3,3,1999_01_OAK_GB,1999,REG,1,9/12/1999,Sunday,,OAK,24.0,...,67.0,10.0,GNB00,Lambeau Field,Green Bay,Packers,Oakland,Raiders,NFC North,AFC West
4,4,1999_01_BUF_IND,1999,REG,1,9/12/1999,Sunday,,BUF,14.0,...,,,IND99,RCA Dome,Indianapolis,Colts,Buffalo,Bills,AFC South,AFC East


In [10]:
# Show every column name as a plain Python list
list(df.columns)

['Unnamed: 0',
 'game_id',
 'season',
 'game_type',
 'week',
 'gameday',
 'weekday',
 'gametime',
 'away_team',
 'away_score',
 'home_team',
 'home_score',
 'location',
 'actual_spread',
 'actual_total',
 'overtime',
 'away_rest',
 'home_rest',
 'projected_spread',
 'projected_total',
 'div_game',
 'stadium type',
 'surface',
 'temp',
 'wind',
 'stadium_id',
 'stadium',
 'home_market',
 'home_name',
 'away_market',
 'away_name',
 'home_division',
 'away_division']

In [11]:
# Remove the leftover index column that came in with the CSV.
# errors='ignore' keeps this cell safe to re-run: once the column is gone,
# a second execution is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df.columns.to_list()

['game_id',
 'season',
 'game_type',
 'week',
 'gameday',
 'weekday',
 'gametime',
 'away_team',
 'away_score',
 'home_team',
 'home_score',
 'location',
 'actual_spread',
 'actual_total',
 'overtime',
 'away_rest',
 'home_rest',
 'projected_spread',
 'projected_total',
 'div_game',
 'stadium type',
 'surface',
 'temp',
 'wind',
 'stadium_id',
 'stadium',
 'home_market',
 'home_name',
 'away_market',
 'away_name',
 'home_division',
 'away_division']

In [12]:
# Number of home games per market, most frequent first
home_market_counts = df['home_market'].value_counts()
home_market_counts

home_market
New York         427
New England      239
Philadelphia     228
Kansas City      228
Green Bay        226
Pittsburgh       225
Seattle          224
Indianapolis     223
Denver           221
New Orleans      221
San Francisco    221
Tampa Bay        220
Baltimore        219
Chicago          218
Minnesota        217
Dallas           217
Atlanta          217
Buffalo          217
Tennessee        217
Cincinnati       216
Carolina         216
Arizona          215
Washington       214
Jacksonville     214
Miami            213
Detroit          212
Cleaveland       210
Houston          193
Oakland          173
San Diego        149
Los Angeles      145
St Louis         141
Las Vegas         42
Name: count, dtype: int64

In [13]:
# Correct the misspelled market name 'Cleaveland' -> 'Cleveland'.
# Both market columns are cleaned because the full team names are built from
# each of them; fixing only home_market would leave any 'Cleaveland' value in
# away_market untouched. (If away_market has no such value the replace is a no-op.)
market_cols = ['home_market', 'away_market']
df[market_cols] = df[market_cols].replace('Cleaveland', 'Cleveland')
df['home_market'].value_counts()

home_market
New York         427
New England      239
Philadelphia     228
Kansas City      228
Green Bay        226
Pittsburgh       225
Seattle          224
Indianapolis     223
Denver           221
New Orleans      221
San Francisco    221
Tampa Bay        220
Baltimore        219
Chicago          218
Minnesota        217
Dallas           217
Atlanta          217
Buffalo          217
Tennessee        217
Cincinnati       216
Carolina         216
Arizona          215
Washington       214
Jacksonville     214
Miami            213
Detroit          212
Cleveland        210
Houston          193
Oakland          173
San Diego        149
Los Angeles      145
St Louis         141
Las Vegas         42
Name: count, dtype: int64

In [14]:
# Build "<market> <name>" full team names for both the home and the away side
for side in ('home', 'away'):
    df[f'{side}_full_name'] = df[f'{side}_market'] + ' ' + df[f'{side}_name']

df.head()

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,stadium_id,stadium,home_market,home_name,away_market,away_name,home_division,away_division,home_full_name,away_full_name
0,1999_01_MIN_ATL,1999,REG,1,9/12/1999,Sunday,,MIN,17.0,ATL,...,ATL00,Georgia Dome,Atlanta,Falcons,Minnesota,Vikings,NFC South,NFC North,Atlanta Falcons,Minnesota Vikings
1,1999_01_KC_CHI,1999,REG,1,9/12/1999,Sunday,,KC,17.0,CHI,...,CHI98,Soldier Field,Chicago,Bears,Kansas City,Chiefs,NFC North,AFC West,Chicago Bears,Kansas City Chiefs
2,1999_01_PIT_CLE,1999,REG,1,9/12/1999,Sunday,,PIT,43.0,CLE,...,CLE00,Cleveland Browns Stadium,Cleveland,Browns,Pittsburgh,Steelers,AFC North,AFC North,Cleveland Browns,Pittsburgh Steelers
3,1999_01_OAK_GB,1999,REG,1,9/12/1999,Sunday,,OAK,24.0,GB,...,GNB00,Lambeau Field,Green Bay,Packers,Oakland,Raiders,NFC North,AFC West,Green Bay Packers,Oakland Raiders
4,1999_01_BUF_IND,1999,REG,1,9/12/1999,Sunday,,BUF,14.0,IND,...,IND99,RCA Dome,Indianapolis,Colts,Buffalo,Bills,AFC South,AFC East,Indianapolis Colts,Buffalo Bills


In [15]:
# Home-game count per full team name
df.home_full_name.value_counts()

home_full_name
New England Patriots     239
Philadelphia Eagles      228
Kansas City Chiefs       228
Green Bay Packers        226
Pittsburgh Steelers      225
Seattle Seahawks         224
Indianapolis Colts       223
Denver Broncos           221
New Orleans Saints       221
San Francisco 49ers      221
Tampa Bay Buccaneers     220
Baltimore Ravens         219
Chicago Bears            218
Buffalo Bills            217
Atlanta Falcons          217
Tennessee Titans         217
Dallas Cowboys           217
Minnesota Vikings        217
New York Giants          216
Carolina Panthers        216
Cincinnati Bengals       216
Arizona Cardinals        215
Washington Commanders    214
Jacksonville Jaguars     214
Miami Dolphins           213
Detroit Lions            212
New York Jets            211
Cleveland Browns         210
Houston Texans           193
Oakland Raiders          173
San Diego Chargers       149
St Louis Rams            141
Los Angeles Rams          79
Los Angeles Chargers      66

In [16]:
# Missing values per column
df.isna().sum()

game_id                0
season                 0
game_type              0
week                   0
gameday                0
weekday                0
gametime             259
away_team              0
away_score           272
home_team              0
home_score           272
location               0
actual_spread        272
actual_total         272
overtime             272
away_rest              0
home_rest              0
projected_spread     256
projected_total      256
div_game               0
stadium type           0
surface               41
temp                2144
wind                2144
stadium_id             0
stadium                0
home_market            0
home_name              0
away_market            0
away_name              0
home_division          0
away_division          0
home_full_name         0
away_full_name         0
dtype: int64

In [17]:
# Keep every season except 2024, then re-check the missing-value counts
is_2024 = df['season'] == 2024
df = df.loc[~is_2024]
df.isnull().sum()

game_id                0
season                 0
game_type              0
week                   0
gameday                0
weekday                0
gametime             259
away_team              0
away_score             0
home_team              0
home_score             0
location               0
actual_spread          0
actual_total           0
overtime               0
away_rest              0
home_rest              0
projected_spread       0
projected_total        0
div_game               0
stadium type           0
surface               41
temp                1872
wind                1872
stadium_id             0
stadium                0
home_market            0
home_name              0
away_market            0
away_name              0
home_division          0
away_division          0
home_full_name         0
away_full_name         0
dtype: int64

In [18]:
# Path to the stadium GeoJSON file, later passed to folium as `geo_data`.
# NOTE(review): absolute, machine-specific path; the final separator here is '/'.
nfl_geo = r'C:\Users\gre60\OneDrive\CareerFoundry Data Analytics Program\Data Immersion\Achievement 6\NFL Final Project 2/stadiums.geojson'

In [19]:
# Re-read the stadium GeoJSON for inspection. The `with` block closes the file
# handle once parsing is done (a bare open() never closes it), and the explicit
# UTF-8 encoding avoids depending on the Windows locale default.
with open(nfl_geo, encoding='utf-8') as f:
    # Parsed GeoJSON document; indexable like a dictionary
    data = geojson.load(f)

# Print each feature of the collection on its own line
for feature in data['features']:
    print(feature)

{"geometry": {"coordinates": [-74.074444, 40.813611], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 40.813611, "League": "NFL", "Long": -74.074444, "Stadium": "MetLife Stadium", "Team": "New York Giants"}, "type": "Feature"}
{"geometry": {"coordinates": [-74.074444, 40.813611], "type": "Point"}, "properties": {"Conference": "AFC", "Lat": 40.813611, "League": "NFL", "Long": -74.074444, "Stadium": "MetLife Stadium", "Team": "New York Jets"}, "type": "Feature"}
{"geometry": {"coordinates": [-88.062222, 44.501389], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 44.501389, "League": "NFL", "Long": -88.062222, "Stadium": "Lambeau Field", "Team": "Green Bay Packers"}, "type": "Feature"}
{"geometry": {"coordinates": [-97.092778, 32.747778], "type": "Point"}, "properties": {"Conference": "NFC", "Lat": 32.747778, "League": "NFL", "Long": -97.092778, "Stadium": "AT&T Stadium", "Team": "Dallas Cowboys"}, "type": "Feature"}
{"geometry": {"coordinates": [-76.864444, 38.9

In [20]:
# Columns kept for the per-team subset: team, venue, spreads, totals and weather
columns = [
    'home_full_name', 'stadium',
    'projected_spread', 'actual_spread',
    'projected_total', 'actual_total',
    'temp', 'wind',
]

In [21]:
# Subset of the game data restricted to the selected columns (all rows kept)
team_rec = df.loc[:, columns]

In [22]:
# Preview the first five rows of the team subset
team_rec.head(n=5)

Unnamed: 0,home_full_name,stadium,projected_spread,actual_spread,projected_total,actual_total,temp,wind
0,Atlanta Falcons,Georgia Dome,-4.0,-3.0,49.0,31.0,,
1,Chicago Bears,Soldier Field,-3.0,3.0,38.0,37.0,80.0,12.0
2,Cleveland Browns,Cleveland Browns Stadium,-6.0,-43.0,37.0,43.0,78.0,12.0
3,Green Bay Packers,Lambeau Field,9.0,4.0,43.0,52.0,67.0,10.0
4,Indianapolis Colts,RCA Dome,-3.0,17.0,45.5,45.0,,


In [23]:
# Rename 'stadium' to 'Stadium' (the capitalised spelling used by the GeoJSON
# properties). Reassigning instead of inplace=True avoids mutating a frame that
# earlier cells already displayed and keeps the step chainable.
df = df.rename(columns={'stadium': 'Stadium'})

In [24]:
# Confirm the column rename on the first five rows
df.iloc[:5]

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,stadium_id,Stadium,home_market,home_name,away_market,away_name,home_division,away_division,home_full_name,away_full_name
0,1999_01_MIN_ATL,1999,REG,1,9/12/1999,Sunday,,MIN,17.0,ATL,...,ATL00,Georgia Dome,Atlanta,Falcons,Minnesota,Vikings,NFC South,NFC North,Atlanta Falcons,Minnesota Vikings
1,1999_01_KC_CHI,1999,REG,1,9/12/1999,Sunday,,KC,17.0,CHI,...,CHI98,Soldier Field,Chicago,Bears,Kansas City,Chiefs,NFC North,AFC West,Chicago Bears,Kansas City Chiefs
2,1999_01_PIT_CLE,1999,REG,1,9/12/1999,Sunday,,PIT,43.0,CLE,...,CLE00,Cleveland Browns Stadium,Cleveland,Browns,Pittsburgh,Steelers,AFC North,AFC North,Cleveland Browns,Pittsburgh Steelers
3,1999_01_OAK_GB,1999,REG,1,9/12/1999,Sunday,,OAK,24.0,GB,...,GNB00,Lambeau Field,Green Bay,Packers,Oakland,Raiders,NFC North,AFC West,Green Bay Packers,Oakland Raiders
4,1999_01_BUF_IND,1999,REG,1,9/12/1999,Sunday,,BUF,14.0,IND,...,IND99,RCA Dome,Indianapolis,Colts,Buffalo,Bills,AFC South,AFC East,Indianapolis Colts,Buffalo Bills


In [25]:
# Home-game count per team after the 2024 season was filtered out
df.loc[:, 'home_full_name'].value_counts()

home_full_name
New England Patriots     231
Kansas City Chiefs       220
Philadelphia Eagles      219
Pittsburgh Steelers      217
Green Bay Packers        217
Indianapolis Colts       215
Seattle Seahawks         215
Denver Broncos           213
New Orleans Saints       212
San Francisco 49ers      212
Baltimore Ravens         211
Tampa Bay Buccaneers     211
Buffalo Bills            209
Tennessee Titans         209
Chicago Bears            209
Cincinnati Bengals       208
Atlanta Falcons          208
Minnesota Vikings        208
Dallas Cowboys           208
New York Giants          207
Carolina Panthers        207
Jacksonville Jaguars     206
Arizona Cardinals        206
Miami Dolphins           205
Washington Commanders    205
Detroit Lions            203
New York Jets            203
Cleveland Browns         202
Houston Texans           185
Oakland Raiders          173
San Diego Chargers       149
St Louis Rams            141
Los Angeles Rams          70
Los Angeles Chargers      58

In [26]:
# Re-check the team subset before mapping
team_rec.iloc[:5]

Unnamed: 0,home_full_name,stadium,projected_spread,actual_spread,projected_total,actual_total,temp,wind
0,Atlanta Falcons,Georgia Dome,-4.0,-3.0,49.0,31.0,,
1,Chicago Bears,Soldier Field,-3.0,3.0,38.0,37.0,80.0,12.0
2,Cleveland Browns,Cleveland Browns Stadium,-6.0,-43.0,37.0,43.0,78.0,12.0
3,Green Bay Packers,Lambeau Field,9.0,4.0,43.0,52.0,67.0,10.0
4,Indianapolis Colts,RCA Dome,-3.0,17.0,45.5,45.0,,


In [33]:
from IPython.display import display

# team_rec holds one row per game, so every team appears many times.
# folium.Choropleth turns `data` into a key -> value lookup, which keeps only a
# single row per key, so collapse the games to one value per team first
# (mean actual total across that team's home games).
team_totals = (
    team_rec
    .groupby('home_full_name', as_index=False)['actual_total']
    .mean()
)

# Base map centred on the continental U.S.
m = folium.Map(location=[37.8, -96], zoom_start=4)

# Colour layer: match each GeoJSON feature's "Team" property to home_full_name.
# NOTE(review): the features printed earlier in this notebook are Point
# geometries; points are drawn as markers, so the fill colour may not be
# visible — confirm the rendering, or switch to polygons / circle markers.
folium.Choropleth(
    geo_data=nfl_geo,
    name='choropleth',
    data=team_totals,
    columns=['home_full_name', 'actual_total'],
    key_on='feature.properties.Team',
    fill_color='YlGn',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Average Actual Total'
).add_to(m)

# Add layer control so the choropleth layer can be toggled
folium.LayerControl().add_to(m)

display(m)

# Save the map as a standalone HTML file in the current working directory
m.save('nfl_choropleth_map.html')

### Export Dataframe

In [29]:
# Export the cleaned game data to the project folder.
# index=False keeps the row index out of the file; writing it is what produces
# a stray 'Unnamed: 0' column the next time the CSV is read back in.
df.to_csv(os.path.join(path, 'nfl_geo_spread_analysis.csv'), index=False)