# Mapping in Python

## Read in the required packages

In [0]:
#!pip install gapminder # Method to install gapminder on Google Colab
import folium
from folium.plugins import FastMarkerCluster
import pandas as pd
import gapminder
from datetime import datetime as dt
import vega_datasets as datasets

Collecting gapminder
  Downloading https://files.pythonhosted.org/packages/85/83/57293b277ac2990ea1d3d0439183da8a3466be58174f822c69b02e584863/gapminder-0.1-py3-none-any.whl
Installing collected packages: gapminder
Successfully installed gapminder-0.1


## Creating Base Maps

In [0]:
# Base map at the given centre point; no `tiles` argument is passed,
# so folium falls back to its default tile layer.
map_center = [38.9072, -77.0369]
m = folium.Map(location=map_center, zoom_start=12, width=750, height=500)
m

In [0]:
# Same base map, drawn with the Stamen Toner style.
# The short name 'Stamen Toner' is no longer resolved by current folium
# releases, and Stamen's own tile hosting has been retired in favour of
# Stadia Maps. Passing the tile URL template together with the attribution
# it requires works on both old and new folium versions.
# NOTE(review): Stadia Maps may require an API key outside local development —
# confirm for the environment this notebook is served from.
m = folium.Map(location=[38.9072, -77.0369],
               width=750,
               height=500,
               zoom_start=12,
               tiles='https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png', # Specify the desired tile types
               attr=('&copy; <a href="https://stadiamaps.com/">Stadia Maps</a>, '
                     '&copy; <a href="https://stamen.com/">Stamen Design</a>, '
                     '&copy; <a href="https://openmaptiles.org/">OpenMapTiles</a>, '
                     '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors')) # Custom tile URLs must carry an attribution
m

In [0]:
# Same base map, drawn with the Stamen Terrain style.
# The short name 'Stamen Terrain' is no longer resolved by current folium
# releases, and Stamen's own tile hosting has been retired in favour of
# Stadia Maps. Passing the tile URL template together with the attribution
# it requires works on both old and new folium versions.
# NOTE(review): Stadia Maps may require an API key outside local development —
# confirm for the environment this notebook is served from.
m = folium.Map(location=[38.9072, -77.0369],
               width=750,
               height=500,
               zoom_start=12,
               tiles='https://tiles.stadiamaps.com/tiles/stamen_terrain/{z}/{x}/{y}.png', # Specify the desired tile types
               attr=('&copy; <a href="https://stadiamaps.com/">Stadia Maps</a>, '
                     '&copy; <a href="https://stamen.com/">Stamen Design</a>, '
                     '&copy; <a href="https://openmaptiles.org/">OpenMapTiles</a>, '
                     '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors')) # Custom tile URLs must carry an attribution
m

## Visualizing Gapminder Life Expectancy Data

In [0]:
# Grab the DataFrame exposed by the gapminder package and preview the first five rows
data = gapminder.gapminder
data.head(5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [0]:
# Start from a zoomed-out world view
m = folium.Map(location=[0, 0], zoom_start=1, width=750, height=500)

# Location of the GeoJSON holding a polygon for every country
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
world_geo = f'{url}/world-countries.json'

# Only the 2002 observations are mapped
life_exp_2002 = data[data['year'] == 2002]

# Build the choropleth layer first, then attach it to the base map
world_layer = folium.Choropleth(
    geo_data=world_geo,                # GeoJSON file with the country shapes
    data=life_exp_2002,                # rows supplying the values to shade by
    columns=['country', 'lifeExp'],    # [join key, value] columns in the data
    key_on='feature.properties.name',  # matching key inside each GeoJSON feature
    fill_color='YlGnBu',               # colour scale
    fill_opacity=0.7,
    line_opacity=0.2,
)
world_layer.add_to(m)

m # Display the map

In [0]:
# Swap the country label so it lines up with the name used as the join key
# (presumably the spelling found in the GeoJSON — verify against the file)
data = data.replace('United States', 'United States of America')

# Redraw the map from a fresh base map using the corrected name
m = folium.Map(location=[0, 0], zoom_start=1, width=750, height=500)
corrected_layer = folium.Choropleth(
    geo_data=world_geo,
    data=data[data['year'] == 2002],
    columns=['country', 'lifeExp'],
    key_on='feature.properties.name',
    fill_color='RdYlGn',  # different colour scale from the previous map
    fill_opacity=0.7,
    line_opacity=0.2,
)
corrected_layer.add_to(m)

m

## Air Accident Analysis
For this analysis we will use data from the National Transportation Safety Board (NTSB). "The NTSB aviation accident database contains information from 1962 and later about civil aviation accidents and selected incidents within the United States, its territories and possessions, and in international waters..."

In [0]:
# Download the NTSB export; fields are pipe-delimited, parsed with pandas' python engine
url = 'http://app.ntsb.gov/aviationquery/Download.ashx?type=csv'
data = pd.read_csv(
    url,
    sep='|',
    engine='python',
)

In [0]:
# Trim leading/trailing whitespace from every column header
cols = list(map(str.strip, data.columns))
data.columns = cols

In [0]:
# Preview the first five rows of the accident data
data.head(5)

Unnamed: 0,Event Id,Investigation Type,Accident Number,Event Date,Location,Country,Latitude,Longitude,Airport Code,Airport Name,Injury Severity,Aircraft Damage,Aircraft Category,Registration Number,Make,Model,Amateur Built,Number of Engines,Engine Type,FAR Description,Schedule,Purpose of Flight,Air Carrier,Total Fatal Injuries,Total Serious Injuries,Total Minor Injuries,Total Uninjured,Weather Condition,Broad Phase of Flight,Report Status,Publication Date,Unnamed: 32
0,20191015X24741,Accident,GAA20CA014,10/15/2019,"Colorado Springs, CO",United States,38.942777,-104.57,FLY,Meadow Lake,Unavailable,Substantial,Airplane,N9708T,Piper,PA38,No,,,Part 91: General Aviation,,Other Work Use,,,,,,,,Preliminary,10/15/2019,
1,20191015X65228,Accident,GAA20CA022,10/14/2019,"Douglas, GA",United States,31.476667,-82.860556,DQH,,Unavailable,,Airplane,N8047R,Beech,36,No,,,Part 91: General Aviation,,Personal,,,,,,,,Preliminary,10/17/2019,
2,20191014X71714,Accident,GAA20CA013,10/14/2019,"Breckenridge, TX",United States,32.718889,-98.891666,BKD,Stephens County,Non-Fatal,Substantial,Airplane,N850NK,SOCATA,TBM700,No,1.0,,Part 91: General Aviation,,Personal,,,,,1.0,VMC,,Preliminary,10/15/2019,
3,20191012X90206,Accident,GAA20CA011,10/12/2019,"Island Park, ID",United States,44.673611,-111.424445,,,Unavailable,Substantial,Airplane,N632K,Champion,8GCBC,No,,,Part 91: General Aviation,,Personal,,,,,,,,Preliminary,10/15/2019,
4,20191011X13518,Accident,GAA20CA020,10/11/2019,"Willacoochee, GA",United States,31.398889,-83.001389,,,Non-Fatal,Substantial,Airplane,N3090C,Ayres,S2R,No,1.0,,Part 137: Agricultural,,Aerial Application,,,,1.0,,VMC,,Preliminary,10/18/2019,


In [0]:
# Subset to accidents since the start of the year

# Parse the event dates in one vectorised call. The raw strings are padded
# with whitespace, so they are stripped first. The dtype guard lets the cell
# be re-run after the column has already been converted.
event_dates = data['Event Date']
if not pd.api.types.is_datetime64_any_dtype(event_dates):
    event_dates = pd.to_datetime(event_dates.str.strip(), format='%m/%d/%Y')

# Each step below returns a new frame, so nothing is assigned onto a filtered
# slice (the original pattern triggers pandas' SettingWithCopyWarning).
data = (
    data
    .assign(**{'Event Date': event_dates})
    .loc[lambda frame: frame['Event Date'] >= dt(2019, 1, 1)]
    # Extract state from location: last two characters of the stripped text.
    # The .str accessor leaves a missing Location as NaN instead of raising.
    .assign(state_label=lambda frame: frame['Location'].str.strip().str[-2:])
)

In [0]:
# Count Accidents by State
# The count column is named explicitly. The label value_counts() gives it
# depends on the pandas version ('state_label' before 2.0, 'count' from 2.0),
# so renaming from 'state_label' can silently leave 'Accident Count' missing.
state_counts = data['state_label'].value_counts().to_frame(name='Accident Count')
state_counts['State'] = state_counts.index

# Develop a list of States (removes most international locations)
# Codes that pass the upper-case test but are not US states. Filtering with a
# set (rather than list.remove) does not raise when a code happens to be
# absent from the downloaded data.
non_state_codes = {
    'VI',  # Remove Virgin Islands
    'GM',  # Remove Germany?
    'MP',  # Remove Northern Mariana Islands
    'AO',  # Remove Angola?
}
state_list = [x for x in state_counts['State']
              if x.isupper() and x not in non_state_codes]

# Drop observations if not in state list
state_counts = state_counts[state_counts.index.isin(state_list)]

In [0]:
# Build and show a state-level choropleth of accident counts

# Location of the GeoJSON with the US state shapes
url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
state_geo = f'{url}/us-states.json'

m = folium.Map(location=[48, -102], zoom_start=3)

# Collect the layer settings in one place, then build and attach the layer
choropleth_options = dict(
    geo_data=state_geo,
    name='choropleth',
    data=state_counts,
    columns=['State', 'Accident Count'],  # [join key, value] columns
    key_on='feature.id',                  # matching key inside each GeoJSON feature
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Accidents',
)
folium.Choropleth(**choropleth_options).add_to(m)

# Layer control lets the choropleth layer be toggled on the map
folium.LayerControl().add_to(m)

m

In [0]:
# Plot Accidents Individually on Map
# NOTE: markers are added to the existing map `m`, which this cell does not
# recreate, so re-running the cell adds the same points again.

# Select only those accidents in the US state list. .copy() gives an
# independent frame, so overwriting the coordinate columns below does not
# trigger pandas' SettingWithCopyWarning.
accident_data = data[data['state_label'].isin(state_list)].copy()

# Convert every lat and long to a float. Blank or otherwise unparseable
# entries (whatever their whitespace padding) become NaN instead of raising.
for coord in ('Latitude', 'Longitude'):
    accident_data[coord] = pd.to_numeric(
        accident_data[coord].astype(str).str.strip(), errors='coerce')

# Keep only rows with both coordinates; folium rejects NaN locations
accident_data = accident_data.dropna(subset=['Latitude', 'Longitude'])

# Walk just the four needed columns as plain tuples, which avoids building a
# Series for every row as iterrows() does
marker_fields = accident_data[['Latitude', 'Longitude', 'Make', 'Model']]
for lat, lon, make, model in marker_fields.itertuples(index=False, name=None):

  # Interactive tooltip: "<Make> <Model>", skipping a missing part rather than
  # failing to join a non-string value
  label = ' '.join(str(part) for part in (make, model) if pd.notna(part))

  # Add a circle marker to the map
  folium.CircleMarker((lat, lon), # (Latitude, Longitude)
                      radius=0.5, # Point size
                      tooltip=label,
                      color='#316CE1').add_to(m) # Color the points and save to a map

m.save('air_accidents.html') # Export your map to an HTML file

In [0]:
m # Display the map