# Project 1 - Mapping

Data from: Heyes, Anthony, and Soodeh Saberian. 2019. "Temperature and Decisions: Evidence from 207,000 Court Cases." American Economic Journal: Applied Economics, 11 (2): 238–65.

Notebooks used throughout the code:
- ECO225 - Mapping Notebook

In [None]:
!pip install geopandas plotly
!pip install geopandas geopy
!pip install cartopy
!pip install shapely

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import shapely

In [None]:
# Download the US Census Bureau state boundary shapefile and expose the
# state-name column (NAME) under the label Province_State.
state_df = (
    gpd.read_file("http://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_state_5m.zip")
    .rename(columns={"NAME": "Province_State"})
)
state_df

In [None]:
# Load the project dataset from its Stata file.
df = pd.read_stata('matched_corrected.dta')

# Number of rows for each distinct value of `id`, most frequent first.
unique_counts = df.loc[:, 'id'].value_counts()
print(unique_counts)

In [None]:
# City coordinates as (latitude, longitude) in decimal degrees, keyed by the
# upper-case city name that is matched against the `id` column of `df`.
city_coordinates = {
    "NEW YORK": (40.7128, -74.0060),
    "MIAMI": (25.7617, -80.1918),
    "LOS ANGELES": (34.0522, -118.2437),
    "SAN FRANCISCO": (37.7749, -122.4194),
    "CHICAGO": (41.8781, -87.6298),
    "NEWARK": (40.7357, -74.1724),
    "BALTIMORE": (39.2904, -76.6122),
    "ORLANDO": (28.5383, -81.3792),
    # NOTE(review): these are the coordinates of Arlington, TX - confirm the
    # dataset does not refer to Arlington, VA.
    "ARLINGTON": (32.7357, -97.1081),
    "PHILADELPHIA": (39.9526, -75.1652),
    "BOSTON": (42.3601, -71.0589),
    "SAN DIEGO": (32.7157, -117.1611),
    "HOUSTON": (29.7604, -95.3698),
    "MEMPHIS": (35.1495, -90.0490),
    "DALLAS": (32.7767, -96.7970),
    "DETROIT": (42.3314, -83.0458),
    "DENVER": (39.7392, -104.9903),
    # Fixed: the previous value (33.4484, -84.9877) reused Phoenix's latitude
    # and placed the pin well away from Atlanta.
    "ATLANTA": (33.7490, -84.3880),
    "HARTFORD": (41.7637, -72.6851),
    "LAS VEGAS": (36.1699, -115.1398),
    "SEATTLE": (47.6062, -122.3321),
    "PHOENIX": (33.4484, -112.0740),
    "NEW ORLEANS": (29.9511, -90.0715),
    "EL PASO": (31.7619, -106.4850),
    "SAN PEDRO": (33.7361, -118.2925),
    "HONOLULU": (21.3069, -157.8583),
    "SAN ANTONIO": (29.4241, -98.4936),
    "BUFFALO": (42.8864, -78.8784),
    # NOTE(review): these are the coordinates of Lancaster, PA - confirm the
    # dataset does not refer to Lancaster, CA.
    "LANCASTER": (40.0379, -76.3028),
    "PORTLAND": (45.5235, -122.6762),
    "BRADENTON": (27.4989, -82.5754),
    "YORK": (39.9625, -76.7277),
    "IMPERIAL": (32.8356, -115.5695),
    "GUAYNABO": (18.3976, -66.1534),
    "TUCSON": (32.2226, -110.9747),
}

# Turn the lookup into a three-column frame: id, latitude, longitude.
# Built as one chain (no inplace mutation) so re-running the cell is safe.
coordinates_df = (
    pd.DataFrame.from_dict(city_coordinates, orient='index', columns=['latitude', 'longitude'])
    .rename_axis('id')
    .reset_index()
)

# Attach coordinates to every row of the dataset. The left join keeps rows
# whose `id` has no entry above (their coordinates are NaN); validate= guards
# against a duplicated key on the lookup side multiplying rows.
df_merged = pd.merge(df, coordinates_df, on='id', how='left', validate='many_to_one')

df_merged.head()

In [None]:
# One row per city: mean of `res`, mean of `temp6t4`, and the first
# latitude/longitude recorded for that city.
city_groups = df_merged.groupby('city')
city_avg = city_groups.agg(
    avg_res=pd.NamedAgg(column='res', aggfunc='mean'),
    avg_temp=pd.NamedAgg(column='temp6t4', aggfunc='mean'),
    lat=pd.NamedAgg(column='latitude', aggfunc='first'),
    lon=pd.NamedAgg(column='longitude', aggfunc='first'),
)
city_avg = city_avg.reset_index()

print(city_avg)

In [None]:
# Two-letter postal abbreviation of the state (or territory) for each city.
city_state_mapping = {
    "NEW YORK": "NY",
    "MIAMI": "FL",
    "LOS ANGELES": "CA",
    "SAN FRANCISCO": "CA",
    "CHICAGO": "IL",
    "NEWARK": "NJ",
    "BALTIMORE": "MD",
    "ORLANDO": "FL",
    # NOTE(review): assumes Arlington, TX - confirm the dataset does not
    # refer to Arlington, VA.
    "ARLINGTON": "TX",
    "PHILADELPHIA": "PA",
    "BOSTON": "MA",
    "SAN DIEGO": "CA",
    "HOUSTON": "TX",
    "MEMPHIS": "TN",
    "DALLAS": "TX",
    "DETROIT": "MI",
    "DENVER": "CO",
    "ATLANTA": "GA",
    "HARTFORD": "CT",
    "LAS VEGAS": "NV",
    "SEATTLE": "WA",
    "PHOENIX": "AZ",
    "NEW ORLEANS": "LA",
    "EL PASO": "TX",
    "SAN PEDRO": "CA",
    "HONOLULU": "HI",
    "SAN ANTONIO": "TX",
    "BUFFALO": "NY",
    # NOTE(review): assumes Lancaster, PA - confirm the dataset does not
    # refer to Lancaster, CA.
    "LANCASTER": "PA",
    "PORTLAND": "OR",
    "BRADENTON": "FL",
    "YORK": "PA",
    "IMPERIAL": "CA",
    "GUAYNABO": "PR",
    "TUCSON": "AZ",
}

# Create a DataFrame from the city-state mapping
city_state_df = pd.DataFrame(list(city_state_mapping.items()), columns=['city', 'state'])

# Attach the state to each city. A `state` column left over from an earlier
# run of this cell is dropped first; otherwise the merge would produce
# state_x / state_y and the later groupby('state') would fail.
city_avg = (
    city_avg
    .drop(columns='state', errors='ignore')
    .merge(city_state_df, on='city', how='left')
)

city_avg

In [None]:
# Map of the cities listed in `city_coordinates` on top of the state outlines.
fig, axs = plt.subplots(figsize=(15, 10))

map_color = '#D7CCC8'    # fill colour of the states
point_color = '#F44336'  # colour of the city markers

# Fill the states first and draw the borders afterwards. In the opposite order
# the fill (with its white edges) is painted over the black borders and hides them.
state_df.plot(ax=axs, color=map_color, edgecolor='white')
state_df.boundary.plot(ax=axs, linewidth=1, color='black')

# Draw every city as a diamond pin in a single call. The dictionary stores
# (latitude, longitude) while matplotlib expects x=longitude, y=latitude.
lats, lons = zip(*city_coordinates.values())
axs.plot(lons, lats, linestyle='none', marker='D', color=point_color, markersize=10)

# Restrict the view to this longitude/latitude window; Honolulu and Guaynabo
# lie outside it and are therefore not shown.
axs.set_xlim(-130, -60)
axs.set_ylim(20, 50)
# Hiding the axis also hides ticks and grid lines, so no grid is configured.
axs.set_axis_off()
axs.set_title('Locations of the cities in the dataset')

plt.show()

In [None]:
# Collapse the city-level table to one row per state. Each value is the plain
# (unweighted) mean of the city averages belonging to that state.
state_groups = city_avg.groupby('state')
state_avg = state_groups.agg(
    avg_resolution=pd.NamedAgg(column='avg_res', aggfunc='mean'),
    avg_temperature=pd.NamedAgg(column='avg_temp', aggfunc='mean'),
)
state_avg = state_avg.reset_index()

print(state_avg)

In [None]:
# Attach the state-level averages to the state geometries, matching the postal
# abbreviation (STUSPS) to `state`. States without data keep NaN values.
# Columns left over from an earlier run of this cell are dropped first so that
# re-running does not create *_x / *_y duplicates.
state_df = (
    state_df
    .drop(columns=list(state_avg.columns), errors='ignore')
    .merge(state_avg, left_on='STUSPS', right_on='state', how='left')
)
state_df

In [None]:
# Leave out Alaska, Hawaii and the listed territories so the remaining states
# fill the map. Puerto Rico is not in the list and is therefore kept.
excluded_regions = ['Alaska', 'Hawaii', 'American Samoa', 'Guam',
                    'United States Virgin Islands',
                    'Commonwealth of the Northern Mariana Islands']
is_excluded = state_df['Province_State'].isin(excluded_regions)
state_df = state_df[~is_excluded]

In [None]:
# Choropleth of `avg_resolution` by state.
fig, rax = plt.subplots(figsize=(50, 20))

# Base layer: every state outlined on a white fill, so that states with no
# value (which the data layer below does not draw) still appear on the map.
state_df.plot(ax=rax, edgecolor="black", color="white")

# Data layer: shade each state by its value on a fixed 0-1 colour scale.
state_df.plot(
    ax=rax, edgecolor='black', column='avg_resolution',
    legend=True, cmap='PuBu',
    vmin=0, vmax=1,
    legend_kwds={'label': 'Average resolution'},
)

# The axis is hidden entirely, so axis labels and spines are not configured.
rax.set_axis_off()
rax.set_title('Average resolution by state', fontsize=40)
plt.show()

In [None]:
# Choropleth of `avg_temperature` by state.
fig, tax = plt.subplots(figsize=(50, 20))

# Base layer: every state outlined on a white fill, so that states with no
# value (which the data layer below does not draw) still appear on the map.
state_df.plot(ax=tax, edgecolor="black", color="white")

# Data layer: shade each state by its value on a fixed 20-100 colour scale.
state_df.plot(
    ax=tax, edgecolor='black', column='avg_temperature',
    legend=True, cmap='PuBu',
    vmin=20, vmax=100,
    legend_kwds={'label': 'Average temperature'},
)

# The axis is hidden entirely, so axis labels and spines are not configured.
tax.set_axis_off()
tax.set_title('Average temperature by state', fontsize=40)
plt.show()

## Republican vs Democrat

In [None]:
# Results of the 2000 presidential election
# https://www.archives.gov/electoral-college/2000

# Party that carried each state (plus the District of Columbia):
# 1 = Republican, 0 = Democrat.
# Kept as state -> flag pairs so that a state name and its value cannot drift
# out of alignment, which is what happened with two parallel lists.
republican_2000 = {
    "Alabama": 1, "Alaska": 1, "Arizona": 1, "Arkansas": 1,
    "California": 0, "Colorado": 1, "Connecticut": 0, "Delaware": 0,
    "District of Columbia": 0, "Florida": 1, "Georgia": 1, "Hawaii": 0,
    "Idaho": 1, "Illinois": 0, "Indiana": 1, "Iowa": 0,
    "Kansas": 1, "Kentucky": 1, "Louisiana": 1, "Maine": 0,
    "Maryland": 0, "Massachusetts": 0, "Michigan": 0, "Minnesota": 0,
    "Mississippi": 1, "Missouri": 1, "Montana": 1, "Nebraska": 1,
    "Nevada": 1, "New Hampshire": 1, "New Jersey": 0, "New Mexico": 0,
    "New York": 0, "North Carolina": 1, "North Dakota": 1, "Ohio": 1,
    "Oklahoma": 1, "Oregon": 0, "Pennsylvania": 0, "Rhode Island": 0,
    "South Carolina": 1, "South Dakota": 1, "Tennessee": 1, "Texas": 1,
    "Utah": 1, "Vermont": 0, "Virginia": 1, "Washington": 0,
    "West Virginia": 1, "Wisconsin": 0, "Wyoming": 1,
}

# Same two-column layout as before: one list of names, one list of flags.
data = {
    "State": list(republican_2000.keys()),
    "Republican": list(republican_2000.values()),
}

#Create a DataFrame from the dictionary
df_data = pd.DataFrame(data)

# Attach the party flag to the state geometries by full state name. Rows with
# no match in `df_data` keep NaN. Columns left over from an earlier run of this
# cell are dropped first so that re-running does not create *_x / *_y duplicates.
state_df = (
    state_df
    .drop(columns=list(df_data.columns), errors='ignore')
    .merge(df_data, left_on='Province_State', right_on='State', how='left')
)
state_df

In [None]:
# Map of the states coloured by the `Republican` flag (0 or 1).

from matplotlib.colors import ListedColormap


fig, tax = plt.subplots(figsize=(50, 20))

# Base layer: every state outlined on a white fill, so that rows with no flag
# (which the data layer below does not draw) still appear on the map.
state_df.plot(ax=tax, edgecolor="black", color="white")

# Two-colour map: the first colour is used for 0, the second for 1.
colour_map = ListedColormap(['#1f77b4', "red"])

# The flag is binary, so the colour bar only needs ticks at 0 and 1.
state_df.plot(
    ax=tax, edgecolor='black', column='Republican',
    legend=True, cmap=colour_map,
    vmin=0, vmax=1,
    legend_kwds={'label': 'Democrat (0) / Republican (1)', 'ticks': [0, 1]},
)

# The axis is hidden entirely, so axis labels and spines are not configured.
tax.set_axis_off()
tax.set_title('Party affiliation by state', fontsize=40)
plt.show()

In [None]:
# Mean and standard deviation of the state-level `avg_resolution`, split by the
# `Republican` flag. States without a flag or without a value do not contribute.
summary = state_df.groupby('Republican')['avg_resolution'].agg(['mean', 'std']).reset_index()
summary['Party'] = summary['Republican'].map({1: 'Republican', 0: 'Democrat'})

# Tie each colour to its party rather than to bar position, so the colours stay
# correct even if only one group is present.
party_colours = summary['Party'].map({'Democrat': 'skyblue', 'Republican': 'lightcoral'})

# Bar chart of the group means; error bars show one standard deviation.
fig, ax = plt.subplots(figsize=(7, 7), facecolor='white')
ax.set_facecolor('white')
ax.bar(summary['Party'], summary['mean'], yerr=summary['std'], capsize=5,
       color=list(party_colours))

# Add labels and title
ax.set_ylabel('Average Resolution')
ax.set_xlabel('Party Affiliation')
ax.set_title('Average resolution by party affiliation')

# Remove plot spines for a clean look
for spine in ax.spines.values():
    spine.set_visible(False)

# Show the plot
fig.tight_layout()
plt.show()