# Gravity Model

In [8]:
import folium
import geopandas
from haversine import haversine
from matplotlib import pyplot as plt
import numpy as np
import pandas
from shapely.geometry import Point

from dpd.geometry import dms_to_dd
from dpd.wikipedia import get_wikipedia_coordinates, get_wikipedia_table
from dpd.modeling import GravityModel

# Lift pandas' display truncation so every column and every row of a frame is
# rendered when it is shown in the notebook.
pandas.options.display.max_columns = None
pandas.options.display.max_rows = None

In [2]:
url = "https://en.wikipedia.org/wiki/Combined_statistical_area"

# The table is fetched twice: the plain variant supplies the displayed values,
# while the styled variant's `.data` cells are parsed below for their
# `href="..."` article links.
csa_styled = get_wikipedia_table(url, 1, styled=True)
csa = get_wikipedia_table(url, 1).set_index("Rank").drop(index="143")
# NOTE(review): rank "143" / position 142 is presumably a trailing non-CSA row
# of the Wikipedia table — confirm if the article layout changes.
# Dropped in place so `csa_styled` itself stays row-aligned with `csa`.
csa_styled.data.drop(index=csa_styled.data.index[142], inplace=True)

# Population strings such as "22,589,036" -> int.
csa["2019 estimate"] = csa["2019 estimate"].map(
    lambda text: int(text.replace(",", ""))
)

# Follow the first link in each area's cell to look up its coordinates.
coordinates = csa_styled.data["Combined\xa0statistical\xa0area"].map(
    lambda cell: get_wikipedia_coordinates(cell.split('href="')[1].split('"')[0])
)
# The two tables share row order but not index labels, so align by position.
coordinates.index = csa.index
csa["coordinates"] = coordinates

# Keep only areas whose coordinates were found. The comparison is done per
# element (rather than `Series != tuple`) so it does not depend on how pandas
# interprets a tuple operand, and `.copy()` makes the filtered frame independent
# so the column assignments below do not raise SettingWithCopyWarning.
has_coordinates = csa["coordinates"].map(lambda pair: pair != (None, None))
csa = csa[has_coordinates].copy()

# (lat, lon) in degrees/minutes/seconds -> (lat, lon) in decimal degrees.
csa["dd"] = csa["coordinates"].map(
    lambda latlon: (dms_to_dd(latlon[0]), dms_to_dd(latlon[1]))
)
# shapely points are (x, y) == (lon, lat), hence the swapped order.
csa["geometry"] = csa["dd"].map(lambda lat_lon: Point(lat_lon[1], lat_lon[0]))
csa = geopandas.GeoDataFrame(csa, geometry="geometry", crs="EPSG:4326")
csa.head()

Unnamed: 0_level_0,Combined statistical area,2019 estimate,2010 Census,Change,Constituent core-based statistical areas,coordinates,dd,geometry
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,"New York-Newark, NY-NJ-CT-PA Combined Statisti...",22589036,22255491,+1.50%,"New York-Newark-Jersey City, NY-NJ-PA Metropol...","(40°48′31″N, 74°1′13.39″W)","(40.808611111111105, -74.02038611111111)",POINT (-74.02039 40.80861)
2,"Los Angeles-Long Beach, CA Combined Statistica...",18711436,17877006,+4.67%,"Los Angeles-Long Beach-Anaheim, CA Metropolita...","(34°00′N, 118°12′W)","(34.0, -118.2)",POINT (-118.20000 34.00000)
3,"Chicago-Naperville, IL-IN-WI Combined Statisti...",9825325,9840929,−0.16%,"Chicago-Naperville-Elgin, IL-IN-WI Metropolita...","(41°54′N, 87°39′W)","(41.9, -87.65)",POINT (-87.65000 41.90000)
4,"Washington-Baltimore-Arlington, DC-MD-VA-WV-PA...",9814928,9050192,+8.45%,"Washington-Arlington-Alexandria, DC-VA-MD-WV M...","(38°58′N, 77°19′W)","(38.96666666666667, -77.31666666666666)",POINT (-77.31667 38.96667)
5,"San Jose-San Francisco-Oakland, CA Combined St...",9665887,8923942,+8.31%,"San Francisco-Oakland-Berkeley, CA Metropolita...","(37°45′N, 122°17′W)","(37.75, -122.28333333333333)",POINT (-122.28333 37.75000)


In [26]:
# Interactive map (initial view roughly over the middle of the contiguous US)
# showing every combined statistical area; hovering a feature shows its name.
folium_map = folium.Map(location=(39, -95), zoom_start=4)
csa_name_tooltip = folium.features.GeoJsonTooltip(
    fields=["Combined\xa0statistical\xa0area"]
)
geojson = folium.GeoJson(csa.to_json(), tooltip=csa_name_tooltip)
geojson.add_to(folium_map)
folium_map

In [4]:
# Gravity-model parameters taken from
# https://pedestrianobservations.com/2020/02/13/metcalfes-law-for-high-speed-rail/
# NOTE(review): G is presumably the scale constant, a/b the population
# exponents and d the distance exponent — confirm against
# dpd.modeling.GravityModel.
HighSpeedRailModel = GravityModel(
    G=75000,
    a=0.8,
    b=0.8,
    d=2,
)

def calculate_distance(origin, destination, min_distance=500):
    """Return the distance between two points as used by the gravity model.

    The great-circle distance is computed with ``haversine`` (called with its
    default unit) and then adjusted:

    * a distance of exactly 0 (a place paired with itself) becomes ``np.inf``,
      so a model that divides by distance yields no flow for that pair;
    * any other distance below ``min_distance`` is raised to ``min_distance``.

    Parameters
    ----------
    origin, destination : tuple
        Coordinate pairs passed straight through to ``haversine``; the caller
        in this notebook passes ``(lat, lon)`` in decimal degrees.
    min_distance : float, optional
        Lower bound applied to non-zero distances, in the same unit that
        ``haversine`` returns. Defaults to 500, the value previously
        hard-coded here.

    Returns
    -------
    float
        ``np.inf`` for coincident points, otherwise
        ``max(distance, min_distance)``.
    """
    distance = haversine(origin, destination)
    if distance == 0:
        # Coincident points: push the pair infinitely far apart instead of
        # clamping it to the floor.
        return np.inf
    if distance < min_distance:
        return min_distance
    return distance

# Pull the two columns out once instead of re-selecting them for every pair.
population = csa["2019 estimate"]
position = csa["dd"]

# rows[i][j] is the model output for origin i and destination j; populations
# are passed in millions.
rows = [
    [
        HighSpeedRailModel.compute(
            population[origin] / 1000000,
            population[destination] / 1000000,
            calculate_distance(position[origin], position[destination]),
        )
        for destination in csa.index
    ]
    for origin in csa.index
]

# Label both axes with the area names.
area_names = csa["Combined\xa0statistical\xa0area"]
df = pandas.DataFrame(rows)
df.index = area_names
df.columns = area_names

# Truncate to whole numbers, then blank out the zero cells so only the
# non-zero pairs stand out in the rendered table.
df = df.astype(int).astype(str).replace("0", "")
df

Combined statistical area,"New York-Newark, NY-NJ-CT-PA Combined Statistical Area","Los Angeles-Long Beach, CA Combined Statistical Area","Chicago-Naperville, IL-IN-WI Combined Statistical Area","Washington-Baltimore-Arlington, DC-MD-VA-WV-PA Combined Statistical Area","San Jose-San Francisco-Oakland, CA Combined Statistical Area","Boston-Worcester-Providence, MA-RI-NH-CT Combined Statistical Area","Philadelphia-Reading-Camden, PA-NJ-DE-MD Combined Statistical Area","Miami-Port St. Lucie-Fort Lauderdale, FL Combined Statistical Area","Atlanta–Athens-Clarke County–Sandy Springs, GA-AL Combined Statistical Area","Detroit-Warren-Ann Arbor, MI Combined Statistical Area","Phoenix-Mesa, AZ Combined Statistical Area","Minneapolis-St. Paul, MN-WI Combined Statistical Area","Denver-Aurora, CO Combined Statistical Area","Cleveland-Akron-Canton, OH Combined Statistical Area","Portland-Vancouver-Salem, OR-WA Combined Statistical Area","Charlotte-Concord, NC-SC Combined Statistical Area","Salt Lake City-Provo-Orem, UT Combined Statistical Area","Columbus-Marion-Zanesville, OH Combined Statistical Area","Kansas City-Overland Park-Kansas City, MO-KS Combined Statistical Area","Las Vegas-Henderson, NV Combined Statistical Area","Cincinnati-Wilmington-Maysville, OH-KY-IN Combined Statistical Area","Raleigh-Durham-Cary, NC Combined Statistical Area","Milwaukee-Racine-Waukesha, WI Combined Statistical Area","Virginia Beach-Norfolk, VA-NC Combined Statistical Area","Greensboro–Winston-Salem–High Point, NC Combined Statistical Area","Louisville/Jefferson County–Elizabethtown–Bardstown, KY-IN Combined Statistical Area","Oklahoma City-Shawnee, OK Combined Statistical Area","Greenville-Spartanburg-Anderson, SC Combined Statistical Area","Grand Rapids-Kentwood-Muskegon, MI Combined Statistical Area","Memphis-Forrest City, TN-MS-AR Combined Statistical Area","Birmingham-Hoover-Talladega, AL Combined Statistical Area","Fresno-Madera-Hanford, CA Combined Statistical 
Area","Harrisburg-York-Lebanon, PA Combined Statistical Area","Buffalo-Cheektowaga-Olean, NY Combined Statistical Area","Cape Coral-Fort Myers-Naples, FL Combined Statistical Area","Albany-Schenectady, NY Combined Statistical Area","Tucson-Nogales, AZ Combined Statistical Area","Omaha-Council Bluffs-Fremont, NE-IA Combined Statistical Area","McAllen-Edinburg, TX Combined Statistical Area","Little Rock-North Little Rock, AR Combined Statistical Area","Des Moines-Ames-West Des Moines, IA Combined Statistical Area","Toledo-Findlay-Tiffin, OH Combined Statistical Area","Boise City-Mountain Home-Ontario, ID-OR Combined Statistical Area","Charleston-Huntington-Ashland, WV-OH-KY Combined Statistical Area","Lexington-Fayette–Richmond–Frankfort, KY Combined Statistical Area","Mobile-Daphne-Fairhope, AL Combined Statistical Area","Youngstown-Warren, OH-PA Combined Statistical Area","Huntsville-Decatur, AL Combined Statistical Area","Fort Wayne-Huntington-Auburn, IN Combined Statistical Area","Myrtle Beach-Conway, SC-NC Combined Statistical Area","Kalamazoo-Battle Creek-Portage, MI Combined Statistical Area","Davenport-Moline, IA-IL Combined Statistical Area","Montgomery-Selma-Alexander City, AL Combined Statistical Area","Appleton-Oshkosh-Neenah, WI Combined Statistical Area","Saginaw-Midland-Bay City, MI Combined Statistical Area","Green Bay-Shawano, WI Combined Statistical Area","Erie-Meadville, PA Combined Statistical Area","Medford-Grants Pass, OR Combined Statistical Area","Wausau-Stevens Point-Wisconsin Rapids, WI Combined Statistical Area","Springfield-Jacksonville-Lincoln, IL Combined Statistical Area","Greenville-Kinston-Washington, NC Combined Statistical Area","Fargo-Wahpeton, ND-MN Combined Statistical Area","Bloomsburg-Berwick-Sunbury, PA Combined Statistical Area","Columbia-Moberly-Mexico, MO Combined Statistical Area","Idaho Falls-Rexburg-Blackfoot, ID Combined Statistical Area","Redding-Red Bluff, CA Combined Statistical Area","State College-DuBois, PA 
Combined Statistical Area","Bend-Prineville, OR Combined Statistical Area","Lima-Van Wert-Celina, OH Combined Statistical Area","Pueblo-Cañon City, CO Combined Statistical Area","Mansfield-Ashland-Bucyrus, OH Combined Statistical Area","Eau Claire-Menomonie, WI Combined Statistical Area","Bloomington-Pontiac, IL Combined Statistical Area","Johnstown-Somerset, PA Combined Statistical Area","Tupelo-Corinth, MS Combined Statistical Area","Dothan-Ozark, AL Combined Statistical Area","Morgantown-Fairmont, WV Combined Statistical Area","New Bern-Morehead City, NC Combined Statistical Area","Jonesboro-Paragould, AR Combined Statistical Area","Altoona-Huntingdon, PA Combined Statistical Area","Williamsport-Lock Haven, PA Combined Statistical Area","Ithaca-Cortland, NY Combined Statistical Area","Parkersburg-Marietta-Vienna, WV-OH Combined Statistical Area","Paducah-Mayfield, KY-IL Combined Statistical Area","Hot Springs-Malvern, AR Combined Statistical Area","Edwards-Glenwood Springs, CO Combined Statistical Area","Mankato-New Ulm, MN Combined Statistical Area","Scottsboro-Fort Payne, AL Combined Statistical Area","Quincy-Hannibal, IL-MO Combined Statistical Area","Mount Pleasant-Alma, MI Combined Statistical Area","Burlington-Fort Madison-Keokuk, IA-IL-MO Combined Statistical Area","Dixon-Sterling, IL Combined Statistical Area","Richmond-Connersville, IN Combined Statistical Area","Kerrville-Fredericksburg, TX Combined Statistical Area","Columbus-West Point, MS Combined Statistical Area","Cleveland-Indianola, MS Combined Statistical Area","Steamboat Springs-Craig, CO Combined Statistical Area","Spencer-Spirit Lake, IA Combined Statistical Area"
Combined statistical area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1
"New York-Newark, NY-NJ-CT-PA Combined Statistical Area",,,4.0,22.0,,19.0,17.0,1.0,2.0,5.0,,1.0,,7.0,,2.0,,3.0,,,2.0,3.0,1.0,5.0,2.0,1.0,,1.0,1.0,,,,4.0,4.0,,4.0,,,,,,1.0,,1.0,,,1.0,,,,,,,,,,1.0,,,,,,1.0,,,,1.0,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,
"Los Angeles-Long Beach, CA Combined Statistical Area",,,,,15.0,,,,,,8.0,,1.0,,1.0,,1.0,,,6.0,,,,,,,,,,,,3.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Chicago-Naperville, IL-IN-WI Combined Statistical Area",4.0,,,3.0,,1.0,2.0,,2.0,7.0,,4.0,,4.0,,1.0,,3.0,2.0,,3.0,,3.0,,,2.0,,,2.0,,,,,1.0,,,,,,,1.0,1.0,,,1.0,,,,1.0,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Washington-Baltimore-Arlington, DC-MD-VA-WV-PA Combined Statistical Area",22.0,,3.0,,,6.0,9.0,1.0,2.0,4.0,,,,5.0,,3.0,,3.0,,,2.0,3.0,,3.0,2.0,1.0,,1.0,,,,,2.0,2.0,,2.0,,,,,,1.0,,1.0,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"San Jose-San Francisco-Oakland, CA Combined Statistical Area",,15.0,,,,,,,,,1.0,,,,1.0,,1.0,,,2.0,,,,,,,,,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Boston-Worcester-Providence, MA-RI-NH-CT Combined Statistical Area",19.0,,1.0,6.0,,,7.0,,,1.0,,,,1.0,,,,,,,,,,1.0,,,,,,,,,1.0,1.0,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Philadelphia-Reading-Camden, PA-NJ-DE-MD Combined Statistical Area",17.0,,2.0,9.0,,7.0,,,1.0,2.0,,,,4.0,,1.0,,1.0,,,1.0,2.0,,2.0,1.0,,,,,,,,1.0,1.0,,1.0,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Miami-Port St. Lucie-Fort Lauderdale, FL Combined Statistical Area",1.0,,,1.0,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Atlanta–Athens-Clarke County–Sandy Springs, GA-AL Combined Statistical Area",2.0,,2.0,2.0,,,1.0,1.0,,1.0,,,,1.0,,3.0,,1.0,,,2.0,1.0,,,2.0,1.0,,1.0,,1.0,1.0,,,,,,,,,,,,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"Detroit-Warren-Ann Arbor, MI Combined Statistical Area",5.0,,7.0,4.0,,1.0,2.0,,1.0,,,1.0,,3.0,,,,2.0,,,2.0,,2.0,,,1.0,,,1.0,,,,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
