In [14]:
import pandas as pd
from datetime import timedelta, datetime, date

In [15]:
# Load the incident log and keep only the timestamp and address columns.
wanted_cols = ["IncidentDate", "Address"]
df = pd.read_csv("crime.csv")[wanted_cols]

df.head()

Unnamed: 0,IncidentDate,Address
0,2005-10-07T03:33:00.000Z,700 Block of E. Washington Ave. ...
1,2005-03-03T05:42:00.000Z,First St & E Washington Ave ...
2,2005-03-02T14:30:00.000Z,8 New Berm Court ...
3,2005-03-03T13:30:00.000Z,4300 Lumley Rd ...
4,2005-03-04T19:45:00.000Z,211 S. Carroll Street ...


In [16]:
# Parse the raw timestamp strings into pandas datetimes (same column, same position).
df = df.assign(IncidentDate=lambda frame: pd.to_datetime(frame["IncidentDate"]))

In [17]:
# Derive the calendar day (a datetime.date per row) from each incident timestamp.
incident_ts = df["IncidentDate"]
df["date"] = incident_ts.dt.date
df.head()

Unnamed: 0,IncidentDate,Address,date
0,2005-10-07 03:33:00+00:00,700 Block of E. Washington Ave. ...,2005-10-07
1,2005-03-03 05:42:00+00:00,First St & E Washington Ave ...,2005-03-03
2,2005-03-02 14:30:00+00:00,8 New Berm Court ...,2005-03-02
3,2005-03-03 13:30:00+00:00,4300 Lumley Rd ...,2005-03-03
4,2005-03-04 19:45:00+00:00,211 S. Carroll Street ...,2005-03-04


In [23]:
# Add "percent" column: the fraction of the day (0.0 to <1.0) that had elapsed
# when the incident occurred, i.e. time since midnight divided by one day.
# The day boundary is taken in the timestamp's own timezone.
# Use the canonical uppercase "D" frequency alias; the lowercase "d" spelling
# is deprecated in newer pandas releases.
midnight = df["IncidentDate"].dt.floor("D")
df["percent"] = (df["IncidentDate"] - midnight) / timedelta(days=1)
df.head()

Unnamed: 0,IncidentDate,Address,date,percent
0,2005-10-07 03:33:00+00:00,700 Block of E. Washington Ave. ...,2005-10-07,0.147917
1,2005-03-03 05:42:00+00:00,First St & E Washington Ave ...,2005-03-03,0.2375
2,2005-03-02 14:30:00+00:00,8 New Berm Court ...,2005-03-02,0.604167
3,2005-03-03 13:30:00+00:00,4300 Lumley Rd ...,2005-03-03,0.5625
4,2005-03-04 19:45:00+00:00,211 S. Carroll Street ...,2005-03-04,0.822917


In [30]:
# Restrict the frame to incidents from a single day; .copy() detaches the
# result from the full frame so later column assignments are unambiguous.
target_day = date(2008, 10, 12)
on_target_day = df["date"] == target_day
df = df.loc[on_target_day].copy()
df

Unnamed: 0,IncidentDate,Address,date,percent
2967,2008-10-12 04:26:00+00:00,3019 E. Washington Ave.,2008-10-12,0.184722
2968,2008-10-12 08:00:00+00:00,4300 Doncaster Dr,2008-10-12,0.333333
2969,2008-10-12 08:00:00+00:00,500 State St.,2008-10-12,0.333333
2970,2008-10-12 06:30:00+00:00,123 E. Doty St.,2008-10-12,0.270833
2971,2008-10-12 07:10:00+00:00,400 Block N. Henry St.,2008-10-12,0.298611
2972,2008-10-12 05:27:00+00:00,500 N. Lake St.,2008-10-12,0.227083
2974,2008-10-12 02:24:00+00:00,E. Johnson Street @ Pinckney Street,2008-10-12,0.1
2979,2008-10-12 23:31:00+00:00,1600 block Troy Drive,2008-10-12,0.979861
2980,2008-10-12 05:06:00+00:00,600 block of State Street,2008-10-12,0.2125
2982,2008-10-12 05:06:00+00:00,100 block South Randall Avenue,2008-10-12,0.2125


In [32]:
def get_api_key():
    """Return the contents of ``secret.txt`` with surrounding whitespace removed.

    The file is looked up relative to the current working directory.
    """
    with open("secret.txt") as key_file:
        raw_key = key_file.read()
    return raw_key.strip()

In [46]:
from shapely.geometry import Point
import requests

# Get lat and long for addr
def get_loc(addr):
    """Geocode a Madison, WI address via the Google Geocoding API.

    Parameters
    ----------
    addr : str
        Street address or intersection. " Madison WI" is appended before
        the lookup.

    Returns
    -------
    shapely.geometry.Point
        ``Point(lng, lat)`` taken from the first geocoding result.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-success HTTP status.
    requests.Timeout
        If the API does not answer within the timeout.
    IndexError
        If the API returns no results for the address.
    """
    query = {"address": addr + " Madison WI", "key": get_api_key()}
    # Let requests build the query string: addresses such as
    # "First St & E Washington Ave" contain characters ("&", "@", "#") that
    # must be percent-encoded, which a plain space -> "+" replacement misses.
    r = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params=query,
        timeout=10,  # do not hang the notebook if the service stalls
    )  # pay each time this line runs! bad!
    r.raise_for_status()
    payload = r.json()
    results = payload["results"]
    if not results:
        # Keep the IndexError type of the original results[0] lookup, but
        # say which address failed and what status the API reported.
        raise IndexError(
            "no geocoding results for {!r} (status: {})".format(
                addr, payload.get("status")
            )
        )
    coord = results[0]["geometry"]["location"]
    return Point(coord["lng"], coord["lat"])

print(get_loc("500 State St."))

POINT (-89.3934697 43.074861)


In [56]:
# Add column with lats and longs of each addr
# Geocode each distinct address only once: every get_loc call issues a paid
# API request, so repeated addresses should not trigger repeated lookups.
loc_by_addr = {addr: get_loc(addr) for addr in df["Address"].unique()}
# apply() fills the object column element by element, one Point per row.
df["geometry"] = df["Address"].apply(lambda addr: loc_by_addr[addr])
df.head()

Unnamed: 0,IncidentDate,Address,date,percent,geometry
2967,2008-10-12 04:26:00+00:00,3019 E. Washington Ave.,2008-10-12,0.184722,POINT (-89.3419911 43.1037753)
2968,2008-10-12 08:00:00+00:00,4300 Doncaster Dr,2008-10-12,0.333333,POINT (-89.44864729999999 43.0361164)
2969,2008-10-12 08:00:00+00:00,500 State St.,2008-10-12,0.333333,POINT (-89.3934697 43.074861)
2970,2008-10-12 06:30:00+00:00,123 E. Doty St.,2008-10-12,0.270833,POINT (-89.38006469999999 43.0743762)
2971,2008-10-12 07:10:00+00:00,400 Block N. Henry St.,2008-10-12,0.298611,POINT (-89.39078479999999 43.0754426)


In [58]:
import geopandas

In [62]:
# Promote the frame to a GeoDataFrame, naming the geometry column explicitly
# and declaring its coordinate reference system. The points are built as
# Point(lng, lat); NOTE(review): EPSG:4326 (WGS84 lon/lat) is assumed for the
# geocoder's output -- confirm. Without a CRS the exported file would carry
# no projection information.
gdf = geopandas.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
gdf

Unnamed: 0,IncidentDate,Address,date,percent,geometry
2967,2008-10-12 04:26:00+00:00,3019 E. Washington Ave.,2008-10-12,0.184722,POINT (-89.34199 43.10378)
2968,2008-10-12 08:00:00+00:00,4300 Doncaster Dr,2008-10-12,0.333333,POINT (-89.44865 43.03612)
2969,2008-10-12 08:00:00+00:00,500 State St.,2008-10-12,0.333333,POINT (-89.39347 43.07486)
2970,2008-10-12 06:30:00+00:00,123 E. Doty St.,2008-10-12,0.270833,POINT (-89.38006 43.07438)
2971,2008-10-12 07:10:00+00:00,400 Block N. Henry St.,2008-10-12,0.298611,POINT (-89.39078 43.07544)
2972,2008-10-12 05:27:00+00:00,500 N. Lake St.,2008-10-12,0.227083,POINT (-89.39736 43.07490)
2974,2008-10-12 02:24:00+00:00,E. Johnson Street @ Pinckney Street,2008-10-12,0.1,POINT (-89.38581 43.07767)
2979,2008-10-12 23:31:00+00:00,1600 block Troy Drive,2008-10-12,0.979861,POINT (-89.36811 43.13369)
2980,2008-10-12 05:06:00+00:00,600 block of State Street,2008-10-12,0.2125,POINT (-89.39539 43.07490)
2982,2008-10-12 05:06:00+00:00,100 block South Randall Avenue,2008-10-12,0.2125,POINT (-89.40900 43.06666)


In [63]:
# Write only the time-of-day fraction and the point geometry to a shapefile.
export_cols = ["percent", "geometry"]
gdf[export_cols].to_file("crime.shp")