In [4]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import RobustScaler
import pickle
import descartes
from shapely.geometry import Point, Polygon
import fiona


In [5]:
# Load the West Nile Virus mosquito test results and shorten the verbose
# column headers to the names used throughout the rest of the notebook.
data = (
    pd.read_csv('data/West_Nile_Virus__WNV__Mosquito_Test_Results.csv')
    .rename(columns={"TEST DATE": "date",
                     "NUMBER OF MOSQUITOES": "num",
                     "LATITUDE": "lat", "LONGITUDE": "lon",
                     "TRAP": "trap",
                     "SEASON YEAR": "year",
                     "WEEK": "week",
                     "Census Tracts": "tract",
                     "Zip Codes": "zip",
                     "Community Areas": "community",
                     "Historical Wards 2003-2015": "ward"})
    # DataFrame.drop returns a new frame. The result must be kept, otherwise
    # the two columns silently survive and rows whose only missing value is
    # in one of them are discarded by dropna() below.
    .drop(columns=['TEST ID', 'SPECIES'])
)

# Parse the test date and keep only the calendar day (datetime.date objects).
data['date'] = pd.to_datetime(data['date']).dt.date

# Discard every record that still has a missing value in a remaining column.
data = data.dropna()

In [7]:
# Coordinate reference system assigned to every GeoDataFrame built from
# lon/lat columns in this notebook. The authority string replaces the
# deprecated proj4-init dict form ({'init': 'epsg:4326'}), which newer
# pyproj/geopandas releases warn about.
crs = 'EPSG:4326'

# Water bodies: Lake Michigan is kept as its own layer, every other feature
# goes into `water_maps`.
water_map = gpd.read_file('geography/water/chicago_latlon.shp')
is_lake_michigan = water_map.NAME == 'LAKE MICHIGAN'
lake_MI = water_map[is_lake_michigan]

water_maps = water_map[~is_lake_michigan]
# Renumber rows 0..n-1; drop=False keeps the old labels in an 'index' column.
water_maps = water_maps.reset_index(drop=False)

# Green-space layers (parks and forest), combined further down.
park_map = gpd.read_file('geography/parks/chicago_parks1.shp')
forest_map = gpd.read_file('geography/forest/forest.shp')


# Collapse the test records to one row per trap holding the mean of every
# numeric column (so `lon`/`lat` become the trap's mean coordinates).
# numeric_only=True is required: the frame still contains non-numeric columns
# (e.g. `date` holds datetime.date objects) and pandas >= 2.0 raises a
# TypeError instead of silently skipping them.
by_trap = data.groupby('trap').mean(numeric_only=True)
by_trap = by_trap.reset_index(drop=False)

# One point per trap, x = longitude, y = latitude.
geometry = [Point(lon, lat) for lon, lat in zip(by_trap.lon, by_trap.lat)]
geo_traps = gpd.GeoDataFrame(by_trap, crs=crs, geometry=geometry)
# drop=False keeps the positional row number as an explicit 'index' column.
geo_traps = geo_traps.reset_index(drop=False)

# Stack the park and forest geometries into a single "green space" layer.
# Series.append was removed in pandas 2.0; pd.concat is the equivalent call
# and keeps the original row labels of both inputs.
green_geo = pd.concat([park_map.geometry, forest_map.geometry])

green_map = gpd.GeoDataFrame(crs=crs, geometry=green_geo)
# The stacked labels repeat, so renumber rows 0..n-1; the old labels are kept
# in an 'index' column.
green_map = green_map.reset_index(drop=False)

# Geo-enable the individual test records: one point per row, x = longitude,
# y = latitude, tagged with the notebook-wide CRS.
data_geometry = [Point(lon, lat) for lon, lat in zip(data.lon, data.lat)]
geo_data = gpd.GeoDataFrame(data, geometry=data_geometry, crs=crs)

def _nearest_distance(point, layer):
    """Return the smallest distance from ``point`` to any geometry in ``layer``.

    ``layer`` is a GeoDataFrame with a 'geometry' column. The value is a plain
    float in the coordinate units of the geometries, or NaN when the layer has
    no features.
    """
    return min(
        (point.distance(shape) for shape in layer['geometry']),
        default=float('nan'),
    )


# Distance from every trap to the nearest water body (excluding Lake
# Michigan), the nearest park/forest, and Lake Michigan itself.
#
# Differences from the previous hand-rolled loops:
#   * d_lm is the minimum over all Lake Michigan features; before, it was the
#     distance to whichever feature happened to be iterated last, and the
#     variable was unbound or stale when the layer was empty.
#   * each cell receives a scalar float rather than the 1-element array that
#     min() returns for an (n, 1) array.
#
# NOTE(review): the layers are tagged EPSG:4326 and shapely distances are
# planar, so these values are presumably in degrees, not metres - confirm
# whether a projected CRS is wanted before interpreting them as lengths.
geo_traps['d_water'] = [_nearest_distance(pt, water_maps) for pt in geo_traps['geometry']]
geo_traps['d_park'] = [_nearest_distance(pt, green_map) for pt in geo_traps['geometry']]
geo_traps['d_lm'] = [_nearest_distance(pt, lake_MI) for pt in geo_traps['geometry']]

# Every test record inherits the distance features computed for its trap.
# A keyed lookup replaces the row-by-row loop, which scanned the whole trap
# table three times per record and wrote each cell individually.
# drop_duplicates(keep first) mirrors the old `.values[0]` first-match rule.
trap_distances = (
    geo_traps.drop_duplicates(subset='trap')
    .set_index('trap')[['d_water', 'd_park', 'd_lm']]
)
for column in ['d_water', 'd_park', 'd_lm']:
    geo_data[column] = geo_data['trap'].map(trap_distances[column])

In [None]:
# Attach weather features to every test record, derived from the test day
# (d0) and the ten days before it (d1..d10):
#   TH0  - Tmax on the test day
#   TL1  - Tmin on the day before the test
#   Tmax - highest Tmax over d0..d10
#   Tmin - lowest Tmin over d0..d10
#   prec - sum of p over d0..d10
#
# NOTE(review): `w` is not defined in any cell visible here. This code assumes
# it is a table with one row per calendar day and columns date / Tmax / Tmin /
# p, where `date` is parseable by pd.to_datetime - confirm against the cell
# that builds it.
LOOKBACK_DAYS = 10
WEATHER_FEATURES = ['TH0', 'TL1', 'Tmax', 'Tmin', 'prec']

# Compare dates as Timestamps on both sides: mixing datetime.date objects with
# Timestamps is rejected (or treated as "never equal") by newer pandas.
test_days = pd.to_datetime(geo_data['date'])
is_recent = test_days > pd.Timestamp('2007-06-11')
# .copy() makes the filtered frame independent, so the column assignments
# below really land in geo_data. The old `geo_data[col].iloc[i] = value` was a
# chained assignment on a slice, which warns and can be a silent no-op.
geo_data = geo_data[is_recent].copy()
test_days = test_days[is_recent]

# Weather indexed by day; keep the first row per date, matching the previous
# `.values[0]` first-match behaviour.
weather = (
    w.assign(date=pd.to_datetime(w['date']))
    .drop_duplicates(subset='date', keep='first')
    .set_index('date')[['Tmax', 'Tmin', 'p']]
)


def _weather_window_features(day):
    """Return the five weather features for the window ending on ``day``.

    Raises KeyError when any of the LOOKBACK_DAYS + 1 days is absent from the
    weather table (the previous code failed with an IndexError in that case).
    Missing readings (NaN) inside the window propagate to the aggregate rather
    than depending on their position in the list.
    """
    window_days = pd.date_range(end=day, periods=LOOKBACK_DAYS + 1, freq='D')
    missing = window_days.difference(weather.index)
    if len(missing) > 0:
        raise KeyError(
            f'no weather record for {missing[0].date()} '
            f'(needed for test date {day.date()})'
        )
    window = weather.loc[window_days]  # ordered oldest -> test day
    return {
        'TH0': window['Tmax'].iloc[-1],
        'TL1': window['Tmin'].iloc[-2],
        'Tmax': window['Tmax'].max(skipna=False),
        'Tmin': window['Tmin'].min(skipna=False),
        'prec': window['p'].sum(skipna=False),
    }


# Many records share a test date, so compute each window once per distinct
# day and broadcast the result back onto the records.
unique_days = pd.DatetimeIndex(test_days.unique())
features_by_day = pd.DataFrame(
    [_weather_window_features(day) for day in unique_days],
    index=unique_days,
    columns=WEATHER_FEATURES,
)
for column in WEATHER_FEATURES:
    geo_data[column] = test_days.map(features_by_day[column]).to_numpy(dtype=float)

print('Done')
geo_data.to_csv('test.csv')