In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import folium
from folium import plugins
from folium.plugins import HeatMap
from ipywidgets import widget
from IPython.display import display
from IPython.html.widgets import *
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [2]:
# Load the region grid: each row holds a region id and its west/east/south/north bounds.
regions = pd.read_csv(
    'regions.csv',
    delimiter=';',
)
regions.head()

Unnamed: 0,region,west,east,south,north
0,1,-74.25559,-74.244478,40.49612,40.504508
1,2,-74.25559,-74.244478,40.504508,40.512896
2,3,-74.25559,-74.244478,40.512896,40.521285
3,4,-74.25559,-74.244478,40.521285,40.529673
4,5,-74.25559,-74.244478,40.529673,40.538061


In [3]:
# Read the comma-separated list of selected region ids.
with open('list102.txt', 'r') as regions_file:
    region_list = [int(token) for token in regions_file.read().split(',')]

regions1 = pd.DataFrame({'region': region_list})

# Column labels derived from the region ids: with and without the 'r' prefix.
column_names = ['r%d' % region_id for region_id in region_list]
column_names1 = list(map(str, region_list))

# Load the hourly trip series for June and keep only the 102 regions selected earlier.
df = (
    pd.read_csv('agg201606.csv', parse_dates=['tpep_pickup_datetime'])
    .rename(columns={'region_number': 'region'})
    .merge(regions1, on='region', how='inner')
)
df.head()

Unnamed: 0,tpep_pickup_datetime,region,n
0,2016-06-01 00:00:00,1075,26
1,2016-06-01 01:00:00,1075,14
2,2016-06-01 02:00:00,1075,5
3,2016-06-01 03:00:00,1075,2
4,2016-06-01 04:00:00,1075,1


In [4]:
# Reshape to wide form: one row per pickup hour, one column per region;
# hour/region pairs with no record become 0.
y = df.pivot_table(index = ['tpep_pickup_datetime'], columns = ['region'], values = 'n').fillna(0)
# Derive the 'r<region>' labels from the pivot's own columns. pivot_table orders
# its columns by region id, which is not guaranteed to match the order (or even
# the length) of region_list / column_names, so assigning column_names directly
# could attach a label to the wrong series.
y.columns = ['r' + str(region_id) for region_id in y.columns.get_level_values(-1)]

# Load the predicted hourly trip series for June, indexed by pickup hour.
y_pred = pd.read_csv('june_time_series_predicted.csv', index_col = 'tpep_pickup_datetime', parse_dates=['tpep_pickup_datetime'])


## Построим временной ряд фактического и прогнозируемого спроса на такси в выбираемой области

In [5]:
def time_series(dt = 600, region = 'r1075'):
    """Plot actual and predicted hourly demand for one region.

    Reads the module-level frames ``y`` (actual) and ``y_pred`` (predicted)
    and draws both series on one figure, from the hour at position ``dt`` of
    ``y.index`` up to '2016-06-30 23:00:00'.

    Parameters
    ----------
    dt : int
        Position in ``y.index`` of the first hour to show. Positions past the
        last row are clamped to the last row.
    region : str
        Column label of the region to plot; must exist in both ``y`` and
        ``y_pred``.
    """
    # A slider bound equal to len(y.index) would raise IndexError; clamp instead.
    start = y.index[min(dt, len(y.index) - 1)]
    end = '2016-06-30 23:00:00'

    fig, ax = plt.subplots(figsize = (20, 10))
    # Explicit labels: both series are named after the region, so the default
    # legend would show the same text twice.
    # Actual data, default (blue) colour.
    y.loc[start:end, region].plot(ax = ax, label = 'real', legend = True)
    # Predicted data in red.
    y_pred.loc[start:end, region].plot(ax = ax, c = 'r', label = 'predicted', legend = True)
    ax.set_title(region)
    ax.grid(True)
    plt.show()


### Запускаем ipywidget

In [6]:
# Bound the slider by the last valid position of y.index: a fixed upper bound of
# 720 is one past the end when y holds 720 hourly rows (30 days x 24 hours).
interact(time_series, dt = (0, len(y.index) - 1, 1), region = column_names)

interactive(children=(IntSlider(value=600, description='dt', max=720), Dropdown(description='region', options=…

<function __main__.time_series(dt=600, region='r1075')>

## Отобразим на карте среднее количество поездок в час из каждого региона.
Загрузим данные

In [7]:
# Average number of trips per hour from each zone (actual data).
df_r = pd.read_csv(filepath_or_buffer='average_trips_real.csv')
df_r.head()

Unnamed: 0,region,summary
0,1075,72.0
1,1076,139.0
2,1077,90.0
3,1125,68.0
4,1126,187.0


In [8]:
# Average predicted number of trips per hour from each zone.
df_p = pd.read_csv(filepath_or_buffer='average_trips_predict.csv')
df_p.head()

Unnamed: 0,region,summary
0,1075,72.0
1,1076,139.0
2,1077,90.0
3,1125,68.0
4,1126,189.0


In [9]:
# Prepare the data: attach each region's bounding-box coordinates so the
# averages can be drawn on the map. Missing values are replaced with 0.
df_r_agg = regions.merge(df_r, on='region', how='inner').fillna(0)

df_p_agg = regions.merge(df_p, on='region', how='inner').fillna(0)

In [10]:
m = folium.Map(location=(40.738667, -73.985731), zoom_start=11)

# Hidden parent group; the two togglable layers below are its sub-groups.
mcg = folium.FeatureGroup(control=False)
m.add_child(mcg)


def _add_region_rectangles(frame, group):
    """Draw one rectangle per row of ``frame`` into the folium ``group``.

    ``frame`` must provide the columns south, west, north, east, summary and
    region. The fill opacity of each rectangle is log(summary / max(summary) + 1),
    i.e. between 0 and log(2), so busier regions are drawn more opaque.
    """
    peak = frame['summary'].max()
    rows = zip(frame['south'], frame['west'], frame['north'], frame['east'],
               frame['summary'], frame['region'])
    for south, west, north, east, summary, region in rows:
        # Guard against a frame whose values are all zero (division by zero).
        share = summary / peak if peak > 0 else 0.0
        folium.Rectangle(bounds= [[float(south), float(west)], [float(north), float(east)]],
                         weight= 1, color = '#ebefff', fill_color = '#ff0000',
                         opacity= 0.3,
                         fill_opacity = float(np.log(share + 1)),
                         popup = 'region:%d' % region
                         ).add_to(group)


# The 'real' layer is built from the actual averages (df_r_agg); previously it
# was drawn from df_p_agg, which made both layers identical.
g1 = folium.plugins.FeatureGroupSubGroup(mcg, 'real')
_add_region_rectangles(df_r_agg, g1)
g2 = folium.plugins.FeatureGroupSubGroup(mcg, 'predicted')
_add_region_rectangles(df_p_agg, g2)
m.add_child(g1)
m.add_child(g2)

folium.LayerControl(collapsed= False).add_to(m)
plugins.Fullscreen(
    position='topright',
    title='Expand',
    title_cancel='Exit',
    force_separate_button=True
).add_to(m)
m

In [11]:
# Build a heat map from the actual and the predicted series.
# Each region is represented by the centre of its bounding box.
for centres in (df_p_agg, df_r_agg):
    centres['longitude'] = (centres['east'] + centres['west']) / 2
    centres['latitude'] = (centres['south'] + centres['north']) / 2

In [12]:
map_hooray = folium.Map(location=(40.738667, -73.985731), zoom_start=11)

# [latitude, longitude] pairs of the region centres for each data set.
# NOTE(review): the points carry no weight, so the 'summary' values do not
# influence the heat map; confirm whether a weighted map was intended.
heat_data1 = df_r_agg[['latitude', 'longitude']].values.tolist()
heat_data2 = df_p_agg[['latitude', 'longitude']].values.tolist()

# Hidden parent group; the two togglable layers below are its sub-groups.
mcg2 = folium.FeatureGroup(control=False)
map_hooray.add_child(mcg2)

g_hm1 = folium.plugins.FeatureGroupSubGroup(mcg2, 'real')
g_hm2 = folium.plugins.FeatureGroupSubGroup(mcg2, 'predicted')
# Plot it on the map: the 'predicted' layer uses heat_data2 (it previously
# reused heat_data1, so the predicted points were never shown).
HeatMap(heat_data1).add_to(g_hm1)
HeatMap(heat_data2).add_to(g_hm2)
map_hooray.add_child(g_hm1)
map_hooray.add_child(g_hm2)

folium.LayerControl(collapsed= False).add_to(map_hooray)
map_hooray