In [43]:
%matplotlib inline

In [44]:
import ast
import os

import numpy as np
import pandas as pd

In [45]:
import igraph as ig

In [46]:
from plotly.offline import init_notebook_mode, iplot, plot
import plotly as py
from plotly.graph_objs import *
init_notebook_mode(connected=True)

# Read and parse data

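Take a look at what the file looks like. The first line holds the vertex and edge counts; every following line is one edge, written as a tab-separated pair of source and target coordinate tuples: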

In [47]:
! head ./example_graphs/trip_data_2013_12_31_cleaned.csv.el.001

1123	16610
(-74.284,40.644999999999925)	(-74.26000000000002,40.63799999999993)
(-74.25200000000002,40.74999999999989)	(-73.88400000000023,40.77099999999989)
(-74.23600000000003,40.70099999999991)	(-74.00400000000016,40.7219999999999)
(-74.23600000000003,40.79899999999988)	(-73.89200000000022,40.75699999999989)
(-74.21200000000005,40.74999999999989)	(-73.94800000000019,40.67299999999992)
(-74.18800000000006,40.679999999999914)	(-73.74000000000031,40.95299999999983)
(-74.18800000000006,40.707999999999906)	(-73.78800000000028,40.644999999999925)
(-74.18800000000006,41.043999999999805)	(-74.00400000000016,40.742999999999896)
(-74.18000000000006,40.69399999999991)	(-74.03600000000014,40.7359999999999)


In [48]:
# Load the edge list and parse every "(long,lat)" cell into a tuple of floats
# while reading. ast.literal_eval accepts only Python literals, so a malformed
# or malicious cell raises an error instead of being executed as code, which is
# what eval() would do.
df = pd.read_csv(
    './example_graphs/trip_data_2013_12_31_cleaned.csv.el.001',
    sep='\t',
    skiprows=1,  # the first line holds the vertex/edge counts, not an edge
    names=['source', 'target'],
    converters={'source': ast.literal_eval, 'target': ast.literal_eval},
)

In [49]:
# Split each endpoint's (long, lat) tuple into two scalar columns:
# <endpoint>_long takes element 0 and <endpoint>_lat takes element 1.
for endpoint in ('source', 'target'):
    points = df[endpoint]
    df[endpoint + '_long'] = points.apply(lambda point: point[0])
    df[endpoint + '_lat'] = points.apply(lambda point: point[1])

In [50]:
# Preview the first rows, including the split-out longitude/latitude columns.
df.head()

Unnamed: 0,source,target,source_long,source_lat,target_long,target_lat
0,"(-74.284, 40.645)","(-74.26, 40.638)",-74.284,40.645,-74.26,40.638
1,"(-74.252, 40.75)","(-73.884, 40.771)",-74.252,40.75,-73.884,40.771
2,"(-74.236, 40.701)","(-74.004, 40.722)",-74.236,40.701,-74.004,40.722
3,"(-74.236, 40.799)","(-73.892, 40.757)",-74.236,40.799,-73.892,40.757
4,"(-74.212, 40.75)","(-73.948, 40.673)",-74.212,40.75,-73.948,40.673


In [51]:
# Column mapping that puts both endpoints onto one shared (name, long, lat) schema.
names = {
    'source': 'name', 'source_long': 'long', 'source_lat': 'lat',
    'target': 'name', 'target_long': 'long', 'target_lat': 'lat',
}

# Stack the source rows on top of the target rows under the shared schema,
# then keep a single row per distinct (name, long, lat) combination.
df_locations = pd.concat(
    [
        df[[endpoint, endpoint + '_long', endpoint + '_lat']].rename(columns=names)
        for endpoint in ('source', 'target')
    ],
    ignore_index=True,
).drop_duplicates()

In [52]:
len(df)  # number of edges (one row per source/target pair)

16610

In [53]:
len(df_locations)  # number of vertices (one row per distinct location)

1123

# Build graph

In [54]:
# A single marker trace holding every distinct location; each marker carries
# its location's name tuple as text.
# A plain list and a plain dict replace the generic graph_objs.Data and
# graph_objs.Marker wrappers, which plotly has deprecated.
locations = [
    Scattermapbox(
        lat=df_locations['lat'],
        lon=df_locations['long'],
        mode='markers',
        marker=dict(size=9),
        text=df_locations['name'],
    )
]

In [55]:
# One line trace per edge, joining the source point to the target point.
# Only the first 10 edges are drawn.
# Rows are iterated positionally, so this does not depend on the frame having
# a default 0..n-1 integer index the way `df[col][i]` label lookups do.
# A plain list replaces the deprecated graph_objs.Data wrapper.
paths = [
    Scattermapbox(
        lat=[edge.source_lat, edge.target_lat],
        lon=[edge.source_long, edge.target_long],
        mode='lines',
    )
    for edge in df.head(10).itertuples(index=False)
]

In [56]:
# Prefer a token supplied through the MAPBOX_ACCESS_TOKEN environment variable
# so the credential does not have to live in the notebook.
# NOTE(review): the literal fallback only keeps existing runs working; it is a
# credential committed to source and should be rotated and then removed.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiY2JsZWtlciIsImEiOiJjamRrbDdhbG0wMmN0MndvMDVzbHJ5dXZ1In0.tP2zTKOSFRDCDSj84K2ceA',
)

In [63]:
# Fixed-size 1600x800 figure on a Mapbox base map, with the legend hidden.
layout = Layout(
    autosize=False,
    width=1600,
    height=800,
    hovermode='closest',
    showlegend=False,
    mapbox={
        'accesstoken': mapbox_access_token,
        # Initial view: map centre (lat is the y coordinate, lon is x) and zoom level.
        'center': {'lat': 40.734816, 'lon': -73.960158},
        'zoom': 10,
        'bearing': 0,
        'pitch': 0,
    },
)

In [64]:
# Combine the traces (edge lines first, then the location markers) with the
# layout and render the figure inline.
fig = {'data': paths + locations, 'layout': layout}
iplot(fig)