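We have 3 different datasets:
1. **Station**: one row per bike station (id, name, latitude/longitude, dock count, city).
2. **Trip**: one row per trip (trip id, start/end date, start/end station, subscriber type).
3. **Weather**: daily weather observations (temperature, humidity, wind, precipitation, ...) keyed by date and zip code.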

In [41]:
import pandas as pd
import numpy as np

import plotly
from plotly import graph_objs as go
import plotly.offline as offline
offline.init_notebook_mode(connected=True)

from helpers.helpers_data import *
from helpers.helpers_func import *
from helpers.tokens import *

# Station

In [3]:
# Load the station table from CSV; the bare `station.shape` shows (rows, columns).
station = pd.read_csv("data/station_data.csv")
station.shape

(76, 6)

In [6]:
# Preview the first rows of the station table to check the loaded columns.
station.head()

Unnamed: 0,Id,Name,Lat,Long,Dock Count,City
0,2,San Jose Diridon Caltrain Station,37.329732,-121.901782,27,San Jose
1,3,San Jose Civic Center,37.330698,-121.888979,15,San Jose
2,4,Santa Clara at Almaden,37.333988,-121.894902,11,San Jose
3,5,Adobe on Almaden,37.331415,-121.8932,19,San Jose
4,6,San Pedro Square,37.336721,-121.894074,15,San Jose


In [15]:
# Sanity check: count missing values per column (all zeros means no gaps).
station.isnull().sum()

Id            0
Name          0
Lat           0
Long          0
Dock Count    0
City          0
dtype: int64

In [24]:
# Per-city summary: number of stations and total number of docks.
# A single groupby computes both aggregates; the columns are then renamed
# to the display labels used in the rest of the notebook.
summary_station = (
    station.groupby("City")
    .agg({"Id": "count", "Dock Count": "sum"})
    .rename(columns={"Id": "Count", "Dock Count": "Total Docking Station"})
)

In [25]:
# Display the per-city station and dock totals.
summary_station

Unnamed: 0_level_0,Count,Total Docking Station
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Mountain View,7,117
Palo Alto,5,75
Redwood City,9,145
San Francisco,39,753
San Jose,16,264


## GeoMapping of the Stations

In [48]:
def plot_stations(df, specific_cities, zoom=13):
    """Plot the stations of the selected cities on an interactive Mapbox map.

    One marker trace is drawn per selected city, and each marker is labelled
    with the station's index in ``df``. The map is centred on the mean
    latitude/longitude of *all* plotted stations, so the view stays sensible
    when more than one city is requested.

    Parameters
    ----------
    df : pandas.DataFrame
        Station table with ``City``, ``Lat`` and ``Long`` columns.
    specific_cities : str or iterable of str
        City name(s) to plot. A single name may be passed as a plain string.
    zoom : int or float, optional
        Initial Mapbox zoom level (default 13). Lower values show a wider
        area, which is usually needed when plotting several cities at once.

    Returns
    -------
    None
        The figure is rendered inline through ``offline.iplot``.

    Notes
    -----
    Relies on two notebook-level names brought in by the helper imports:
    ``map_token`` (the Mapbox access token) and ``colors`` (indexed by city
    name to obtain the marker colour).
    """
    mapbox_access_token = map_token

    # A bare string would otherwise be treated as a sequence of characters
    # (substring matching, and a length equal to the number of letters).
    if isinstance(specific_cities, str):
        specific_cities = [specific_cities]
    wanted = tuple(specific_cities)

    data = []
    lats, lons = [], []
    for city, city_df in df.groupby("City"):
        if city not in wanted:
            continue
        data.append(
            go.Scattermapbox(
                lat=city_df['Lat'],
                lon=city_df['Long'],
                mode='markers+text',
                marker=dict(
                    size=10,
                    color=colors[city],
                ),
                textfont=dict(
                    size=9
                ),
                textposition="bottom center",
                text=list(city_df.index),
                name=city
            )
        )
        # Collect every plotted coordinate so the centre can be computed over
        # all selected stations rather than only the last city visited.
        lats.extend(city_df['Lat'])
        lons.extend(city_df['Long'])

    if lats:
        center_lat = float(np.mean(lats))
        center_lon = float(np.mean(lons))
    else:
        # Nothing matched (or no city requested): still draw an empty map
        # instead of failing on a division by zero / mean of nothing.
        center_lat = center_lon = 0.0

    layout = go.Layout(
        hovermode='closest',
        mapbox=dict(
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=center_lat,
                lon=center_lon
            ),
            pitch=0,
            zoom=zoom
        ),
        title='Stations locations within the 5 Areas',
        autosize=False,
        width=900,
        height=800,
        margin=go.layout.Margin(
            l=100,
            r=100,
            b=100,
            t=100,
            pad=4
        ),
        showlegend=True,
        legend=dict(orientation="h")
    )

    fig = dict(data=data, layout=layout)

    offline.iplot(fig)

In [49]:
# Render the map for the San Jose stations only.
plot_stations(station, ["San Jose"])

# Trip

In [4]:
# Load the trip records from CSV; the bare `trip.shape` shows (rows, columns).
# NOTE(review): the date columns look like day-first strings
# (e.g. "31/08/2015 23:26") and are not parsed here — confirm before any
# datetime work.
trip = pd.read_csv("data/trip_data.csv")
trip.shape

(354152, 6)

In [7]:
# Preview the first rows of the trip table to check the loaded columns.
trip.head()

Unnamed: 0,Trip ID,Start Date,Start Station,End Date,End Station,Subscriber Type
0,913460,31/08/2015 23:26,50,31/08/2015 23:39,70,Subscriber
1,913459,31/08/2015 23:11,31,31/08/2015 23:28,27,Subscriber
2,913455,31/08/2015 23:13,47,31/08/2015 23:18,64,Subscriber
3,913454,31/08/2015 23:10,10,31/08/2015 23:17,8,Subscriber
4,913453,31/08/2015 23:09,51,31/08/2015 23:22,60,Customer


# Weather

In [5]:
# Load the weather observations from CSV; the bare `weather.shape` shows
# (rows, columns).
# NOTE(review): `Date` looks like a day-first string (01/09/2014, 02/09/2014,
# ...) and is not parsed here — confirm before any datetime work.
weather = pd.read_csv("data/weather_data.csv")
weather.shape

(1825, 24)

In [8]:
# Preview the first rows of the weather table to check the loaded columns.
weather.head()

Unnamed: 0,Date,Max TemperatureF,Mean TemperatureF,Min TemperatureF,Max Dew PointF,MeanDew PointF,Min DewpointF,Max Humidity,Mean Humidity,Min Humidity,...,Mean VisibilityMiles,Min VisibilityMiles,Max Wind SpeedMPH,Mean Wind SpeedMPH,Max Gust SpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees,Zip
0,01/09/2014,83.0,70.0,57.0,58.0,56.0,52.0,86.0,64.0,42.0,...,10.0,8.0,16.0,7.0,20.0,0.0,0.0,,290.0,94107
1,02/09/2014,72.0,66.0,60.0,58.0,57.0,55.0,84.0,73.0,61.0,...,10.0,7.0,21.0,8.0,,0.0,5.0,,290.0,94107
2,03/09/2014,76.0,69.0,61.0,57.0,56.0,55.0,84.0,69.0,53.0,...,10.0,10.0,21.0,8.0,24.0,0.0,4.0,,276.0,94107
3,04/09/2014,74.0,68.0,61.0,57.0,57.0,56.0,84.0,71.0,57.0,...,10.0,8.0,22.0,8.0,25.0,0.0,5.0,,301.0,94107
4,05/09/2014,72.0,66.0,60.0,57.0,56.0,54.0,84.0,71.0,57.0,...,9.0,7.0,18.0,8.0,32.0,0.0,4.0,,309.0,94107
