Here, I plot Citi Bike trips from the month of August 2018 over a map of New York City. The Citi Bike dataset only contains the duration and location of the beginning/end of each ride, so I will display their appromixate location, assuming they are moving in a straight line at a constant velocity.

I do not plot any rides that begin/end at the same stop. I chose to do this because these are rides that will be poorly approximated by the method above (their average velocity is 0)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#Convert time of ride from seconds to minutes, rounded down
def sec_to_min(seconds:int)->'int(Minutes)':
    return int(seconds/60)

#Adjust time to represent how many minutes into month
def fix_time_formatting(time:int)->'int(Minutes into month)':
    day = int(time[8:10])
    hour = int(time[11:13])
    minute = int(time[14:16])
    return (1440*(day-1) + 60*hour + minute)

#Import data into df 
df_trips = pd.read_csv('201808-citibike-tripdata.csv')
df_trips['tripduration'] = df_trips['tripduration'].apply(sec_to_min)
df_trips['mins_into_month'] = df_trips['starttime'].apply(fix_time_formatting)
df_trips = df_trips.dropna(how = 'any',axis=0) 

In [2]:
#Prepare weather dataset

#Import hourly temperature and precipitation
df_weather = pd.read_csv('Central_Park_weather_2018.csv', usecols=[1,43,44], 
                         dtype={'DATE':str, 'HourlyDryBulbTemperature':str, 'HourlyPrecipitation':str})

#Keep only readings from the month of August
df_weather_august = df_weather[df_weather['DATE'].str.contains('2018-08')]
df_weather_august.fillna(method = 'ffill')

#Remove all null temperature rows
df_weather_august = df_weather_august[df_weather_august.HourlyDryBulbTemperature.notnull()]

#Replace all T values from Precipitation column
df_weather_august.loc[df_weather_august.HourlyPrecipitation == 'T', 'HourlyPrecipitation'] = '0'

#Fix time formatting
df_weather_august['mins_into_month'] = df_weather_august['DATE'].apply(fix_time_formatting)

In [3]:
#Merge datasets

#Add initial weather conditions
df_weather_august.iloc[0] = ['', 75, 0, 0]

#Merge on mins_into_month
df_merged = df_trips.merge(df_weather_august, on = 'mins_into_month', how = 'left')

#Forward fill: weather is accurate until a new reading is available
df_merged = df_merged.fillna(method = 'ffill')

In [4]:
%matplotlib notebook
import matplotlib.image as mimage
nyc_area_map = mimage.imread('NYCmap.png')

#Turns a row from the df into a list for plotting
#Velocity is calculated based on moving at const speed from start->end
def rideify(ride)->'List[posx, posy, velx, vely, time]':
    duration = int(ride['tripduration'])
    initial_position_x = float(ride['start station longitude'])
    initial_position_y = float(ride['start station latitude'])
    final_position_x = float(ride['end station longitude'])
    final_position_y = float(ride['end station latitude'])
    velocity_x = (final_position_x - initial_position_x)/float(duration)
    velocity_y = (final_position_y - initial_position_y)/float(duration)
    return [initial_position_x, initial_position_y, velocity_x, velocity_y, duration+1]

#Prepares a list of X,Y coordinates from each ride for plotting
def process_rides_for_plot(trips)->'List[longs,lats]':
    X = []
    Y = []    
    for trip in trips:
        X.append(trip[0])
        Y.append(trip[1])
        trip[4] -= 1
    return [X,Y]

#Updates the position coordinates for each ride
def move_bicycles(trips)->'List[trips[]]':
    updated_trips = []
    for trip in trips:
        if trip[4] > 0:
            trip[0] += trip[2]
            trip[1] += trip[3]
            updated_trips.append(trip)
    return updated_trips

#Get the day:hour:min from how many minutes into the month
def get_date(min_into_month)->"Str(DD, YYYY HH:MM)":
    day = str(1 + int(min_into_month/1440))
    hour = str(int((min_into_month%1440)/60))
    minute = ('0' + str(int((min_into_month%1440)%60)))[-2:]
    return(day + ', 2018 ' + hour + ':' + minute)

#Generate the graphs
fig, ax = plt.subplots(1, figsize=(6,6))
            
#List of current bike trips
active_trips = list()

#Plot trips
mins_into_month = 6*60
current_temp = 0
current_precip = 0.0
while mins_into_month < (6*60+300):
    #Add trips that started on a given minute
    rides = df_merged.loc[df_merged['mins_into_month'] == mins_into_month]
    for index, row in rides.iterrows():
        #Only plot trips that start/end at different stations
        if int(row['start station id']) != int(row['end station id']):
            active_trips.append(rideify(row))
    
    #Assemble trips into X,Y coordinates for plotting
    [X,Y] = process_rides_for_plot(active_trips)
    
    #Update temp and precipitation
    if mins_into_month in df_merged['mins_into_month'].values:
        current_temp = df_merged.loc[df_merged['mins_into_month'] == mins_into_month, 'HourlyDryBulbTemperature'].iloc[0]
        current_precip = df_merged.loc[df_merged['mins_into_month'] == mins_into_month, 'HourlyPrecipitation'].iloc[0]
    
    #Build and draw plot
    ax.clear()
    ax.scatter(X,Y)
    ax.set_title(('August ' + get_date(mins_into_month))
                 + ('   Trips: ' + str(len(active_trips)))
                 + ('   Temp: ' + str(current_temp) + ' F')
                 + ('   Precip: ' + str(current_precip) + ' in/hr'))
    ax.imshow(nyc_area_map, extent=[-74.049279, -73.901044, 
                                    40.701997, 40.799448])
    ax.set_ylim([40.7,40.8])
    ax.set_xlim([-74.05, -73.9])
    ax.set(xlabel = 'Longitude')
    ax.set(ylabel = 'Latitude')
    fig.canvas.draw()
    
    #Adjust bicycle positions and remove finished trips from active trips list
    active_trips = move_bicycles(active_trips)
    
    #Move one minute forward
    mins_into_month += 1

<IPython.core.display.Javascript object>

In [5]:
%matplotlib notebook
import matplotlib.image as mimage
nyc_area_map = mimage.imread('NYCmap.png')

#Turns a row from the df into a list for plotting
def rideify(ride)->'List[start_station_id, duration]':
    start_station = int(ride['start station id'])
    station_rates[station_ids.index(start_station)] += 1
    duration = int(ride['tripduration'])
    return [start_station, duration+1]

#Prepares a list of X,Y coordinates from each station for plotting
def process_stations_for_plotting(stations)->'List[X,Y] for plotting':
    X = []
    Y = []    
    for station in stations:
        X.append(station[0])
        Y.append(station[1])
    return [X,Y]

#Updates the position coordinates for each ride
def move_bicycles(trips)->'List[trips[]]':
    updated_trips = []
    for trip in trips:
        if trip[1] > 0:
            trip[1] -= 1
            updated_trips.append(trip)
        else:
            station_rates[station_ids.index(trip[0])] -= 1
    return updated_trips

#Get the day:hour:min from how many minutes into the month
def get_date(min_into_month)->"Str(DD, YYYY HH:MM)":
    day = str(1 + int(min_into_month/1440))
    hour = str(int((min_into_month%1440)/60))
    minute = ('0' + str(int((min_into_month%1440)%60)))[-2:]
    return(day + ', 2018 ' + hour + ':' + minute)

#Identify station coordinates for plotting
station_list_df = df_trips.drop_duplicates('start station id')
station_coords = station_list_df[['start station latitude', 'start station longitude']].values.tolist()
station_ids = station_list_df['start station id'].values.tolist()

[Y,X] = process_stations_for_plotting(station_coords)

#Generate the graphs
fig, ax = plt.subplots(1, figsize=(6,6))

#List of current bike trips
active_trips = list()

#List of stations and their usage
station_ids = [int(x) for x in station_ids]
station_rates = [1 for i in range(len(station_ids))]

#Plot trips
mins_into_month = 6*60
current_temp = 0
current_precip = 0.0
while mins_into_month < 6*60+300:
    #Add trips that started on a given minute
    rides = df_merged.loc[df_merged['mins_into_month'] == mins_into_month]
    for index, row in rides.iterrows():
        active_trips.append(rideify(row))
        
    #Update temp and precipitation
    if mins_into_month in df_merged['mins_into_month'].values:
        current_temp = df_merged.loc[df_merged['mins_into_month'] == mins_into_month, 'HourlyDryBulbTemperature'].iloc[0]
        current_precip = df_merged.loc[df_merged['mins_into_month'] == mins_into_month, 'HourlyPrecipitation'].iloc[0]

    #Build and draw plot
    ax.clear()
    ax.scatter(X, Y, s = [8*rate for rate in station_rates])
    ax.set_title(('August ' + get_date(mins_into_month))
                 + ('   Temp: ' + str(current_temp) + ' F')
                 + ('   Precipitation: ' + str(current_precip) + ' in/hr'))
    ax.imshow(nyc_area_map, extent=[-74.049279, -73.901044, 
                                    40.701997, 40.799448])
    ax.set_ylim([40.7,40.8])
    ax.set_xlim([-74.05, -73.9])
    ax.set(xlabel = 'Longitude')
    ax.set(ylabel = 'Latitude')
    fig.canvas.draw()
    
    #Adjust bicycle positions and remove finished trips from active trips list
    active_trips = move_bicycles(active_trips)
    
    #Move one minute forward
    mins_into_month += 1

<IPython.core.display.Javascript object>

In [6]:
#Count how many rides took place in [good, bad] weather
station_weather_rates = [[0,0] for i in range(len(station_ids))]

#[Minutes of good weather, minutes of bad weather]
weather_mins = [0,0]

#Make a table with the time, station id, and weather conditions of each ride
weather_rides = df_merged.apply(lambda row: [int(row['starttime'][11:13]), int(row['start station id']), 
                                             int(row['HourlyDryBulbTemperature']), float(row['HourlyPrecipitation']), 
                                             int(row['mins_into_month'])], axis=1)

mins_into_month = -1
for i in weather_rides:
    #If within 6AM-7PM
    if i[0] >= 6 and i[0] < 19:
        #If weather is good
        if ((i[2] > 32 and i[2] < 90) and (i[3] < 0.02)):
            station_weather_rates[station_ids.index(i[1])][0] += 1
            if i[4] > mins_into_month:
                weather_mins[0] += 1
                mins_into_month = i[4]
        else:
            station_weather_rates[station_ids.index(i[1])][1] += 1
            if i[4] > mins_into_month:
                weather_mins[1] += 1
                mins_into_month = i[4]

In [16]:
final_station_list = []
final_station_rates = []

for i in range(len(station_weather_rates)):
    if station_weather_rates[i][0] >= 10 and station_weather_rates[i][1] >= 10:
        final_station_list.append(station_ids[i])
        final_station_rates.append([station_weather_rates[i][0], station_weather_rates[i][1]])

good_ratio = [final_station_rates[i][0]/weather_mins[0] for i in range(len(final_station_rates))]
bad_ratio = [final_station_rates[i][1]/weather_mins[1] for i in range(len(final_station_rates))]
good_to_bad = [good_ratio[i]/bad_ratio[i] for i in range(len(good_ratio))]

print('This is the average ratio of rides taken in good:bad weather:')
print(sum(good_to_bad)/len(good_to_bad))

print('Here are the station ids of those that performed best in good weather vs. bad weather:')
for i in range(4):
    print(str(station_ids[good_to_bad.index(max(good_to_bad))]))
    #print(max(good_to_bad))
    good_to_bad[good_to_bad.index(max(good_to_bad))] = 0

This is the average ratio of rides taken in good:bad weather:
1.6267222113991917
Here are the station ids of those that performed best in good weather vs. bad weather:
3684
3604
3391
3491


In [17]:
%matplotlib notebook
import matplotlib.image as mimage
nyc_area_map = mimage.imread('NYCmap.png')

#Prepares a list of X,Y coordinates from each station for plotting
def process_stations_for_plotting(stations)->'List[X,Y] for plotting':
    X = []
    Y = []    
    for station in stations:
        X.append(station[0])
        Y.append(station[1])
    return [X,Y]

#Identify station coordinates for plotting
station_list_df = df_trips.drop_duplicates('start station id')
station_coords = station_list_df[['start station latitude', 'start station longitude']].values.tolist()
station_ids = station_list_df['start station id'].values.tolist()

bad_weather_stations = []

#Top 3 from earlier
bad_weather_stations.append(station_coords[station_ids.index(3684)])
bad_weather_stations.append(station_coords[station_ids.index(3604)])
bad_weather_stations.append(station_coords[station_ids.index(3391)])
bad_weather_stations.append(station_coords[station_ids.index(3491)])

[Y,X] = process_stations_for_plotting(bad_weather_stations)

#Generate the graphs
fig, ax = plt.subplots(1, figsize=(6,6))

#List of current bike trips
active_trips = list()

#List of stations and their usage
station_ids = [int(x) for x in station_ids]
station_rates = [0 for i in range(len(station_ids))]

#Build and draw plot
ax.clear()
ax.scatter(X, Y)
ax.imshow(nyc_area_map, extent=[-74.049279, -73.901044, 
                                40.701997, 40.799448])
ax.set_ylim([40.7,40.8])
ax.set_xlim([-74.05, -73.9])
ax.set(xlabel = 'Longitude')
ax.set(ylabel = 'Latitude')

<IPython.core.display.Javascript object>

[Text(0, 0.5, 'Latitude')]