## Initialize

In [1]:
import csv
import json
import geojson
import requests
import pandas as pd
import geopandas as gpd
from datetime import datetime

In [2]:
# GBFS feed URLs: the system-regions feed and the station-information feed.
gbfs_regions = 'https://gbfs.fordgobike.com/gbfs/en/system_regions.json'
gbfs_stations = 'https://gbfs.fordgobike.com/gbfs/en/station_information.json'

## OD trips per month
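`.csv` --> `.json`: convert each monthly trip file into a JSON file of trip counts per origin–destination (OD) station pair.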

In [3]:
# Monthly trip CSV file names, in chronological order.
data_files = [
    # 2019
    "201906-baywheels-tripdata.csv",
    "201907-baywheels-tripdata.csv",
    "201908-baywheels-tripdata.csv",
    "201909-baywheels-tripdata.csv",
    "201910-baywheels-tripdata.csv",
    "201911-baywheels-tripdata.csv",
    "201912-baywheels-tripdata.csv",
    # 2020
    "202001-baywheels-tripdata.csv",
]

In [4]:
def is_number(s):
    """Return True if ``s`` can be converted with ``float()``, otherwise False.

    ``None`` (which ``csv.DictReader`` yields for fields missing from a short
    row) and other non-convertible objects return False instead of raising.
    Note that ``float()`` also accepts strings such as ``"nan"`` and ``"inf"``,
    so those return True.

    Parameters
    ----------
    s : object
        Value to test, typically a raw CSV field.

    Returns
    -------
    bool
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable string; TypeError: None or another non-numeric type.
        return False
    return True

In [5]:
def od_trips_json(f):
    """Aggregate one monthly trip CSV into origin-destination counts and save as JSON.

    Reads ``../trip_data/<f>``, counts the trips for every
    ``(start_station_id, end_station_id)`` pair and writes the result to
    ``../json_data/<first six characters of f>-tripdata.json``.

    The JSON document is a one-element list containing a dict that maps each
    origin station id to a list of ``{"source", "target", "count"}`` records,
    ordered by first appearance in the CSV. Only the trip count is recorded
    per link. Station ids are kept exactly as they appear in the CSV (strings),
    so trips with a blank station id are grouped under the empty string.

    Parameters
    ----------
    f : str
        File name of the trip CSV inside ``../trip_data/``. Its first six
        characters are reused in the output file name.

    Raises
    ------
    KeyError
        If the CSV lacks a ``start_station_id`` or ``end_station_id`` column.
    """
    linksByOrigin = {}
    # (origin, destination) -> the link record stored in linksByOrigin, so each
    # trip costs one dict lookup instead of scanning every pair seen so far.
    link_by_pair = {}

    # newline='' lets the csv module handle line endings inside quoted fields.
    with open("../trip_data/" + f, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        print("read csv: " + f)

        for trip in reader:
            origin = trip['start_station_id']
            destination = trip['end_station_id']

            pair = (origin, destination)
            link = link_by_pair.get(pair)
            if link is None:
                # First trip for this pair: register the record under its origin.
                link = {'source': origin, 'target': destination, 'count': 0}
                link_by_pair[pair] = link
                linksByOrigin.setdefault(origin, []).append(link)
            link['count'] += 1

    # The top-level list wrapper is part of the output format; keep it.
    with open("../json_data/" + f[:6] + "-tripdata.json", "w") as jsonfile:
        json.dump([linksByOrigin], jsonfile)

    print("saved as json: " + f[:6] + "-tripdata.json")

## Stations per month
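`.csv` --> `.geojson`: extract the unique stations that appear in each monthly trip file and save them as point features.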

In [6]:
def stations_geojson(f):
    """Extract the stations seen in one monthly trip CSV and save them as GeoJSON.

    Reads ``../trip_data/<f>``, stacks the start-station and end-station
    columns into one table (``station_id``, ``name``, ``lat``, ``lon``), drops
    rows with any missing value and exact duplicate rows, and writes the
    result as point features to
    ``../geojson_data/stations_<first six characters of f>.geojson``.

    Duplicates are removed across all four columns, so a station whose name or
    coordinates differ between trips appears once per distinct combination.

    Parameters
    ----------
    f : str
        File name of the trip CSV inside ``../trip_data/``.
    """
    # read in CSV
    print("read csv: " + f)
    trips = pd.read_csv("../trip_data/" + f)

    station_cols = ['station_id', 'name', 'lat', 'lon']
    start_cols = ['start_station_id', 'start_station_name',
                  'start_station_latitude', 'start_station_longitude']
    end_cols = ['end_station_id', 'end_station_name',
                'end_station_latitude', 'end_station_longitude']

    # origins and destinations, renamed to a shared schema
    origins = trips[start_cols].rename(columns=dict(zip(start_cols, station_cols)))
    destinations = trips[end_cols].rename(columns=dict(zip(end_cols, station_cols)))

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames
    # the same way. Keep only complete, unique rows.
    stations = (
        pd.concat([origins, destinations])
        .dropna()
        .drop_duplicates()
    )

    # turn data into geospatial dataframe (x = longitude, y = latitude)
    stations_gdf = gpd.GeoDataFrame(
        stations,
        geometry=gpd.points_from_xy(stations.lon, stations.lat),
    )

    # save data to file
    stations_gdf.to_file(driver='GeoJSON', filename="../geojson_data/stations_"+f[:6]+".geojson")
    print("saved as geojson: stations_" + f[:6] + ".geojson")

## Heatmap of trips per month
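`.csv` --> `.csv`: count trip starts per station for every (day of week, hour) combination in each monthly trip file.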

In [18]:
def _parse_station_id(raw):
    """Return ``raw`` as an int station id, or None if it is blank or not a whole number."""
    try:
        return int(raw)
    except (TypeError, ValueError):
        pass
    # Accept float-formatted ids such as "3.0", matching the float -> int
    # conversion pandas applies when collecting the station list.
    try:
        as_float = float(raw)
    except (TypeError, ValueError):
        return None
    return int(as_float) if as_float.is_integer() else None


def _parse_start_time(raw):
    """Parse a start timestamp with or without fractional seconds; None if neither format fits."""
    for fmt in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            return datetime.strptime(raw, fmt)
        except ValueError:
            continue
    return None


def heatmap_csv(f):
    """Count trip starts per station for every (weekday, hour) slot and save as CSV.

    Reads ``../trip_data/<f>`` and writes
    ``../heatmap_data/heatmap_data_<first six characters of f>.csv`` with one
    row per (day, hour) combination (7 x 24 rows). Columns are ``day``
    (``datetime.weekday()``: Monday is 0), ``hour`` (0-23), one column per
    start station id in ascending order, and ``total``.

    Rows whose start station id or start time cannot be parsed are skipped.

    Parameters
    ----------
    f : str
        File name of the trip CSV inside ``../trip_data/``.
    """
    # collect stations
    print("read csv: " + f)
    trips = pd.read_csv("../trip_data/" + f)
    start_ids = trips['start_station_id'].dropna().astype(int)
    # Sorted so the column order is deterministic rather than set-iteration order.
    stations = sorted(int(s) for s in start_ids.unique())
    # station id -> column position, replacing a list.index() scan per trip
    column_of = {station: col for col, station in enumerate(stations)}

    # one counter row per (day, hour); the extra last slot holds the total
    heatmap_dict = {
        (d, h): [0] * (len(stations) + 1)
        for d in range(7)
        for h in range(24)
    }

    # fill dict with data
    with open("../trip_data/" + f, 'r', newline='') as csvfile:
        for trip in csv.DictReader(csvfile):
            station_id = _parse_station_id(trip['start_station_id'])
            started = _parse_start_time(str(trip['start_time']))
            column = column_of.get(station_id)
            if started is None or column is None:
                # Best effort: rows without a usable station or timestamp are ignored.
                continue
            counts = heatmap_dict[(started.weekday(), started.hour)]
            # Station and total are updated together so they cannot diverge.
            counts[column] += 1
            counts[-1] += 1

    # write to CSV
    with open("../heatmap_data/heatmap_data_"+f[:6]+".csv", "w", newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["day", "hour"] + [str(s) for s in stations] + ["total"])
        for d in range(7):
            for h in range(24):
                writer.writerow([d, h] + heatmap_dict[(d, h)])
    print("saved heatmap as: heatmap_data_"+f[:6]+".csv")

## RUN

In [20]:
# Produce all three derived files (OD json, stations geojson, heatmap csv)
# for each monthly trip file, in that order.
# NOTE(review): the [1:] slice skips the first entry of data_files
# ('201906-baywheels-tripdata.csv') -- confirm this is intentional.
for f in data_files[1:]:
    for export in (od_trips_json, stations_geojson, heatmap_csv):
        export(f)

read csv: 201907-baywheels-tripdata.csv
saved as json: 201907-tripdata.json
read csv: 201907-baywheels-tripdata.csv
saved as geojson: stations_201907.geojson
read csv: 201907-baywheels-tripdata.csv
saved heatmap as: heatmap_data_201907.csv
read csv: 201908-baywheels-tripdata.csv
saved as json: 201908-tripdata.json
read csv: 201908-baywheels-tripdata.csv
saved as geojson: stations_201908.geojson
read csv: 201908-baywheels-tripdata.csv
saved heatmap as: heatmap_data_201908.csv
read csv: 201909-baywheels-tripdata.csv
saved as json: 201909-tripdata.json
read csv: 201909-baywheels-tripdata.csv
saved as geojson: stations_201909.geojson
read csv: 201909-baywheels-tripdata.csv
saved heatmap as: heatmap_data_201909.csv
read csv: 201910-baywheels-tripdata.csv
saved as json: 201910-tripdata.json
read csv: 201910-baywheels-tripdata.csv
saved as geojson: stations_201910.geojson
read csv: 201910-baywheels-tripdata.csv
saved heatmap as: heatmap_data_201910.csv
read csv: 201911-baywheels-tripdata.csv
