In [1]:
import csv
import json
import datetime as datetime
import pandas as pd
import geopandas as gpd

In [2]:
# Project root relative to this notebook; the cells below build every input
# and output path by appending a sub-folder name to it.
working_dir = r'../'

# Monthly Bay Wheels trip exports to process. The first six characters of each
# file name (YYYYMM) are reused by the cells below to name their output files.
data_files = [
    month + '-baywheels-tripdata.csv'
    for month in ('202011', '202012', '202101')
]

### CSV to GeoJSON

In [34]:
# Write one GeoJSON file of unique stations for every monthly trip file.
for f in data_files:
    print("reading csv: " + f)
    trips = pd.read_csv(working_dir+"/trip_data/"+f)

    # Pull the stations seen at the start and at the end of each trip into a
    # shared schema. Columns are renamed through an explicit mapping so that
    # ids end up in "station_id" and names in "name"; assigning a positional
    # list of labels previously swapped the two.
    endpoints = []
    for side in ("start", "end"):
        endpoint = trips[[
            side + "_station_id",
            side + "_station_name",
            side + "_lat",
            side + "_lng",
        ]].dropna()
        endpoints.append(endpoint.rename(columns={
            side + "_station_id": "station_id",
            side + "_station_name": "name",
            side + "_lat": "lat",
            side + "_lng": "lon",
        }))
    #end for

    # Merge origins and destinations, keeping the first row of each station.
    # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
    stations = (
        pd.concat(endpoints)
        .drop_duplicates(subset=['station_id', 'name'], keep='first')
        .reset_index(drop=True)
    )

    # Convert to a GeoDataFrame (x = longitude, y = latitude) and save it,
    # naming the file after the YYYYMM prefix of the input file.
    gdf = gpd.GeoDataFrame(stations, geometry=gpd.points_from_xy(stations.lon, stations.lat))
    gdf.to_file(working_dir+"/geojson_data/stations_"+f[:6]+".geojson", driver="GeoJSON")
#end for

reading csv: 202011-baywheels-tripdata.csv
reading csv: 202012-baywheels-tripdata.csv
reading csv: 202101-baywheels-tripdata.csv


### Day Hour Heatmap by Station

In [None]:
# Timestamp layouts accepted for the trip start column, tried in order
# (with and then without fractional seconds).
_START_TIME_FORMATS = ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S')


def _parse_start_time(text):
    """Return ``text`` parsed as a datetime, or None if no known layout matches."""
    for fmt in _START_TIME_FORMATS:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            continue
        #end try
    #end for
    return None


# For every monthly trip file, count trips per (weekday, hour, start station)
# and write a table with one row per weekday/hour, one column per station and
# a final "total" column.
for f in data_files:
    station_ids = set()
    # (weekday, hour, station_id) -> number of trips started there and then
    trip_counts = {}

    # A single pass collects both the station list and the counts. Counts are
    # keyed by station id, so no per-trip list.index() scan is needed.
    with open(working_dir + "/trip_data/" + f, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        print("reading csv: " + f)
        for trip in reader:
            # Skip trips whose start station id is blank, missing or not an
            # integer.
            try:
                station_id = int(trip['start_station_id'])
            except (TypeError, ValueError):
                continue
            #end try
            station_ids.add(station_id)

            # The start timestamp column is read as 'started_at' when present,
            # otherwise as 'start_time'.
            raw_start = trip['started_at'] if 'started_at' in trip else trip['start_time']
            started = _parse_start_time(str(raw_start))
            if started is None:
                # Unparseable timestamp: the station still gets a column, but
                # the trip is not counted.
                continue
            #end if
            key = (started.weekday(), started.hour, station_id)
            trip_counts[key] = trip_counts.get(key, 0) + 1
        #end for
    #end with

    # Sorted so the column order of the output is stable and easy to scan.
    stations = sorted(station_ids)

    # write to CSV (newline='' stops the platform from rewriting the '\n'
    # line terminator requested below)
    with open(working_dir+ "/heatmap_data/heatmap_data_"+f[:6]+".csv", "w", newline='') as csv_file:
        writer = csv.writer(csv_file, lineterminator='\n')
        writer.writerow(["day"] + ["hour"] + stations + ["total"])
        for d in range(7):
            for h in range(24):
                per_station = [trip_counts.get((d, h, s), 0) for s in stations]
                # Every counted trip belongs to exactly one station column,
                # so the total is the sum of the row.
                writer.writerow([d, h] + per_station + [sum(per_station)])
            #end for
        #end for
    #end with
#end for

### Links by Origin Parser

In [None]:
def is_number(s):
    """Report whether ``s`` can be converted to a float.

    Args:
        s: Value to test, e.g. a raw CSV field. May be None, which
            csv.DictReader produces for fields missing from a short row.

    Returns:
        True if ``float(s)`` succeeds, otherwise False. Strings such as
        "nan" and "inf" count as numbers because ``float`` accepts them.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # TypeError covers None and other non-string, non-numeric values,
        # which previously escaped as an uncaught exception.
        return False
    return True

In [None]:
# For every monthly trip file, count trips per (origin, destination) station
# pair and save the links grouped by origin station as JSON.
for f in data_files:
    # origin station id -> list of {'source', 'target', 'count'} link dicts,
    # in the order each destination was first seen
    linksByOrigin = {}
    # (origin, destination) -> the link dict stored in linksByOrigin, so an
    # existing link is found by key instead of scanning lists for every trip
    link_index = {}
    with open(working_dir + "/trip_data/" + f, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        print("read csv: " + f)
        for trip in reader:
            # Station ids are kept as the raw CSV strings; blank ids are not
            # filtered out and are grouped under the empty-string key.
            origin = trip['start_station_id']
            destination = trip['end_station_id']
            links = linksByOrigin.setdefault(origin, [])
            link = link_index.get((origin, destination))
            if link is None:
                link = {
                    'source': origin,
                    'target': destination,
                    'count': 0
                }
                links.append(link)
                link_index[(origin, destination)] = link
            #end if
            link['count'] += 1
        #end for
    #end with

    # write to json (the mapping is wrapped in a one-element list)
    with open(working_dir + "/json_data/" + f[:6] + "-tripdata.json", "w") as jsonfile:
        json.dump([linksByOrigin], jsonfile)
    #end with
    print("saved as json: " + f[:6] + "-tripdata.json")
#end for