In [1]:
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
import json
import os

from datetime import datetime
from os import path
from matplotlib import pyplot as plt

In [2]:
# Load the station metadata snapshot (parsed JSON) into `station_data`.
fname = '../station_information_100319.json'
# Decode explicitly as UTF-8 rather than relying on the platform's default
# text encoding, which is locale dependent.
with open(fname, encoding='utf-8') as sf:
    station_data = json.load(sf)

In [3]:
# Convert the station metadata into a dataframe and save it.
station_df = pd.DataFrame(station_data["data"]["stations"])

# Add region information to the spreadsheet.
# The first two characters of each station's short_name are the lookup key
# into the city / region tables below.
short_names = station_df.short_name.values
# .str[:2] yields NaN (instead of raising) for a missing short_name.
prefix_series = station_df.short_name.str[:2]
prefixes = prefix_series.tolist()
city_map = {'BK': 'Berkeley',
            'SF': 'San Francisco',
            'EM': 'Emeryville',
            'SJ': 'San Jose',
            'OK': 'Oakland'}
region_map = {'BK': 'East Bay',
              'SF': 'San Francisco',
              'EM': 'East Bay',
              'SJ': 'South Bay',
              'OK': 'East Bay'}
# Series.map leaves NaN for prefixes that are absent from the lookup table,
# so one unexpected station no longer aborts the whole cell with a KeyError.
station_df['city'] = prefix_series.map(city_map)
station_df['region'] = prefix_series.map(region_map)

# Report unmapped prefixes explicitly so the NaNs above are never silent.
# (city_map and region_map share the same keys, so one check covers both.)
unmapped_prefixes = sorted(set(prefix_series.dropna()) - set(city_map))
if unmapped_prefixes:
    print('No city/region mapping for short_name prefixes:', unmapped_prefixes)

# Make sure the output directory exists before writing (fresh checkouts).
os.makedirs('../data', exist_ok=True)
station_df.to_csv('../data/station_info.csv')

In [7]:
# Reading all the JSON files we downloaded.
# os.listdir returns every directory entry, so keep only *.json names;
# anything else (editor/OS droppings, sub-directories) would otherwise
# break the timestamp parsing below.
downloads_dir = '../downloads'
json_fnames = sorted(fn for fn in os.listdir(downloads_dir)
                     if fn.endswith('.json'))

keys2use = set(['num_bikes_available',
                'num_docks_available',
                'num_docks_disabled',
                'station_id',
                'is_installed',
                'is_returning',
                'num_ebikes_available',
                'num_bikes_disabled',
                'is_renting',
                'last_reported',])
# Iteration order of a set of strings is not stable between interpreter
# runs, so fix the order once to get reproducible column order downstream.
ordered_keys = sorted(keys2use)

# Dict maps keys to lists of data from the series of downloads.
# Every list gets exactly one entry per (download, station) pair.
# Pre-seeding the keys means an empty download directory still produces
# the expected (empty) columns.
station_timeseries_dict = {key: [] for key in ['query_ts'] + ordered_keys}

for i, fname in enumerate(json_fnames):
    # timestamp was saved in the filename - pull it out.
    try:
        query_ts = int(fname.split('_')[-1].split('.')[0])
    except ValueError as e:
        print(e)
        print('Skipping file', i, fname)
        continue

    with open(path.join(downloads_dir, fname), encoding='utf-8') as f:
        try:
            json_data = json.load(f)
        except ValueError as e:
            # json.JSONDecodeError and UnicodeDecodeError are both
            # ValueError subclasses; this covers truncated/corrupt downloads
            # without hiding unrelated errors.
            print(e)
            print('Skipping file', i, fname)
            continue

    # The file is closed at this point; only the parsed payload is needed.
    stations_info = json_data["data"]["stations"]
    for station_dict in stations_info:
        station_timeseries_dict["query_ts"].append(query_ts)

        for key in ordered_keys:
            # .get() records None when a station entry lacks the key, which
            # keeps all the lists the same length.
            station_timeseries_dict[key].append(station_dict.get(key))

Expecting ':' delimiter: line 1 column 57345 (char 57344)
10535 station_status_1571632118993.json


In [8]:
# Turn the accumulated per-key lists into one table (one row per
# download/station observation). Expect this to take a while.
station_timeseries_df = pd.DataFrame(station_timeseries_dict)

# Short aliases for the two columns every derived quantity is built from.
bikes_available = station_timeseries_df['num_bikes_available']
docks_available = station_timeseries_df['num_docks_available']

# Capacity here is bikes + open docks as reported in the same row.
# NOTE(review): when both counts are 0 the ratio is presumably NaN and the
# row is flagged as both full and empty - confirm that is intended.
total_docks = bikes_available + docks_available
full = bikes_available == total_docks
empty = bikes_available == 0
full_or_empty = np.logical_or(full, empty)

# Derived columns, added in a fixed order: ratio first, then the 0/1 flags,
# then the distance of the ratio from the half-full point.
station_timeseries_df['fraction_full'] = bikes_available / total_docks
for flag_name, flag_values in (('full', full),
                               ('empty', empty),
                               ('full_or_empty', full_or_empty)):
    station_timeseries_df[flag_name] = flag_values.astype('int32')

fraction_offset = station_timeseries_df['fraction_full'] - 0.5
station_timeseries_df['half_full_dev'] = np.abs(fraction_offset)

# Persist the table - this write can be slow as well.
station_timeseries_df.to_csv('../data/stations_timeseries.csv')