In [58]:
import os
import pandas as pd
import sys
from pathlib import Path
import numpy as np
import json

In [37]:
def get_year_files():
    """Return the paths of the Mobi system-data files in the project data folder.

    The data folder is ``data`` inside the directory two levels above the
    current working directory. A directory entry is selected when its name
    contains ``"Mobi_System_Data"``.

    Returns:
        list[str]: Forward-slash paths, sorted by file name so the result does
        not depend on the order in which the OS lists the directory.

    Raises:
        IndexError: If the working directory has fewer than two ancestors.
        OSError: If the data folder cannot be listed (e.g. it does not exist).
    """
    # Join with pathlib rather than a hard-coded "\\data" suffix, which only
    # forms a valid directory path on Windows.
    data_dir = Path(os.getcwd()).parents[1] / "data"
    return [
        (data_dir / file_name).as_posix()
        for file_name in sorted(os.listdir(data_dir))
        if "Mobi_System_Data" in file_name
    ]

In [40]:
def get_year_data(year, files):
    """Load the first zipped CSV in ``files`` whose file name contains ``year``.

    Args:
        year: Year to look for; converted with ``str`` before matching.
        files: Iterable of file paths, searched in order.

    Returns:
        pandas.DataFrame read from the first matching file, or ``None`` when
        no file name contains the year.
    """
    year = str(year)
    print("read " + year)
    for file in files:
        # Match against the file name only: a parent directory that happens to
        # contain the year digits must not make an unrelated file match.
        if year in os.path.basename(file):
            return pd.read_csv(file, compression="zip")
    return None

In [41]:
# List the data directory once and reuse the listing for every year instead of
# re-scanning the folder for each load.
year_files = get_year_files()
data_2018 = get_year_data(2018, year_files)
data_2019 = get_year_data(2019, year_files)
data_2020 = get_year_data(2020, year_files)
data_2021 = get_year_data(2021, year_files)


read 2018
read 2019
read 2020
read 2021


In [43]:
def get_date_range_departure(year_df, start_date, end_date):
    """Select the rows whose ``Departure`` value lies in ``[start_date, end_date]``.

    Both bounds are converted with ``str`` before comparison and both ends are
    inclusive.
    """
    lower, upper = str(start_date), str(end_date)
    in_window = year_df["Departure"].between(lower, upper)
    return year_df.loc[in_window]


In [49]:
def group_by_departure_station(_df):
    """Count rows per ``Departure station`` and order the stations by that count.

    Returns a frame with the columns ``Departure station`` and ``ride_count``,
    sorted by ``ride_count`` from highest to lowest.
    """
    rides_per_station = _df.groupby(['Departure station']).size()
    counts = rides_per_station.reset_index(name='ride_count')
    return counts.sort_values(["ride_count"], ascending=False)


In [55]:
def normalize_ride_count(_df):
    """Return a copy of ``_df`` with ``ride_count`` rescaled by its Euclidean norm.

    Each count is divided by the L2 norm of the whole ``ride_count`` column,
    multiplied by 1000 and rounded to 4 decimal places.

    Args:
        _df: DataFrame with a numeric ``ride_count`` column.

    Returns:
        A new DataFrame; the frame passed in is left untouched, so re-running
        the calling cell always starts from the raw counts.
    """
    norm = np.linalg.norm(_df["ride_count"])
    # Work on a copy so the caller's frame is not silently overwritten.
    normalized = _df.copy()
    normalized["ride_count"] = ((_df["ride_count"] / norm) * 1000).round(4)
    return normalized

In [62]:
def get_station_geo(geo_file=None):
    """Load the pickled station geocodings into a DataFrame sorted by station id.

    The pickle is read as a mapping of station name to a sequence whose first
    three items are used as zip code, latitude and longitude. The station id is
    the text before the first space of the station name.

    Args:
        geo_file: Path of the geocoding pickle. When omitted, it defaults to
            ``data/tmp/geocodings.pickle`` under the directory two levels above
            the current working directory.

    Returns:
        DataFrame with the columns ``index`` (row position before sorting),
        ``station_id``, ``station``, ``zip``, ``lon`` and ``lat``.
    """
    if geo_file is None:
        # The original body read a global ``file_path`` that no cell defines
        # (it is only a local inside get_year_files), so a fresh kernel raised
        # NameError. Presumably it meant that same data directory -- confirm.
        data_dir = Path(os.getcwd()).parents[1] / "data"
        geo_file = (data_dir / "tmp" / "geocodings.pickle").as_posix()

    # NOTE(review): unpickling can execute arbitrary code; only load a
    # geocoding file that comes from a trusted source.
    gps_data = pd.read_pickle(geo_file)

    records = [
        {
            "station_id": name.split(" ")[0],
            "station": name,
            "zip": geo[0],
            # Coordinates are cut to a fixed number of characters before
            # parsing, which caps the stored precision.
            "lon": float(str(geo[2])[0:12]),
            "lat": float(str(geo[1])[0:10]),
        }
        for name, geo in gps_data.items()
    ]
    station_df = pd.DataFrame(
        records, columns=["station_id", "station", "zip", "lon", "lat"]
    )
    station_df = station_df.sort_values(["station_id"], ascending=True)
    return station_df.reset_index()


In [64]:
# Bounds of the departure window; get_date_range_departure treats both ends as
# inclusive and compares them as strings.
WINDOW_START, WINDOW_END = "2020-10-01", "2020-11-01"

# Pipeline: filter the 2020 rides to the window, count rides per departure
# station, rescale the counts, then load the station geocodings.
df = get_date_range_departure(data_2020, WINDOW_START, WINDOW_END)
df_sorted = group_by_departure_station(df)
df_sorted_normalized = normalize_ride_count(df_sorted)
df_station_geo = get_station_geo()

length station 254, zip: 254, lon: 254, lat:254


In [68]:
# Attach the geocoding columns to each station's ride count. The left join
# keeps every counted station; dropna() then discards any row that is missing
# a value in any column.
df_merged = df_sorted_normalized.merge(
    df_station_geo,
    how='left',
    left_on='Departure station',
    right_on='station',
).dropna()
df_merged


Unnamed: 0,Departure station,ride_count,index,station_id,station,zip,lon,lat
0,0209 Stanley Park - Information Booth,179.7039,33.0,0209,0209 Stanley Park - Information Booth,V6G3E2,-123.131478,49.297769
1,0105 Stanley Park - Totem Poles,178.2777,32.0,0105,0105 Stanley Park - Totem Poles,V6G,-123.121176,49.298657
2,0028 Davie & Beach,175.9007,40.0,0028,0028 Davie & Beach,V6G,-123.142139,49.287528
3,0174 1st & Manitoba,174.9499,205.0,0174,0174 1st & Manitoba,R7A6C3,-99.938347,49.838303
4,0050 Bute & Comox,171.1466,50.0,0050,0050 Bute & Comox,V6E1K6,-123.131099,49.283343
...,...,...,...,...,...,...,...,...
197,0233 Thornton & National,9.7458,197.0,0233,0233 Thornton & National,V6A3X2,-123.092723,49.273777
198,0985 Quebec Yard - To Service,9.7458,241.0,0985,0985 Quebec Yard - To Service,H2Z1X4,-73.563392,45.506058
199,0981 Workshop - Service Complete,7.8442,219.0,0981,0981 Workshop - Service Complete,L7L5H7,-79.763537,43.390667
200,0285 Commercial & Pandora,4.5164,191.0,0285,0285 Commercial & Pandora,V5L0A3,-123.070443,49.283236


In [69]:
# Build one "{location: new google.maps.LatLng(lat, lon), weight: w}," entry
# per merged station row.
station_list = [
    "{"
    + "location: new google.maps.LatLng({}, {}), weight: {}".format(
        str(row['lat']), str(row['lon']), row['ride_count']
    )
    + "},"
    for _, row in df_merged.iterrows()
]

# Write the entries to a text file, one per line.
with open('station_google_cmd.txt', 'w') as f:
    f.writelines(entry + '\n' for entry in station_list)

In [7]:
# import urllib.request, json 
# with urllib.request.urlopen("https://vancouver-gbfs.smoove.pro/gbfs/en/station_information.json") as url:
#     s_data = json.loads(url.read().decode())
# s_df = pd.json_normalize(s_data['data']['stations'])
# s_df['station'] = s_df['station_id'] + " " + s_df['name'] 
# s_df.to_csv(r'np.txt', header=None, index=None, sep='\t', mode='a')

Unnamed: 0,station_id,name,lat,lon,capacity,station
0,0001,10th & Cambie,49.262487,-123.114397,35,0001 10th & Cambie
1,0002,Burrard Station (Melville & Dunsmuir),49.285871,-123.121050,28,0002 Burrard Station (Melville & Dunsmuir)
2,0004,Yaletown-Roundhouse Station,49.274566,-123.121817,16,0004 Yaletown-Roundhouse Station
3,0005,Dunsmuir & Beatty,49.279764,-123.110154,26,0005 Dunsmuir & Beatty
4,0006,Olympic Village Station,49.266314,-123.116011,26,0006 Olympic Village Station
...,...,...,...,...,...,...
204,0297,6th & Prince Edward,49.265242,-123.095577,16,0297 6th & Prince Edward
205,0298,6th & Carolina,49.265213,-123.090785,16,0298 6th & Carolina
206,0300,Bute & Barclay,49.284893,-123.128685,14,0300 Bute & Barclay
207,0305,Georgia & Homer,49.280787,-123.115271,18,0305 Georgia & Homer
