In [1]:
import math
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Root data directory, given as a path relative to the working directory.
DATA_DIR = '../data'
# Directory the per-prefix '<prefix>-citibike-tripdata.csv' files are read from.
CITIBIKE_CSV_DIR = f'{DATA_DIR}/citibike/csv'
# Directory holding exported artifacts: 'station_coords.csv' is read from it
# and the 'adjacency_matrices/' CSVs are written beneath it.
EXPORTS_DIR = f'{DATA_DIR}/citibike/exports'

In [3]:
# File-name prefixes of the trip CSVs to process; each entry is interpolated
# into '<prefix>-citibike-tripdata.csv' by the export loop further down.
DATE_PREFIXES = np.load(f'{DATA_DIR}/citibike/date_prefixes.npy')

In [4]:
# Load the exported station table and collect the distinct values of its
# 'id' column.
stations_df = pd.read_csv(f'{EXPORTS_DIR}/station_coords.csv')
station_ids = stations_df['id'].unique()
# Cell result: the number of distinct station IDs.
len(station_ids)

3167

In [5]:
def is_integer_num(n):
    """Return True if ``n`` is a number holding an integral value.

    Built-in ``int``/``float`` values and NumPy integer/floating scalars are
    recognised. NumPy scalars are checked explicitly because most of them
    (e.g. ``np.int64``, ``np.float32``) do not subclass the built-in types,
    so a plain ``isinstance(n, int)`` / ``isinstance(n, float)`` test would
    report them as non-integral.

    Args:
        n: Any object.

    Returns:
        bool: True for integer types and for floats with no fractional part
        (``72.0``); False for non-integral floats, NaN, infinities and any
        non-numeric object such as a string.
    """
    if isinstance(n, (int, np.integer)):
        return True
    if isinstance(n, (float, np.floating)):
        # Convert to a built-in float first so the check does not depend on
        # the NumPy scalar type providing its own ``is_integer`` method.
        return float(n).is_integer()
    return False

def hash_station_id(station_id):
    """Return the string key used to identify a station.

    Values that ``is_integer_num`` accepts are converted to ``int`` before
    being stringified, so ``72`` and ``72.0`` both become ``'72'``. Every
    other value is passed to ``str()`` unchanged (``5329.03`` becomes
    ``'5329.03'``).
    """
    return str(int(station_id) if is_integer_num(station_id) else station_id)

In [7]:
# Header spellings that appear in the trip CSVs, mapped onto one schema.
# Built once here instead of on every loop iteration.
COLUMN_ALIASES = {
    'start station id': 'start_station_id',
    'start station latitude': 'start_lat',
    'start station longitude': 'start_lon',
    'end station id': 'end_station_id',
    'end station latitude': 'end_lat',
    'end station longitude': 'end_lon',
    'Start Station ID': 'start_station_id',
    'Start Station Latitude': 'start_lat',
    'Start Station Longitude': 'start_lon',
    'End Station ID': 'end_station_id',
    'End Station Latitude': 'end_lat',
    'End Station Longitude': 'end_lon',
    'start_lng': 'start_lon',
    'end_lng': 'end_lon',
}
ADJ_MATRIX_DIR = f'{EXPORTS_DIR}/adjacency_matrices'


def _build_trip_count_matrix(trips):
    """Return a square origin-by-destination trip-count matrix.

    Args:
        trips: DataFrame with 'start_station_id' and 'end_station_id'
            columns, one row per trip.

    Returns:
        DataFrame of integer trip counts. Rows are origins, columns are
        destinations, and both axes carry the same sorted union of the
        station keys seen as either origin or destination. Pairs with no
        trips hold 0.
    """
    # Standardize station IDs into string keys.
    # NOTE(review): a missing ID is not dropped; hash_station_id falls back
    # to str(), so such trips would be counted under the label 'nan'.
    # Confirm this is intended.
    station_pairs = pd.DataFrame({
        'start_station_id': trips['start_station_id'].map(hash_station_id),
        'end_station_id': trips['end_station_id'].map(hash_station_id),
    })

    # Count trips per (origin, destination) pair and pivot destinations into
    # columns. groupby already sorts the keys, so no explicit sort of the
    # trip rows is needed beforehand.
    counts = (station_pairs
              .groupby(['start_station_id', 'end_station_id'])
              .size()
              .unstack(fill_value=0))

    # Make the matrix square: give both axes the full set of station keys.
    ids = counts.columns.union(counts.index).sort_values()
    return counts.reindex(index=ids, columns=ids, fill_value=0)


# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(ADJ_MATRIX_DIR, exist_ok=True)

for date_prefix in tqdm(DATE_PREFIXES):
    df = pd.read_csv(
        f'{CITIBIKE_CSV_DIR}/{date_prefix}-citibike-tripdata.csv'
    ).rename(columns=COLUMN_ALIASES)

    adj_matrix = _build_trip_count_matrix(df)

    out_path = f'{ADJ_MATRIX_DIR}/{date_prefix}_trip_count.csv'
    adj_matrix.to_csv(out_path)
    # tqdm.write keeps the log line from breaking up the progress bar.
    tqdm.write(f'Wrote {out_path}; len: {len(adj_matrix)}')

  1%|          | 1/97 [00:04<06:39,  4.16s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201310_trip_count.csv; len: 330


  2%|▏         | 2/97 [00:06<05:13,  3.30s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201311_trip_count.csv; len: 330


  3%|▎         | 3/97 [00:08<04:01,  2.57s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201312_trip_count.csv; len: 329


  4%|▍         | 4/97 [00:09<03:06,  2.00s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201401_trip_count.csv; len: 329


  5%|▌         | 5/97 [00:10<02:25,  1.58s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201402_trip_count.csv; len: 329


  6%|▌         | 6/97 [00:12<02:25,  1.59s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201403_trip_count.csv; len: 332


  7%|▋         | 7/97 [00:14<02:52,  1.91s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201404_trip_count.csv; len: 330


  8%|▊         | 8/97 [00:18<03:34,  2.40s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201405_trip_count.csv; len: 328


  9%|▉         | 9/97 [00:22<04:11,  2.86s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201406_trip_count.csv; len: 328


 10%|█         | 10/97 [00:25<04:35,  3.17s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201407_trip_count.csv; len: 328


 11%|█▏        | 11/97 [00:29<04:49,  3.37s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201408_trip_count.csv; len: 326


 12%|█▏        | 12/97 [00:33<05:00,  3.53s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201409_trip_count.csv; len: 328


 13%|█▎        | 13/97 [00:36<04:50,  3.46s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201410_trip_count.csv; len: 329


 14%|█▍        | 14/97 [00:38<04:11,  3.03s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201411_trip_count.csv; len: 329


 15%|█▌        | 15/97 [00:40<03:30,  2.57s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201412_trip_count.csv; len: 330


 16%|█▋        | 16/97 [00:41<02:47,  2.07s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201501_trip_count.csv; len: 330


 18%|█▊        | 17/97 [00:42<02:11,  1.64s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201502_trip_count.csv; len: 328


 19%|█▊        | 18/97 [00:43<01:55,  1.46s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201503_trip_count.csv; len: 328


 20%|█▉        | 19/97 [00:45<02:15,  1.74s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201504_trip_count.csv; len: 327


 21%|██        | 20/97 [00:49<03:02,  2.37s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201505_trip_count.csv; len: 327


 22%|██▏       | 21/97 [00:52<03:18,  2.61s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201506_trip_count.csv; len: 326


 23%|██▎       | 22/97 [00:56<03:55,  3.13s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201507_trip_count.csv; len: 330


 24%|██▎       | 23/97 [01:01<04:32,  3.69s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201508_trip_count.csv; len: 421


 25%|██▍       | 24/97 [01:07<05:06,  4.19s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201509_trip_count.csv; len: 453


 26%|██▌       | 25/97 [01:12<05:23,  4.50s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201510_trip_count.csv; len: 471


 27%|██▋       | 26/97 [01:16<05:11,  4.39s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201511_trip_count.csv; len: 475


 28%|██▊       | 27/97 [01:19<04:33,  3.91s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201512_trip_count.csv; len: 477


 29%|██▉       | 28/97 [01:21<03:49,  3.32s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201601_trip_count.csv; len: 479


 30%|██▉       | 29/97 [01:23<03:21,  2.97s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201602_trip_count.csv; len: 481


 31%|███       | 30/97 [01:26<03:24,  3.05s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201603_trip_count.csv; len: 482


 32%|███▏      | 31/97 [01:30<03:41,  3.36s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201604_trip_count.csv; len: 483


 33%|███▎      | 32/97 [01:35<04:09,  3.83s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201605_trip_count.csv; len: 482


 34%|███▍      | 33/97 [01:41<04:50,  4.55s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201606_trip_count.csv; len: 489


 35%|███▌      | 34/97 [01:47<05:10,  4.92s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201607_trip_count.csv; len: 492


 36%|███▌      | 35/97 [01:54<05:38,  5.46s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201608_trip_count.csv; len: 582


 37%|███▋      | 36/97 [02:01<06:04,  5.97s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201609_trip_count.csv; len: 624


 38%|███▊      | 37/97 [02:08<06:14,  6.24s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201610_trip_count.csv; len: 615


 39%|███▉      | 38/97 [02:13<05:47,  5.89s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201611_trip_count.csv; len: 614


 40%|████      | 39/97 [02:16<04:57,  5.13s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201612_trip_count.csv; len: 613


 41%|████      | 40/97 [02:19<04:14,  4.46s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201701_trip_count.csv; len: 612


 42%|████▏     | 41/97 [02:22<03:47,  4.07s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201702_trip_count.csv; len: 619


 43%|████▎     | 42/97 [02:25<03:23,  3.71s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201703_trip_count.csv; len: 619


 44%|████▍     | 43/97 [02:31<03:48,  4.24s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201704_trip_count.csv; len: 629


 45%|████▌     | 44/97 [02:37<04:20,  4.92s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201705_trip_count.csv; len: 634


 46%|████▋     | 45/97 [02:45<04:53,  5.65s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201706_trip_count.csv; len: 635


 47%|████▋     | 46/97 [02:52<05:14,  6.17s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201707_trip_count.csv; len: 634


 48%|████▊     | 47/97 [03:00<05:32,  6.66s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201708_trip_count.csv; len: 639


 49%|████▉     | 48/97 [03:08<05:47,  7.09s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201709_trip_count.csv; len: 696


 51%|█████     | 49/97 [03:16<05:56,  7.44s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201710_trip_count.csv; len: 785


 52%|█████▏    | 50/97 [03:22<05:26,  6.94s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201711_trip_count.csv; len: 761


 53%|█████▎    | 51/97 [03:26<04:34,  5.97s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201712_trip_count.csv; len: 758


 54%|█████▎    | 52/97 [03:28<03:41,  4.93s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201801_trip_count.csv; len: 768


 55%|█████▍    | 53/97 [03:31<03:11,  4.36s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201802_trip_count.csv; len: 770


 56%|█████▌    | 54/97 [03:35<02:55,  4.07s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201803_trip_count.csv; len: 771


 57%|█████▋    | 55/97 [03:39<03:00,  4.31s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201804_trip_count.csv; len: 772


 58%|█████▊    | 56/97 [03:46<03:25,  5.02s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201805_trip_count.csv; len: 778


 59%|█████▉    | 57/97 [03:53<03:47,  5.69s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201806_trip_count.csv; len: 776


 60%|█████▉    | 58/97 [04:01<03:59,  6.15s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201807_trip_count.csv; len: 785


 61%|██████    | 59/97 [04:10<04:37,  7.29s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201808_trip_count.csv; len: 780


 62%|██████▏   | 60/97 [04:20<04:51,  7.89s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201809_trip_count.csv; len: 784


 63%|██████▎   | 61/97 [04:29<04:59,  8.31s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201810_trip_count.csv; len: 778


 64%|██████▍   | 62/97 [04:35<04:28,  7.67s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201811_trip_count.csv; len: 769


 65%|██████▍   | 63/97 [04:40<03:50,  6.78s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201812_trip_count.csv; len: 768


 66%|██████▌   | 64/97 [04:44<03:20,  6.08s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201901_trip_count.csv; len: 774


 67%|██████▋   | 65/97 [04:49<02:57,  5.55s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201902_trip_count.csv; len: 771


 68%|██████▊   | 66/97 [04:55<02:59,  5.77s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201903_trip_count.csv; len: 774


 69%|██████▉   | 67/97 [05:03<03:14,  6.49s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201904_trip_count.csv; len: 791


 70%|███████   | 68/97 [05:12<03:31,  7.30s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201905_trip_count.csv; len: 804


 71%|███████   | 69/97 [05:22<03:48,  8.15s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201906_trip_count.csv; len: 806


 72%|███████▏  | 70/97 [05:33<04:03,  9.00s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201907_trip_count.csv; len: 801


 73%|███████▎  | 71/97 [05:45<04:16,  9.85s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201908_trip_count.csv; len: 808


 74%|███████▍  | 72/97 [05:57<04:20, 10.43s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201909_trip_count.csv; len: 820


 75%|███████▌  | 73/97 [06:07<04:07, 10.31s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201910_trip_count.csv; len: 848


 76%|███████▋  | 74/97 [06:14<03:34,  9.34s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201911_trip_count.csv; len: 877


 77%|███████▋  | 75/97 [06:19<02:52,  7.85s/it]

Wrote ../data/citibike/exports/adjacency_matrices/201912_trip_count.csv; len: 882


 78%|███████▊  | 76/97 [06:24<02:31,  7.23s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202001_trip_count.csv; len: 896


 79%|███████▉  | 77/97 [06:30<02:13,  6.68s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202002_trip_count.csv; len: 894


 80%|████████  | 78/97 [06:35<01:57,  6.19s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202003_trip_count.csv; len: 899


 81%|████████▏ | 79/97 [06:38<01:35,  5.30s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202004_trip_count.csv; len: 891


 82%|████████▏ | 80/97 [06:45<01:38,  5.81s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202005_trip_count.csv; len: 934


 84%|████████▎ | 81/97 [06:54<01:48,  6.79s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202006_trip_count.csv; len: 970


 85%|████████▍ | 82/97 [07:04<01:57,  7.85s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202007_trip_count.csv; len: 1007


 86%|████████▌ | 83/97 [07:16<02:05,  8.97s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202008_trip_count.csv; len: 1059


 87%|████████▋ | 84/97 [07:29<02:10, 10.06s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202009_trip_count.csv; len: 1112


 88%|████████▊ | 85/97 [07:40<02:06, 10.58s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202010_trip_count.csv; len: 1163


 89%|████████▊ | 86/97 [07:50<01:52, 10.21s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202011_trip_count.csv; len: 1161


 90%|████████▉ | 87/97 [07:56<01:28,  8.89s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202012_trip_count.csv; len: 1184


 91%|█████████ | 88/97 [08:01<01:11,  7.97s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202101_trip_count.csv; len: 1227


 92%|█████████▏| 89/97 [08:05<00:53,  6.74s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202102_trip_count.csv; len: 1353


 93%|█████████▎| 90/97 [08:13<00:50,  7.17s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202103_trip_count.csv; len: 1415


 94%|█████████▍| 91/97 [08:25<00:50,  8.47s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202104_trip_count.csv; len: 1484


 95%|█████████▍| 92/97 [08:39<00:51, 10.24s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202105_trip_count.csv; len: 1530


 96%|█████████▌| 93/97 [08:58<00:51, 12.93s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202106_trip_count.csv; len: 1570


 97%|█████████▋| 94/97 [09:17<00:43, 14.58s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202107_trip_count.csv; len: 1583


 98%|█████████▊| 95/97 [09:35<00:31, 15.72s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202108_trip_count.csv; len: 1591


 99%|█████████▉| 96/97 [09:55<00:16, 16.86s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202109_trip_count.csv; len: 1597


100%|██████████| 97/97 [10:12<00:00,  6.32s/it]

Wrote ../data/citibike/exports/adjacency_matrices/202110_trip_count.csv; len: 1608



