In [1]:
import datetime
import json
import os
from enum import Enum
from functools import reduce
from itertools import count
from operator import add
import numpy as np

import pandas as pd
import shapefile
from tqdm.notebook import tqdm
from numpy import inf

#from scripts.helpers import *
import scripthelper

import csv
from contextlib import contextmanager
from math import radians, cos, sin, asin, sqrt
from typing import Iterable, Dict, Tuple, List

In [2]:
# ---------------------------------------------------------------
# ------------------------LatLon distance------------------------
# ---------------------------------------------------------------


def distance(lat1, lon1, lat2, lon2, radius=6371):
    """Return the great-circle distance between two latitude/longitude points.

    The distance is computed with the haversine formula on a sphere.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.
        radius: Radius of the sphere; the result is expressed in the same
            unit. Defaults to 6371 (radius of the earth in kilometers);
            use 3956 for miles.

    Every coordinate is passed through ``float()``, so numbers and numeric
    strings are both accepted.

    Returns:
        The distance between the two points as a float, in the unit of
        ``radius``.

    Raises:
        ValueError: If a coordinate string cannot be parsed by ``float()``.
        TypeError: If a coordinate is of a type ``float()`` does not accept.
    """
    # Convert every coordinate from degrees to radians.
    lat1, lon1, lat2, lon2 = (
        radians(float(value)) for value in (lat1, lon1, lat2, lon2)
    )

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can leave `a` a hair above 1 for (nearly)
    # antipodal points, which would take asin() out of its domain; clamp it.
    c = 2 * asin(sqrt(min(1.0, a)))

    return c * radius


# ---------------------------------------------------------------
# ----------------------------CSVs IO----------------------------
# ---------------------------------------------------------------


def read_csv_list(path, as_dict=False):
    """Read an entire UTF-8 encoded CSV file into a list.

    Args:
        path: Location of the CSV file to read.
        as_dict: When true, rows are parsed with ``csv.DictReader`` (the
            first row supplies the keys and is not returned as data).
            Otherwise ``csv.reader`` is used and every row, including the
            first one, is returned as a list of strings.

    Returns:
        A list with one entry per parsed row.
    """
    reader_factory = csv.DictReader if as_dict else csv.reader
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(path, 'r', encoding='utf8', newline='') as file:
        return list(reader_factory(file))


def read_csv_stream(path, as_dict=False, skip_first=True):
    """Lazily yield the rows of a UTF-8 encoded CSV file.

    This is a generator: the file is opened on the first iteration and stays
    open until the generator is exhausted or closed.

    Args:
        path: Location of the CSV file to read.
        as_dict: When true, rows are parsed with ``csv.DictReader`` and
            yielded as dicts keyed by the first row; otherwise rows are
            yielded as lists of strings.
        skip_first: When true, the first row (the header) is not yielded.
            In dict mode the header is already consumed by ``DictReader``,
            so no additional row is skipped.

    Yields:
        One parsed row at a time.
    """
    reader_factory = csv.DictReader if as_dict else csv.reader
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved instead of being translated.
    with open(path, 'r', encoding='utf8', newline='') as file:
        reader = reader_factory(file)
        # DictReader takes the header for its field names on its own; skipping
        # once more would silently drop the first data record.
        if skip_first and not as_dict:
            # The default keeps an empty file from raising StopIteration,
            # which inside a generator is turned into a RuntimeError.
            next(reader, None)
        yield from reader


def get_csv_writer(path):
    """Open ``path`` for writing and return a CSV writer bound to it.

    The file is opened in write mode as UTF-8 with ``newline=''``.

    Args:
        path: Location of the CSV file to write.

    Returns:
        A ``(writer, file)`` tuple. The file is left open; the caller is
        responsible for closing it once writing is done.
    """
    handle = open(path, mode='w', encoding='utf8', newline='')
    writer = csv.writer(handle)
    return writer, handle


@contextmanager
def write_csv(path) -> csv.writer:
    """Context manager that yields a CSV writer for ``path``.

    The file is opened in write mode as UTF-8 with ``newline=''`` and is
    closed when the ``with`` block is left, whether normally or through an
    exception.

    Args:
        path: Location of the CSV file to write.

    Yields:
        A ``csv.writer`` bound to the opened file.
    """
    with open(path, 'w', encoding='utf8', newline='') as handle:
        yield csv.writer(handle)


# ---------------------------------------------------------------
# -----------------------Line manipulation-----------------------
# ---------------------------------------------------------------

def group_line_stops(line_stops: Iterable[str]) -> Dict[str, Tuple[List[List[str]], List[List[str]]]]:
    """Group stop records by line and direction, then order each group.

    Every record is read positionally:
      * element 0 is the line identifier; its last character is discarded and
        the remainder is parsed as an integer (so leading zeros disappear)
        before being turned back into a string key;
      * element 1 is the direction; the record is stored at index
        ``direction - 1`` of the line's pair of lists;
      * the last element is parsed as an integer and used as the sort key.

    Args:
        line_stops: Iterable of indexable stop records as described above.

    Returns:
        A dict mapping each line key to a tuple of two lists of records, each
        list sorted in ascending order of the records' last element.
    """
    grouped = {}
    for record in line_stops:
        key = str(int(record[0][:-1]))
        side = int(record[1]) - 1
        # Create the pair of per-direction buckets the first time a line shows up.
        grouped.setdefault(key, ([], []))[side].append(record)
    for per_direction in grouped.values():
        for records in per_direction:
            records.sort(key=lambda entry: int(entry[-1]))
    return grouped

In [3]:
# Folder that holds the raw vehiclePositionNN.json dumps and receives the
# flattened CSV. Set the STIB_DATA_DIR environment variable to point the
# notebook at another location; otherwise the original local path is used.
DATA_DIR = os.environ.get(
    "STIB_DATA_DIR",
    "C:/Users/chibo/OneDrive/Documents/ULB schooling/DM (INFO-H-423)/Project Files/dm-stib-hack-my-ride-main/Project Data-20221104",
)

# vehiclePosition01.json ... vehiclePosition13.json, in order.
raw_json_files = [
    f"{DATA_DIR}/vehiclePosition{index:02d}.json" for index in range(1, 14)
]

# Destination of the flattened vehicle positions and its column names.
vehicle_positions_csv = f"{DATA_DIR}/vehicle_positions_csv.csv"
csv_header = ['Timestamp', 'LineId', 'DirectionId', 'DistanceFromPoint', 'PointId']

In [4]:
# Flatten the nested JSON dumps into one CSV: a row per vehicle position,
# tagged with the timestamp of its snapshot and the id of its line.
with write_csv(vehicle_positions_csv) as writer:
    writer.writerow(csv_header)
    for raw_json_path in tqdm(raw_json_files):
        # The context manager closes the file even when parsing fails.
        with open(raw_json_path, 'r', encoding='utf8') as json_file:
            data = json.load(json_file)['data']
        for snapshot in data:
            timestamp = snapshot['time']
            for response in snapshot['Responses']:
                if response is None:
                    # Skip if response is empty
                    continue
                for line in response['lines']:
                    line_id = line['lineId']
                    writer.writerows(
                        [
                            timestamp,
                            line_id,
                            position['directionId'],
                            position['distanceFromPoint'],
                            position['pointId'],
                        ]
                        for position in line['vehiclePositions']
                    )

  0%|          | 0/13 [00:00<?, ?it/s]

In [5]:
# Load the CSV at `vehicle_positions_csv` into a DataFrame and preview its
# first rows.
vehicle_positions_df = pd.read_csv(vehicle_positions_csv)
vehicle_positions_df.head()

Unnamed: 0,Timestamp,LineId,DirectionId,DistanceFromPoint,PointId
0,1630914886924,1,8161,1,8012
1,1630914886924,1,8162,0,8142
2,1630914886924,1,8162,0,8282
3,1630914886924,1,8731,0,8111
4,1630914886924,1,8162,1,8062


In [6]:
# Print the frame's row count, column dtypes and memory usage.
vehicle_positions_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19421883 entries, 0 to 19421882
Data columns (total 5 columns):
 #   Column             Dtype
---  ------             -----
 0   Timestamp          int64
 1   LineId             int64
 2   DirectionId        int64
 3   DistanceFromPoint  int64
 4   PointId            int64
dtypes: int64(5)
memory usage: 740.9 MB
