In [1]:
import numpy as np
import pandas as pd

In [2]:
import os

In [3]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.io import output_notebook
from bokeh.layouts import row, gridplot, layout
from bokeh.palettes import d3
output_notebook()

In [4]:
# Root directory of the project. The CBIDMLTSF_ROOT environment variable, when
# set, overrides the default so the notebook is not tied to one machine's layout.
PROJECT_ROOT = os.environ.get('CBIDMLTSF_ROOT', '/home/developer/gcp/cbidmltsf')

In [5]:
# Folder with the PEMS-SF dataset files, located under the project root.
data_folder = '/'.join((PROJECT_ROOT, 'datasets', 'traffic', 'PEMS-SF'))
data_folder

'/home/developer/gcp/cbidmltsf/datasets/traffic/PEMS-SF'

In [6]:
# Folder named 'separated_raw' inside the traffic datasets of the project.
raw_ts_folder = '/'.join((PROJECT_ROOT, 'datasets', 'traffic', 'separated_raw'))
raw_ts_folder

'/home/developer/gcp/cbidmltsf/datasets/traffic/separated_raw'

In [7]:
def process_list(s, variable_type=int, delimiter=None):
    """Parses a line in the PEMS format to a list.

    Args:
        s: Text of one line, e.g. ``'[1 2 3]'``. Every ``[`` and ``]``
            character is removed before the remaining text is split.
        variable_type: Callable applied to each token (``int`` by default).
        delimiter: Separator handed to ``str.split``. ``None`` (the default)
            splits on runs of whitespace.

    Returns:
        A list with one converted value per token. A line that holds no
        values (``'[]'``, ``''`` or only whitespace) yields ``[]``.

    Raises:
        Whatever ``variable_type`` raises for a token it cannot convert.
    """
    content = s.replace('[', '').replace(']', '')

    # With an explicit delimiter, ''.split(delimiter) returns [''], which would
    # reach variable_type('') and fail for int/float; treat it as "no values".
    if not content.strip():
        return []

    # str.split(None) is the whitespace split, so a single expression covers
    # both the default and the explicit-delimiter case.
    return [variable_type(token) for token in content.split(delimiter)]

In [8]:
def read_single_list(filename):
    """Returns single list from a file in the PEMS-custom format.

    Args:
        filename: Name of a file inside the module-level ``data_folder``.

    Returns:
        The first line of the file parsed by ``process_list`` with its
        default arguments.

    Raises:
        IndexError: If the file is empty.
    """
    path = os.path.join(data_folder, filename)
    with open(path, 'r') as dat:
        # Only the first line is used, so do not load the whole file.
        first_line = dat.readline()

    if not first_line:
        # readline() returns '' only at end-of-file. Keep raising IndexError,
        # as indexing an empty readlines() result did.
        raise IndexError('{} has no lines'.format(path))

    return process_list(first_line)

In [9]:
# Values parsed from the 'stations_list' file (presumably station identifiers).
# read_single_list already returns a new list, so no copy is needed, and the
# former comprehension variable shadowed the builtin `id`.
stations_list = read_single_list('stations_list')
len(stations_list)

963

In [10]:
# Directory with the pickled test time series of the selected SLDB.
test_ts_dir = '/'.join((
    PROJECT_ROOT,
    'sldbs',
    'PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX',
    'test',
))
test_ts_dir

'/home/developer/gcp/cbidmltsf/sldbs/PEMS-SF_SEPARATED_FULL_BSCTRFM_168_168_07DB_MMX/test'

In [16]:
# Load the pickled test series stored as ST_400000.pkl.
# NOTE(review): unpickling executes code from the file; only load trusted pickles.
test_ts = pd.read_pickle(test_ts_dir + '/ST_400000.pkl')
test_ts

Unnamed: 0,id,sequential_id,occupancy_scaled,sin_hours_from_start,cos_hours_from_start,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week
3648,400000,0,0.010404,-6.888165e-01,0.724936,0.258819,0.965926,-0.781831,0.623490
3649,400000,0,0.009777,-6.877184e-01,0.725978,0.500000,0.866025,-0.781831,0.623490
3650,400000,0,0.015509,-6.866187e-01,0.727018,0.707107,0.707107,-0.781831,0.623490
3651,400000,0,0.035642,-6.855175e-01,0.728056,0.866025,0.500000,-0.781831,0.623490
3652,400000,0,0.122387,-6.844147e-01,0.729093,0.965926,0.258819,-0.781831,0.623490
...,...,...,...,...,...,...,...,...,...
4146,400000,0,0.091658,-6.054586e-03,0.999982,-0.965926,0.258819,-0.974928,-0.222521
4147,400000,0,0.078509,-4.540952e-03,0.999990,-0.866025,0.500000,-0.974928,-0.222521
4148,400000,0,0.061844,-3.027307e-03,0.999995,-0.707107,0.707107,-0.974928,-0.222521
4149,400000,0,0.037809,-1.513655e-03,0.999999,-0.500000,0.866025,-0.974928,-0.222521


In [15]:
def plot_line(title, x, y, width, height, x_label, y_label, color='red'):
    """Builds a Bokeh figure that contains a single line glyph.

    Args:
        title: Title of the figure.
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        width: Figure width, forwarded to ``figure``.
        height: Figure height, forwarded to ``figure``.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        color: Colour of the line. Defaults to ``'red'``, the value that was
            previously hard-coded.

    Returns:
        The configured figure. It is not displayed here; pass it to ``show``.
    """
    # `width`/`height` replace `plot_width`/`plot_height`, which Bokeh
    # deprecated in 2.4 and removed in 3.0.
    p = figure(title=title, width=width, height=height)
    p.grid.grid_line_alpha = 0.3

    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = y_label

    p.line(x, y, color=color)

    return p

In [17]:
# Display the scaled occupancy of the loaded test series against its index.
show(
    plot_line(
        title='ST_400000 test time series',
        x=test_ts.index,
        y=test_ts['occupancy_scaled'],
        width=720,
        height=240,
        x_label='Index',
        y_label='Car occupancy (scaled)',
    )
)

In [18]:
# Summary statistics (count, mean, std, min, quartiles, max) of the test series.
test_ts.describe()

Unnamed: 0,sequential_id,occupancy_scaled,sin_hours_from_start,cos_hours_from_start,sin_hour_day,cos_hour_day,sin_day_week,cos_day_week
count,503.0,503.0,503.0,503.0,503.0,503.0,503.0,503.0
mean,0.0,0.107841,-0.3619606,0.906422,-7.316877e-17,-0.001988072,0.001554337,-0.00124
std,0.0,0.067119,0.201533,0.082835,0.708514,0.707104,0.7076524,0.707966
min,0.0,0.000963,-0.6888165,0.724936,-1.0,-1.0,-0.9749279,-0.900969
25%,0.0,0.038652,-0.5395405,0.841959,-0.7071068,-0.7071068,-0.7818315,-0.900969
50%,0.0,0.128504,-0.3708532,0.928691,1.224647e-16,-1.83697e-16,-2.449294e-16,-0.222521
75%,0.0,0.165374,-0.1888233,0.982011,0.7071068,0.7071068,0.7818315,0.62349
max,0.0,0.334891,-2.449294e-16,1.0,1.0,1.0,0.9749279,1.0
