# Circulation Data Conversion

This notebook, like WIMConversion, converts the traffic circulation data into a usable pickled form.

In [1]:
import pandas as pd
import numpy as np
import h5py

In [138]:
def load_table_from_struct_circ(station, year) -> pd.DataFrame:
    """Load the ``circ_struct`` table of a station/year .mat file into a DataFrame.

    The file is read from ``'{station}/{station}_{year}_struct.mat'`` with h5py.
    Column names come from ``circ_struct['columns']`` and column values from
    ``circ_struct['table']['data']``; both hold HDF5 object references that are
    dereferenced through the file handle.

    Args:
        station: Station identifier; used as the directory name and as the
            file-name prefix.
        year: Year used in the file name.

    Returns:
        A DataFrame with one column per entry of ``circ_struct['columns']``,
        except the ``'c'`` column, which is dropped when present.
    """
    path = '{}/{}_{}_struct.mat'.format(station, station, year)

    # The context manager closes the file even if reading fails; every dataset
    # is copied into memory with [()] before the handle goes away.
    # ([()] replaces Dataset.value, which was removed in h5py 3.0.)
    with h5py.File(path, 'r') as file:
        circ_struct = file['circ_struct']

        data = [file[ref[0]][()] for ref in circ_struct['table']['data'][:]]

        # Keep every second byte of the raw name buffer before decoding
        # (presumably the names are stored as 2-byte characters -- TODO confirm).
        columns = [
            file[ref[0]][()].tobytes()[::2].decode()
            for ref in circ_struct['columns'][:]
        ]

    # Each dataset's first row holds the column values.
    table_dict = {name: values[0] for name, values in zip(columns, data)}

    # For some reason there is this "c" column that is only of length 6, drop this.
    # pop() with a default avoids a KeyError for files that lack the column.
    table_dict.pop('c', None)
    return pd.DataFrame(table_dict)

In [137]:
def _clean_circ_df(df):
    """Return a copy of *df* with d/h/m/s/ms collapsed into one ``Date`` column."""
    # Coerce the date codes to numbers; anything unparsable becomes NaN.
    codes = pd.to_numeric(df['d'], errors='coerce')
    finite = np.isfinite(codes)

    # Go through int64 so that float codes read "10117" rather than "10117.0",
    # which would otherwise fail the length check below.
    date_text = codes.where(finite, 0).astype('int64').astype(str)

    # Less than 1% null, therefore remove the null values, and eliminate any
    # date that does not have the minimum length of 5.
    keep = finite & date_text.str.len().isin([5, 6])
    cleaned = df[keep].copy()

    # A 5-digit code is taken to be a 6-digit code whose leading zero was lost
    # in numeric storage; pad it so '%m%d%y' cannot split the digits ambiguously.
    stamps = pd.to_datetime(
        date_text[keep].str.zfill(6), format='%m%d%y', errors='coerce'
    )

    # Add the time-of-day components onto the parsed date.
    for column, unit in (('h', 'h'), ('m', 'min'), ('s', 's'), ('ms', 'ms')):
        stamps = stamps + pd.to_timedelta(cleaned[column], unit=unit)

    cleaned['Date'] = stamps
    return cleaned.drop(columns=['d', 'h', 'm', 's', 'ms'])


def circ_df_pickle(df, station=None, year=None):
    """Clean a circulation table and pickle it to ``'{station}/{year}_circ.pkl'``.

    Rows whose date code ``d`` is missing, non-numeric, or not 5 or 6 digits
    long are dropped. The remaining codes are parsed with ``'%m%d%y'`` and
    combined with the ``h``, ``m``, ``s`` and ``ms`` columns into a single
    ``Date`` column; the five source columns are removed. Dates that still
    fail to parse are kept as ``NaT``. The input frame is not modified.

    Args:
        df: DataFrame with at least the columns ``d``, ``h``, ``m``, ``s``
            and ``ms``.
        station: Output directory. Defaults to the module-level ``station``.
        year: Year used in the output file name. Defaults to the module-level
            ``year``.

    Raises:
        ValueError: If station or year is neither passed nor defined at
            module level.
    """
    # Fall back to the notebook globals so existing circ_df_pickle(df) calls
    # keep working.
    if station is None:
        station = globals().get('station')
    if year is None:
        year = globals().get('year')
    if station is None or year is None:
        raise ValueError(
            'station and year must be passed or defined at module level'
        )

    cleaned = _clean_circ_df(df)
    pd.to_pickle(cleaned, '{}/{}_circ.pkl'.format(station, year))


In [111]:
# Station directory / file prefix and the year to convert.
station = '137'
year = 2017
df = load_table_from_struct_circ(station, year)
# circ_df_pickle requires the table to clean; calling it with no argument
# raises TypeError.
circ_df_pickle(df)