In [1]:
import csv
import os
import traceback
import numpy as np
import tqdm
import pandas as pd
import multiprocessing as mp
import gc
from datetime import timedelta

import rsa_data_summary as rd
import rsa_data_wim as wim
import rsa_headers as rh
import config
import queries as q
import tools

pd.options.mode.chained_assignment = None
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [2]:
# individual tables for type 30 and 60 test
files = tools.getfiles(r'C:\FTP\Syntell\SMEC RSA Files_GP PRM Sites_Dec21toFeb22')
file = r"C:\FTP\Syntell\0087_20220331.RSA"

COLLECTING FILES......


In [3]:
electronic_count_data_type_21_columns = list(pd.read_sql_query("SELECT * from trafc.electronic_count_data_type_21 limit 1",config.ENGINE).columns)
electronic_count_data_type_30_columns = list(pd.read_sql_query("SELECT * from trafc.electronic_count_data_type_30 limit 1",config.ENGINE).columns)
electronic_count_data_type_60_columns = list(pd.read_sql_query("SELECT * from trafc.electronic_count_data_type_60 limit 1",config.ENGINE).columns)
electronic_count_data_type_70_columns = list(pd.read_sql_query("SELECT * from trafc.electronic_count_data_type_70 limit 1",config.ENGINE).columns)
header_columns = list(pd.read_sql_query("SELECT * from trafc.electronic_count_header limit 1",config.ENGINE).columns)

In [125]:
df = tools.to_df(file)

In [6]:
def select_classification_scheme(classification_scheme):
    if int(classification_scheme) == 8:
        vc_df = pd.read_sql_query(q.SELECT_CLASSIFICAITON_SCHEME_8, config.ENGINE)
    elif int(classification_scheme) == 1:
        vc_df = pd.read_sql_query(q.SELECT_CLASSIFICAITON_SCHEME_1, config.ENGINE)
    elif int(classification_scheme) == 5:
        vc_df = pd.read_sql_query(q.SELECT_CLASSIFICAITON_SCHEME_5, config.ENGINE)
    elif int(classification_scheme) == 9:
        vc_df = pd.read_sql_query(q.SELECT_CLASSIFICAITON_SCHEME_9, config.ENGINE)
    else:
        vc_df = None
    return vc_df
    

In [145]:
def electronic_count_data_type_30(df: pd.DataFrame) -> pd.DataFrame:
    data = df.loc[(df[0] == "30") & (df[1].isin(["0", "1", "2", "3", "4"]))].dropna(
                axis=1, how="all"
            ).reset_index(drop=True)
    dfh2 = pd.DataFrame(df.loc[(df[0].isin(["S0", "L1", "L2", "L3", "L4", "L5", "L6", "L7", "L8"]))]).dropna(
        axis=1, how="all"
    ).reset_index(drop=True)
    header = df.loc[(df[0] == "30") & (~df[1].isin(["0", "1", "2", "3", "4"]))].dropna(
                axis=1, how="all"
            ).reset_index(drop=True)

    if data.empty:
        pass
    else:
        dir_1 = dfh2.iloc[1:,2].astype(int).min()
        dir_2 = dfh2.iloc[1:,2].astype(int).max()

        if header.shape[1] > 3:
            summary_interval = header.iloc[0,2]
            classification_scheme = header.iloc[0,3]
            number_of_data_records = header.iloc[0,4]
        else:
            summary_interval = header.iloc[0,1]
            classification_scheme = header.iloc[0,2]
            number_of_data_records = header.iloc[0,3]

        vc_df = select_classification_scheme(classification_scheme)

        if data[1].isin(["0", "2"]).any():
            ddf = data.iloc[:, 1:].reset_index(drop=True)
            ddf = pd.DataFrame(ddf).dropna(axis=1, how="all").reset_index(drop=True)

            site_id = dfh2.iloc[1,1]
            time_length = len(ddf[2][0])
            date_length = len(ddf[2][0])
            duration = int(ddf[4][0])
            ddf[4] = ddf[4].astype(int)
            ddf[5] = ddf[5].astype(int)
            max_lanes = ddf[5].max()

            ddf[3].loc[ddf[3].str[:2] == '24'] = ('0').zfill(time_length)
            
            ddf[2] = ddf[2].apply(lambda x: pd.to_datetime(x, format="%y%m%d").date() 
                if len(x)==6 else pd.to_datetime(x, format="%Y%m%d").date())

            ddf[2].loc[ddf[3] == ('0').zfill(time_length)] = ddf[2]+timedelta(days=1)

            ddf[3] = ddf[3].apply(lambda x: pd.to_datetime(x, format="%H%M").time()
                if len(x)==4 
                else pd.to_datetime(x, format="%H%M%S").time())

            ddf = ddf.apply(pd.to_numeric, errors='ignore')
                
            ddf["direction"] = ddf.apply(
                lambda x: "P" if int(x[5]) <= (int(max_lanes) / 2) else "N",
                axis=1,
            )
            ddf["compas_heading"] = ddf.apply(
                lambda x: dir_1 if int(x[5]) <= (int(max_lanes) / 2) else dir_2,
                axis=1,
            )

            ddf['vehicle_classification_scheme'] = int(classification_scheme)

            ddf['start_datetime'] = pd.to_datetime(ddf[2].astype(str)+ddf[3].astype(str), 
                format='%Y-%m-%d%H:%M:%S') - timedelta(minutes=duration)
            
            ddf.columns = ddf.columns.astype(str)

            df3 = pd.DataFrame(columns=['edit_code', 'start_datetime', 'end_date', 'end_time', 
                'duration_min', 'lane_number', 'number_of_vehicles', 'class_number', 'direction', 'compas_heading'])
            for lane_no in range(max_lanes):
                for i in range(1,int(number_of_data_records)):
                    i=i+5
                    join_to_df3 = ddf.loc[ddf['5'] == lane_no, ['1', 'start_datetime','2', '3', '4', '5',str(i), 'direction', 'compas_heading']]
                    join_to_df3['class_number'] = i-5
                    join_to_df3.rename(columns={'1':"edit_code",'2':"end_date",'3':"end_time",'4':"duration_min",'5':'lane_number', str(i): 'number_of_vehicles'}, inplace=True)
                    df3 = pd.concat([df3,join_to_df3],keys=['start_datetime','lane_number','number_of_vehicles','class_number'],ignore_index=True, axis=0)
            df3['classification_scheme'] = int(classification_scheme)
            df3['site_id'] = site_id
            df3['year'] = int(df3['start_datetime'][0].year)
        else:
            pass
        return df3

In [146]:
df30 = electronic_count_data_type_30(df)
print(df30.head())

  edit_code       start_datetime    end_date  end_time duration_min  \
0         0  2022-03-01 12:45:00  2022-03-01  13:00:00           15   
1         0  2022-03-01 13:00:00  2022-03-01  13:15:00           15   
2         0  2022-03-01 13:15:00  2022-03-01  13:30:00           15   
3         0  2022-03-01 13:30:00  2022-03-01  13:45:00           15   
4         0  2022-03-01 13:45:00  2022-03-01  14:00:00           15   

  lane_number number_of_vehicles class_number direction compas_heading  \
0           1                  2            1         P              0   
1           1                  2            1         P              0   
2           1                  5            1         P              0   
3           1                  0            1         P              0   
4           1                  5            1         P              0   

   classification_scheme site_id  year  
0                      8      01  2022  
1                      8      01  2022  
2    

In [119]:
if data[1].isin(["0", "2"]).any():
    ddf = data.iloc[:, 1:].reset_index(drop=True)
    ddf = pd.DataFrame(ddf).dropna(axis=1, how="all").reset_index(drop=True)

    site_id = df.iloc[1,1]
    time_length = len(ddf[2][0])
    date_length = len(ddf[2][0])
    duration = int(ddf[4][0])
    ddf[4] = ddf[4].astype(int)
    ddf[5] = ddf[5].astype(int)
    max_lanes = ddf[5].max()

    ddf[3].loc[ddf[3].str[:2] == '24'] = ('0').zfill(time_length)
    
    ddf[2] = ddf[2].apply(lambda x: pd.to_datetime(x, format="%y%m%d").date() 
        if len(x)==6 else pd.to_datetime(x, format="%Y%m%d").date())

    ddf[2].loc[ddf[3] == ('0').zfill(time_length)] = ddf[2]+timedelta(days=1)

    ddf[3] = ddf[3].apply(lambda x: pd.to_datetime(x, format="%H%M").time()
        if len(x)==4 
        else pd.to_datetime(x, format="%H%M%S").time())

    ddf = ddf.apply(pd.to_numeric, errors='ignore')
        
    ddf["direction"] = ddf.apply(
        lambda x: "P" if int(x[5]) <= (int(max_lanes) / 2) else "N",
        axis=1,
    )
    ddf['vehicle_classification_scheme'] = int(classification_scheme)

    ddf['start_datetime'] = pd.to_datetime(ddf[2].astype(str)+ddf[3].astype(str), 
        format='%Y-%m-%d%H:%M:%S') - timedelta(minutes=duration)
    
    ddf.columns = ddf.columns.astype(str)

    df3 = pd.DataFrame(columns=['edit_code', 'start_datetime', 'end_date', 'end_time', 
        'duration_min', 'lane_number', 'number_of_vehicles', 'class_number', 'direction'])
    for lane_no in range(max_lanes):
        for i in range(1,int(number_of_data_records)):
            i=i+5
            join_to_df3 = ddf.loc[ddf['5'] == lane_no, ['1', 'start_datetime','2', '3', '4', '5',str(i), 'direction']]
            join_to_df3['class_number'] = i-5
            join_to_df3.rename(columns={'1':"edit_code",'2':"end_date",'3':"end_time",'4':"duration_min",'5':'lane_number', str(i): 'number_of_vehicles'}, inplace=True)
            df3 = pd.concat([df3,join_to_df3],keys=['start_datetime','lane_number','number_of_vehicles','class_number'],ignore_index=True, axis=0)
    df3['classification_scheme'] = int(classification_scheme)
    df3['site_id'] = site_id
    df3['year'] = int(df3['start_datetime'][0].year)
else:
    pass


In [138]:
dfh2.iloc[1:,2].astype(int).max()

4

In [118]:
df3.head()

Unnamed: 0,edit_code,start_datetime,end_date,end_time,duration_min,lane_number,number_of_vehicles,class_number,classification_scheme,site_id,year
0,0,2022-03-01 12:45:00,2022-03-01,13:00:00,15,1,2,1,8,87,2022
1,0,2022-03-01 13:00:00,2022-03-01,13:15:00,15,1,2,1,8,87,2022
2,0,2022-03-01 13:15:00,2022-03-01,13:30:00,15,1,5,1,8,87,2022
3,0,2022-03-01 13:30:00,2022-03-01,13:45:00,15,1,0,1,8,87,2022
4,0,2022-03-01 13:45:00,2022-03-01,14:00:00,15,1,5,1,8,87,2022


In [110]:
df.iloc[1,1]

'0087'

In [182]:
ddf2 = data.iloc[:, 5:].reset_index(drop=True)
ddf2.columns = range(ddf2.columns.size)

In [30]:
for index, row in ddf.iterrows():
    if (row[2] in ['2400','240000']):
        row['end_date'] = pd.to_datetime(row["end_date"]) + timedelta(days=1)
        row[2] = ('0').zfill(len(row[2])-1)
    else:
        row['end_date'] = pd.to_datetime(row["end_date"])

15   2022-03-01
Name: end_date, dtype: datetime64[ns]

In [55]:
ddf = ddf.apply(lambda x: pd.to_datetime(x["end_date"]) + timedelta(days=1) if x[2] in ['2400','2400'] else x)

KeyError: 'end_time'

In [50]:
pd.to_datetime(ddf["end_date"]) + timedelta(days=1)

15      2022-03-02
16      2022-03-02
17      2022-03-02
18      2022-03-02
27      2022-03-02
28      2022-03-02
29      2022-03-02
30      2022-03-02
39      2022-03-02
40      2022-03-02
41      2022-03-02
42      2022-03-02
51      2022-03-02
52      2022-03-02
53      2022-03-02
54      2022-03-02
63      2022-03-02
64      2022-03-02
65      2022-03-02
66      2022-03-02
75      2022-03-02
76      2022-03-02
77      2022-03-02
78      2022-03-02
87      2022-03-02
88      2022-03-02
89      2022-03-02
90      2022-03-02
99      2022-03-02
100     2022-03-02
101     2022-03-02
102     2022-03-02
111     2022-03-02
112     2022-03-02
113     2022-03-02
114     2022-03-02
123     2022-03-02
124     2022-03-02
125     2022-03-02
126     2022-03-02
135     2022-03-02
136     2022-03-02
137     2022-03-02
138     2022-03-02
147     2022-03-02
148     2022-03-02
149     2022-03-02
150     2022-03-02
159     2022-03-02
160     2022-03-02
161     2022-03-02
162     2022-03-02
171     2022

In [36]:
ddf['end_date'] = ddf['end_date'].apply(lambda x: pd.to_datetime(x, format="%y%m%d").date() if len(x)==6 else pd.to_datetime(x, format="%Y%m%d").date())

In [10]:
ddf["end_date"] = ddf.apply(lambda x: x["end_date"] + pd.DateOffset(1) if x["end_time"] in ["2400","240000"] else x)

KeyError: 'end_time'

In [None]:
ddf[2] = ddf[2].apply(lambda x: '0000' if (x=='2400' and len(x)==4) 
else '000000' if ((x=='240000' and len(x)==6))
else x)
ddf[2] = ddf[2].apply(lambda x: pd.to_datetime(x, format="%H%M").time()
if len(x)==4 
else pd.to_datetime(x, format="%H%M%S").time())

Unnamed: 0,edit_code,end_datetime,end_time,duration_min,lane_number,direction
15,0,220301,1300,15,1,P
16,0,220301,1300,15,2,P
17,0,220301,1300,15,3,N
18,0,220301,1300,15,4,N
27,0,220301,1315,15,1,P


In [47]:
data2 = pd.concat([d2, DATA.dtype30], keys=["site_id", "start_datetime", "lane_number"], ignore_index=False, axis=1)
data2 = data2.droplevel(0, axis=1)
data2 = pd.concat([data2, DATA.dtype70], keys=["site_id", "start_datetime", "lane_number"], ignore_index=False, axis=1)
data2 = data2.droplevel(0, axis=1)
data3 = data2.T.drop_duplicates().T


In [73]:
def merge_summary_dataframes(join_this_df: pd.DataFrame, onto_this_df: pd.DataFrame) -> pd.DataFrame:
    onto_this_df = pd.concat([onto_this_df, join_this_df], keys=["site_id", "start_datetime", "lane_number"], ignore_index=False, axis=1)
    onto_this_df = onto_this_df.droplevel(0, axis=1)
    onto_this_df = onto_this_df.T.drop_duplicates().T
    return onto_this_df

In [74]:
data3.head()

Unnamed: 0,header_id,station_name,index,end_datetime,end_time,duration_min,lane_number,speedbin1,speedbin2,speedbin3,speedbin4,speedbin5,speedbin6,speedbin7,speedbin8,speedbin9,speedbin10,sum_of_heavy_vehicle_speeds,short_heavy_vehicles,medium_heavy_vehicles,long_heavy_vehicles,rear_to_rear_headway_shorter_than_2_seconds,rear_to_rear_headways_shorter_than_programmed_time,speedbin0,total_heavy_vehicles_type21,total_light_vehicles_type21,total_vehicles_type21,direction,end_date,start_datetime,year,site_id,index.1,index.2,total_free_flowing_light_vehicles,total_following_light_vehicles,total_free_flowing_heavy_vehicles,total_following_heavy_vehicles,sum_of_inverse_of_speeds_for_free_flowing_lights,sum_of_inverse_of_speeds_for_following_lights,sum_of_inverse_of_speeds_for_free_flowing_heavies,sum_of_inverse_of_speeds_for_following_heavies,sum_of_speeds_for_free_flowing_lights,sum_of_speeds_for_following_lights,sum_of_speeds_for_free_flowing_heavies,sum_of_speeds_for_following_heavies,sum_of_squared_speeds_of_free_flowing_lights,sum_of_squared_speeds_for_following_lights,sum_of_squared_speeds_of_free_flowing_heavies,sum_of_squared_speeds_for_following_heavies
0,341c04fb-db50-4e22-bcc9-b176dc23c23c,0127 Bronkhorstspruit(RM),13,2022-01-01 00:15:00.000000,00:15:00.000000,15,1,1,0,2,2,1,2,1,1,0,0,0,0,0,0,0,0,0,0,10,10,P,2022-01-01,2022-01-01 00:00:00.000000,2022,127,15,17,10,0,0,0,4078,0,0,0,924,0,0,0,88978,0,0,0
1,341c04fb-db50-4e22-bcc9-b176dc23c23c,0127 Bronkhorstspruit(RM),14,2022-01-01 00:15:00.000000,00:15:00.000000,15,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,N,2022-01-01,2022-01-01 00:00:00.000000,2022,127,16,18,1,0,0,0,368,0,0,0,98,0,0,0,9604,0,0,0
2,341c04fb-db50-4e22-bcc9-b176dc23c23c,0127 Bronkhorstspruit(RM),19,2022-01-01 00:30:00.000000,00:30:00.000000,15,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,P,2022-01-01,2022-01-01 00:15:00.000000,2022,127,21,23,3,0,0,0,1217,0,0,0,268,0,0,0,23990,0,0,0
3,341c04fb-db50-4e22-bcc9-b176dc23c23c,0127 Bronkhorstspruit(RM),20,2022-01-01 00:30:00.000000,00:30:00.000000,15,2,3,0,2,0,1,1,0,0,0,0,78,0,0,1,0,1,0,1,6,7,N,2022-01-01,2022-01-01 00:15:00.000000,2022,127,22,24,5,1,1,0,2598,755,461,0,375,48,78,0,30259,2304,6084,0
4,341c04fb-db50-4e22-bcc9-b176dc23c23c,0127 Bronkhorstspruit(RM),25,2022-01-01 00:45:00.000000,00:45:00.000000,15,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,P,2022-01-01,2022-01-01 00:30:00.000000,2022,127,27,29,2,0,0,0,804,0,0,0,193,0,0,0,19925,0,0,0


In [69]:
data2 = pd.concat([data2, DATA.dtype70], keys=["site_id", "start_datetime", "lane_number"], ignore_index=False, axis=1)

In [12]:
data2.drop_duplicates(axis=1)

TypeError: DataFrame.drop_duplicates() got an unexpected keyword argument 'axis'

In [161]:
data2.sort_values(["site_id", "start_datetime", "lane_number"]).head(50)

Unnamed: 0,header_id,station_name,end_datetime,duration_min,lane_number,speedbin1,speedbin2,speedbin3,speedbin4,speedbin5,speedbin6,speedbin7,speedbin8,speedbin9,speedbin10,sum_of_heavy_vehicle_speeds,short_heavy_vehicles,medium_heavy_vehicles,long_heavy_vehicles,rear_to_rear_headway_shorter_than_2_seconds,rear_to_rear_headways_shorter_than_programmed_time,speedbin0,total_heavy_vehicles_type21,total_light_vehicles_type21,total_vehicles_type21,direction,start_datetime,site_id,unknown_vehicle_error_class,light_motor_vehicles,heavy_vehicle,motorcycle,light_motor_vehicles_towing,two_axle_busses,two_axle_6_tyre_single_units,busses_with_3_or_4_axles,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max,three_axle_single_unit_including_single_axle_light_trailer,four_or_less_axle_including_a_single_trailer,buses_with_5_or_more_axles,three_axle_single_unit_and_light_trailer_more_than_4_axles,five_axle_single_trailer,six_axle_single_trailer,five_or_less_axle_multi_trailer,six_axle_multi_trailer,seven_axle_multi_trailer,eight_or_more_axle_multi_trailer,total_light_vehicles_type30,total_heavy_vehicles_type30,total_vehicles_type30,total_free_flowing_light_vehicles,total_following_light_vehicles,total_free_flowing_heavy_vehicles,total_following_heavy_vehicles,sum_of_inverse_of_speeds_for_free_flowing_lights,sum_of_inverse_of_speeds_for_following_lights,sum_of_inverse_of_speeds_for_free_flowing_heavies,sum_of_inverse_of_speeds_for_following_heavies,sum_of_speeds_for_free_flowing_lights,sum_of_speeds_for_following_lights,sum_of_speeds_for_free_flowing_heavies,sum_of_speeds_for_following_heavies,sum_of_squared_speeds_of_free_flowing_lights,sum_of_squared_speeds_for_following_lights,sum_of_squared_speeds_of_free_flowing_heavies,sum_of_squared_speeds_for_following_heavies,number_of_error_vehicles
0,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,4.0,5.0,P,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10748,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1,,,,,,,,,,,,,,,,,,,,,P,2022-02-01 00:00:00.000000,108,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,,,,,,,,,,,,,,,,,
21496,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1,,,,,,,,,,,,,,,,,,,,,P,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,1.0,0.0,2078.0,0.0,594.0,0.0,285.0,0.0,61.0,0.0,20871.0,0.0,3721.0,0.0,0.0
1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,2,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,P,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10749,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,2,,,,,,,,,,,,,,,,,,,,,P,2022-02-01 00:00:00.000000,108,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,,,,,,,,,,,,,,,,,
21497,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,2,,,,,,,,,,,,,,,,,,,,,P,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,0.0,0.0,1637.0,0.0,0.0,0.0,358.0,0.0,0.0,0.0,32718.0,0.0,0.0,0.0,0.0
2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,N,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10750,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,3,,,,,,,,,,,,,,,,,,,,,N,2022-02-01 00:00:00.000000,108,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,2.0,,,,,,,,,,,,,,,,,
21498,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,3,,,,,,,,,,,,,,,,,,,,,N,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,0.0,0.0,618.0,0.0,0.0,0.0,235.0,0.0,0.0,0.0,27833.0,0.0,0.0,0.0,0.0
3,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,4,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,N,2022-02-01 00:00:00.000000,108,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [45]:
data2 = pd.concat([data2, D.dtype70], keys=["site_id", "start_datetime", "lane_number"], ignore_index=True)

In [110]:
data2 = data2.sort_index()

In [111]:
data2.reset_index().head(50)

Unnamed: 0,level_0,site_id,start_datetime,lane_number,header_id,station_name,end_datetime,duration_min,speedbin1,speedbin2,speedbin3,speedbin4,speedbin5,speedbin6,speedbin7,speedbin8,speedbin9,speedbin10,sum_of_heavy_vehicle_speeds,short_heavy_vehicles,medium_heavy_vehicles,long_heavy_vehicles,rear_to_rear_headway_shorter_than_2_seconds,rear_to_rear_headways_shorter_than_programmed_time,speedbin0,total_heavy_vehicles_type21,total_light_vehicles_type21,total_vehicles_type21,direction,unknown_vehicle_error_class,light_motor_vehicles,heavy_vehicle,motorcycle,light_motor_vehicles_towing,two_axle_busses,two_axle_6_tyre_single_units,busses_with_3_or_4_axles,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max,three_axle_single_unit_including_single_axle_light_trailer,four_or_less_axle_including_a_single_trailer,buses_with_5_or_more_axles,three_axle_single_unit_and_light_trailer_more_than_4_axles,five_axle_single_trailer,six_axle_single_trailer,five_or_less_axle_multi_trailer,six_axle_multi_trailer,seven_axle_multi_trailer,eight_or_more_axle_multi_trailer,total_light_vehicles_type30,total_heavy_vehicles_type30,total_vehicles_type30
0,site_id,108,2022-02-01 00:00:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,4.0,5.0,P,,,,,,,,,,,,,,,,,,,,,,
1,site_id,108,2022-02-01 00:00:00.000000,2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,P,,,,,,,,,,,,,,,,,,,,,,
2,site_id,108,2022-02-01 00:00:00.000000,3,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,N,,,,,,,,,,,,,,,,,,,,,,
3,site_id,108,2022-02-01 00:00:00.000000,4,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,N,,,,,,,,,,,,,,,,,,,,,,
4,site_id,108,2022-02-01 00:15:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:30:00.000000,15,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,5.0,P,,,,,,,,,,,,,,,,,,,,,,
5,site_id,108,2022-02-01 00:15:00.000000,2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:30:00.000000,15,0.0,0.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,P,,,,,,,,,,,,,,,,,,,,,,
6,site_id,108,2022-02-01 00:15:00.000000,3,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:30:00.000000,15,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,N,,,,,,,,,,,,,,,,,,,,,,
7,site_id,108,2022-02-01 00:15:00.000000,4,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:30:00.000000,15,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,N,,,,,,,,,,,,,,,,,,,,,,
8,site_id,108,2022-02-01 00:30:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:45:00.000000,15,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,P,,,,,,,,,,,,,,,,,,,,,,
9,site_id,108,2022-02-01 00:30:00.000000,2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:45:00.000000,15,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,P,,,,,,,,,,,,,,,,,,,,,,


In [100]:
data2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,header_id,station_name,end_datetime,duration_min,speedbin1,speedbin2,speedbin3,speedbin4,speedbin5,speedbin6,speedbin7,speedbin8,speedbin9,speedbin10,sum_of_heavy_vehicle_speeds,short_heavy_vehicles,medium_heavy_vehicles,long_heavy_vehicles,rear_to_rear_headway_shorter_than_2_seconds,rear_to_rear_headways_shorter_than_programmed_time,speedbin0,total_heavy_vehicles_type21,total_light_vehicles_type21,total_vehicles_type21,direction,unknown_vehicle_error_class,light_motor_vehicles,heavy_vehicle,motorcycle,light_motor_vehicles_towing,two_axle_busses,two_axle_6_tyre_single_units,busses_with_3_or_4_axles,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max,three_axle_single_unit_including_single_axle_light_trailer,four_or_less_axle_including_a_single_trailer,buses_with_5_or_more_axles,three_axle_single_unit_and_light_trailer_more_than_4_axles,five_axle_single_trailer,six_axle_single_trailer,five_or_less_axle_multi_trailer,six_axle_multi_trailer,seven_axle_multi_trailer,eight_or_more_axle_multi_trailer,total_light_vehicles_type30,total_heavy_vehicles_type30,total_vehicles_type30,total_free_flowing_light_vehicles,total_following_light_vehicles,total_free_flowing_heavy_vehicles,total_following_heavy_vehicles,sum_of_inverse_of_speeds_for_free_flowing_lights,sum_of_inverse_of_speeds_for_following_lights,sum_of_inverse_of_speeds_for_free_flowing_heavies,sum_of_inverse_of_speeds_for_following_heavies,sum_of_speeds_for_free_flowing_lights,sum_of_speeds_for_following_lights,sum_of_speeds_for_free_flowing_heavies,sum_of_speeds_for_following_heavies,sum_of_squared_speeds_of_free_flowing_lights,sum_of_squared_speeds_for_following_lights,sum_of_squared_speeds_of_free_flowing_heavies,sum_of_squared_speeds_for_following_heavies,number_of_error_vehicles
site_id,start_datetime,lane_number,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1
108,2022-02-01 00:00:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,1.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,4.0,5.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
108,2022-02-01 00:00:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,,,,,,,,,,,,,,,,,,,,,P,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,,,,,,,,,,,,,,,,,
108,2022-02-01 00:00:00.000000,1,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,,,,,,,,,,,,,,,,,,,,,P,,,,,,,,,,,,,,,,,,,,,,,4.0,0.0,1.0,0.0,2078.0,0.0,594.0,0.0,285.0,0.0,61.0,0.0,20871.0,0.0,3721.0,0.0,0.0
108,2022-02-01 00:00:00.000000,2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,P,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
108,2022-02-01 00:00:00.000000,2,001a8b3b-0d3b-4e0e-9b02-79f3827308ce,0108 Modderfontein Rd(CM),2022-02-01 00:15:00.000000,15,,,,,,,,,,,,,,,,,,,,,P,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,,,,,,,,,,,,,,,,,


In [71]:
t2 = rd.Data.join(header, t1)

In [73]:
# t2.drop("station_name", axis=1, inplace=True)
# t2["start_datetime"] = t2["start_datetime"].astype("datetime64[ns]")
# d2["start_datetime"] = d2["start_datetime"].astype("datetime64[ns]")
# t2 = t2.merge(
#     d2, how="outer", on=["site_id", "start_datetime", "lane_number"]
# )

In [64]:
data2.head()

Unnamed: 0,header_id_x,end_datetime_x_x,duration_min_x_x,lane_number,speedbin1_x,speedbin2_x,speedbin3_x,speedbin4_x,speedbin5_x,speedbin6_x,speedbin7_x,speedbin8_x,speedbin9_x,speedbin10_x,sum_of_heavy_vehicle_speeds_x,short_heavy_vehicles_x,medium_heavy_vehicles_x,long_heavy_vehicles_x,rear_to_rear_headway_shorter_than_2_seconds_x,rear_to_rear_headways_shorter_than_programmed_time_x,speedbin0_x,total_heavy_vehicles_type21_x,total_light_vehicles_type21_x,total_vehicles_type21_x,direction_x_x,start_datetime,site_id,end_datetime_y_x,duration_min_y_x,speedbin1_y,speedbin2_y,speedbin3_y,speedbin4_y,speedbin5_y,speedbin6_y,speedbin7_y,speedbin8_y,speedbin9_y,speedbin10_y,sum_of_heavy_vehicle_speeds_y,short_heavy_vehicles_y,medium_heavy_vehicles_y,long_heavy_vehicles_y,rear_to_rear_headway_shorter_than_2_seconds_y,rear_to_rear_headways_shorter_than_programmed_time_y,speedbin0_y,total_heavy_vehicles_type21_y,total_light_vehicles_type21_y,total_vehicles_type21_y,direction_y_x,header_id_y,end_datetime_x_y,duration_min_x_y,unknown_vehicle_error_class_x,light_motor_vehicles_x,heavy_vehicle_x,motorcycle_x,light_motor_vehicles_towing_x,two_axle_busses_x,two_axle_6_tyre_single_units_x,busses_with_3_or_4_axles_x,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max_x,three_axle_single_unit_including_single_axle_light_trailer_x,four_or_less_axle_including_a_single_trailer_x,buses_with_5_or_more_axles_x,three_axle_single_unit_and_light_trailer_more_than_4_axles_x,five_axle_single_trailer_x,six_axle_single_trailer_x,five_or_less_axle_multi_trailer_x,six_axle_multi_trailer_x,seven_axle_multi_trailer_x,eight_or_more_axle_multi_trailer_x,total_light_vehicles_type30_x,total_heavy_vehicles_type30_x,total_vehicles_type30_x,direction_x_y,end_datetime_y_y,duration_min_y_y,unknown_vehicle_error_class_y,light_motor_vehicles_y,heavy_vehicle_y,motorcycle_y,light_motor_vehicles_towing_y,two_axle_busses_y,two_axle_6_tyre_single_units_y,busses_with_3_or_4_axles_y,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max_y,three_axle_single_unit_including_single_axle_light_trailer_y,four_or_less_axle_including_a_single_trailer_y,buses_with_5_or_more_axles_y,three_axle_single_unit_and_light_trailer_more_than_4_axles_y,five_axle_single_trailer_y,six_axle_single_trailer_y,five_or_less_axle_multi_trailer_y,six_axle_multi_trailer_y,seven_axle_multi_trailer_y,eight_or_more_axle_multi_trailer_y,total_light_vehicles_type30_y,total_heavy_vehicles_type30_y,total_vehicles_type30_y,direction_y_y
0,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:15:00.000000,15.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,2022-02-01 00:00:00,350,2022-02-01 00:15:00,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,P,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:15:00.000000,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,2022-02-01 00:15:00,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,P
1,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:15:00.000000,15.0,2,0.0,0.0,4.0,1.0,0.0,2.0,0.0,0.0,0.0,0.0,243.0,0.0,0.0,3.0,0.0,1.0,0.0,3.0,4.0,7.0,N,2022-02-01 00:00:00,350,2022-02-01 00:15:00,15,0,0,4,1,0,2,0,0,0,0,243,0,0,3,0,1,0,3,4,7,N,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:15:00.000000,15.0,0.0,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,N,2022-02-01 00:15:00,15,0,4,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,4,N
2,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:30:00.000000,15.0,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,P,2022-02-01 00:15:00,350,2022-02-01 00:30:00,15,0,0,0,1,0,0,0,0,0,0,84,1,0,0,0,0,0,1,0,1,P,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:30:00.000000,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,2022-02-01 00:30:00,15,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,P
3,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:30:00.000000,15.0,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,74.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,N,2022-02-01 00:15:00,350,2022-02-01 00:30:00,15,0,0,1,0,0,0,0,0,0,0,74,0,0,1,0,0,0,1,0,1,N,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:30:00.000000,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,N,2022-02-01 00:30:00,15,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,N
4,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:45:00.000000,15.0,1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,84.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,P,2022-02-01 00:30:00,350,2022-02-01 00:45:00,15,0,0,0,1,0,0,0,0,0,0,84,0,0,1,0,0,0,1,0,1,P,c6e8b704-5dd8-400c-8f6c-f52198907a2c,2022-02-01 00:45:00.000000,15.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,P,2022-02-01 00:45:00,15,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,P


In [27]:
data.head()

Unnamed: 0,site_id,header_id,"""year""",start_datetime,end_datetime,duration_min,direction,forward_direction_code,lane_number,speedbin0,speedbin1,speedbin2,speedbin3,speedbin4,speedbin5,speedbin6,speedbin7,speedbin8,speedbin9,speedbin10,sum_of_heavy_vehicle_speeds,short_heavy_vehicles,medium_heavy_vehicles,long_heavy_vehicles,rear_to_rear_headway_shorter_than_2_seconds,rear_to_rear_headways_shorter_than_programmed_time,total_light_vehicles_type21,total_heavy_vehicles_type21,total_vehicles_type21,unknown_vehicle_error_class,motorcycle,light_motor_vehicles,light_motor_vehicles_towing,heavy_vehicle,two_axle_busses,two_axle_6_tyre_single_units,busses_with_3_or_4_axles,two_axle_6_tyre_single_unit_with_light_trailer_4_axles_max,three_axle_single_unit_including_single_axle_light_trailer,four_or_less_axle_including_a_single_trailer,buses_with_5_or_more_axles,three_axle_single_unit_and_light_trailer_more_than_4_axles,five_axle_single_trailer,six_axle_single_trailer,five_or_less_axle_multi_trailer,six_axle_multi_trailer,seven_axle_multi_trailer,eight_or_more_axle_multi_trailer,total_light_vehicles_type30,total_heavy_vehicles_type30,total_vehicles_type30,number_of_error_vehicles,total_free_flowing_light_vehicles,total_following_light_vehicles,total_free_flowing_heavy_vehicles,total_following_heavy_vehicles,sum_of_inverse_of_speeds_for_free_flowing_lights,sum_of_inverse_of_speeds_for_following_lights,sum_of_inverse_of_speeds_for_free_flowing_heavies,sum_of_inverse_of_speeds_for_following_heavies,sum_of_speeds_for_free_flowing_lights,sum_of_speeds_for_following_lights,sum_of_speeds_for_free_flowing_heavies,sum_of_speeds_for_following_heavies,sum_of_squared_speeds_of_free_flowing_lights,sum_of_squared_speeds_for_following_lights,sum_of_squared_speeds_of_free_flowing_heavies,sum_of_squared_speeds_for_following_heavies,physical_lane_number,forward_1_or_reverse_code_2,vehicle_category,vehicle_class_code_primary_scheme,vehicle_class_code_secondary_scheme,vehicle_speed,vehicle_length,site_occupancy_time_in_milliseconds,chassis_height_code,vehicle_following_code,vehicle_tag_code,trailer_count,axle_count,bumper_to_1st_axle_spacing,sub_data_type_code_axle_spacing,number_of_axles_spacings_counted,axle_spacing_1_between_individual_axles_cm,axle_spacing_2_between_individual_axles_cm,axle_spacing_3_between_individual_axles_cm,axle_spacing_4_between_individual_axles_cm,axle_spacing_5_between_individual_axles_cm,axle_spacing_6_between_individual_axles_cm,axle_spacing_7_between_individual_axles_cm,axle_spacing_8_between_individual_axles_cm


In [None]:


        # H = rh.Headers(df)
        # dfh = pd.DataFrame(
        #     df.loc[
        #         (df[0].isin(["H0", "S0", "I0", "S1", "D0", "D1", "D3", "L0", "L1", "L2", "L3","L4" ,"L5","L6","L7","L8","L9","L10","L11","L12"]))
        #         | (
        #             (df[0].isin(["21", "70", "30", "13", "60"]))
        #             & (~df[1].isin(["0", "1", "2", "3", "4"]))
        #         )
        #         | (
        #             (df[0].isin(["10"]))
        #             & (df[1].isin(["1", "8", "5", "01", "08", "05"]))
        #         )
        #     ]
        # ).dropna(axis=1, how="all")
        # dfh["index"] = dfh.index
        # breaks = dfh["index"].diff() != 1
        # groups = breaks.cumsum()
        # dfh["newindex"] = groups
        # dfh = dfh.set_index("newindex")
        # dfh = dfh.drop(columns=["index"])
        # header = rh.header
        # header["document_url"] = str(file)


        # DATA = rd.Data(df)
        # data = tools.data_join(data, header)
        # data.drop("station_name", axis=1, inplace=True)

        # d2 = tools.dtype30(df)
        # if d2 is None:
        #     pass
        # else:
        #     d2 = tools.data_join(d2, header)
        #     data = data.merge(
        #         d2, how="outer", on=["site_id", "start_datetime", "lane_number"]
        #     )

        # d2 = tools.dtype70(df)
        # if d2 is None:
        #     pass
        # else:
        #     data = tools.data_join(d2, header)
        #     data.drop("station_name", axis=1, inplace=True)
        #     data["start_datetime"] = data["start_datetime"].astype("datetime64[ns]")
        #     d2["start_datetime"] = d2["start_datetime"].astype("datetime64[ns]")
        #     data = data.merge(
        #         d2, how="outer", on=["site_id", "start_datetime", "lane_number"]
        #     )

        # d3, sub_data_df = tools.dtype10(df)
        # if d3 is None:
        #     pass
        # else:
        #     data = tools.data_join(d3, header)
        #     tools.push_to_db(d3,
        #     "electronic_count_data_type_10",
        #     ["site_id", "start_datetime", "assigned_lane_number"],
        #     )

        #     sub_data_df = sub_data_df.replace(r'^\s*$', np.NaN, regex=True)
        #     sub_data_df = sub_data_df.drop("index", axis=1) 
        #     wx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 'w']
        #     sx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 's']
        #     gx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 'g']
        #     vx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 'v']
        #     tx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 't']
        #     ax_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 'a']
        #     cx_data = sub_data_df.loc[sub_data_df['sub_data_type_code'].str.lower().str[0] == 'c']

        #     if wx_data.empty:
        #         pass
        #     else:
        #         wx_data.rename(columns = {"value":"wheel_mass", "number":"wheel_mass_number", "id":"type10_id"}, inplace=True)
        #         # wx_data.to_sql(
        #         #     "traffic_e_type10_wheel_mass",
        #         #     con=config.ENGINE,
        #         #     schema="trafc",
        #         #     if_exists="append",
        #         #     index=False,
        #         #     method=tools.psql_insert_copy,
        #         # )

        #     if ax_data.empty:
        #         pass
        #     else:
        #         ax_data.rename(columns = {"value":"axle_mass", "number":"axle_mass_number", "id":"type10_id"}, inplace=True)
        #     #     ax_data.to_sql(
        #     #         "traffic_e_type10_axle_mass",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        #     if gx_data.empty:
        #         pass
        #     else:
        #         gx_data.rename(columns = {"value":"axle_group_mass", "number":"axle_group_mass_number", "id":"type10_id"}, inplace=True)
        #     #     gx_data.to_sql(
        #     #         "traffic_e_type10_axle_group_mass",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        #     if sx_data.empty:
        #         pass
        #     else:
        #         sx_data.rename(columns = {"value":"axle_spacing_cm", "number":"axle_spacing_number", "id":"type10_id"}, inplace=True)
        #         sx_data = sx_data.drop(["offset_sensor_detection_code","mass_measurement_resolution_kg"], axis=1)
        #     #     sx_data.to_sql(
        #     #         "traffic_e_type10_axle_spacing",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        #     if tx_data.empty:
        #         pass
        #     else:
        #         tx_data.rename(columns = {"value":"tyre_code", "number":"tyre_number", "id":"type10_id"}, inplace=True)
        #         tx_data = tx_data.drop(["offset_sensor_detection_code","mass_measurement_resolution_kg"], axis=1)
        #     #     tx_data.to_sql(
        #     #         "traffic_e_type10_tyre",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        #     if cx_data.empty:
        #         pass
        #     else:
        #         cx_data.rename(columns = {"value":"group_axle_count", "number":"group_axle_number", "id":"type10_id"}, inplace=True)
        #         cx_data = cx_data.drop(["offset_sensor_detection_code","mass_measurement_resolution_kg"], axis=1)
        #     #     cx_data.to_sql(
        #     #         "traffic_e_type10_axle_group_configuration",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        #     if vx_data.empty:
        #         pass
        #     else:
        #         vx_data.rename(columns = {"value":"group_axle_count", "offset_sensor_detection_code":"vehicle_registration_number" ,"number":"group_axle_number", "id":"type10_id"}, inplace=True)
        #         vx_data = vx_data.drop(["mass_measurement_resolution_kg"], axis=1)
        #     #     vx_data.to_sql(
        #     #         "traffic_e_type10_identification_data_images",
        #     #         con=config.ENGINE,
        #     #         schema="trafc",
        #     #         if_exists="append",
        #     #         index=False,
        #     #         method=tools.psql_insert_copy,
        #     #     )

        # d2 = tools.dtype60(df)
        # if d2 is None:
        #     pass
        # else:
        #     data = tools.data_join(d2, header)
        #     data.drop("station_name", axis=1, inplace=True)
        #     data = data.merge(
        #         d2, how="outer", on=["site_id", "start_datetime", "lane_number"]
        #     )

        # data = data.rename(columns=(lambda x: x[:-2] if '_x' in x else x))
        # header = header.rename(columns=(lambda x: x[:-2] if '_x' in x else x))

        # header = tools.header_calcs(header, data, 21)
        # header = tools.header_calcs(header, data, 30)
        # header = tools.header_calcs(header, data, 70)
        # header = tools.header_calcs(header, data, 60)

        # data = data[data.columns.intersection(config.DATA_COLUMN_NAMES)]
        # header = header[header.columns.intersection(config.HEADER_COLUMN_NAMES)]

        # # tools.push_to_partitioned_table(
        # #     data,
        # #     "electronic_count_data_partitioned",
        # #     ["site_id", "start_datetime", "lane_number"],
        # # )

        # # tools.push_to_db(
        # #     header,
        # #     "electronic_count_header",
        # #     ["site_id", "start_datetime", "end_datetime"],
        # # )

        # data.to_csv(r"C:\Users\MB2705851\Desktop\Temp\rsa_traffic_counts\data.csv", index=False, mode='a')
        # header.to_csv(r"C:\Users\MB2705851\Desktop\Temp\rsa_traffic_counts\header.csv", index=False, mode='a')

    #     with open(
    #         os.path.expanduser(config.FILES_COMPLETE),
    #         "a",
    #         newline="",
    #     ) as f:
    #         write = csv.writer(f)
    #         write.writerows([[files]])


In [15]:
print(df)

       0       1       2         3       4       5        6       7       8   \
0      H0     200     002       RSA    Data  Format  Version    2.00    None   
1      S0    0013    0013      Paul  Kruger     Ext     None    None    None   
2      I0  RT8010     Fam  140426MA    None    None     None    None    None   
3      D0       M       L      None    None    None     None    None    None   
4      D1  211201  000000    211231  240000  211020   113132  211130  230005   
...    ..     ...     ...       ...     ...     ...      ...     ...     ...   
35722  30       0  211231      2400      15      04     0000    0002    0000   
35723  70       0  211231      2400      15      01     0007    0000    0002   
35724  70       0  211231      2400      15      02     0011    0000    0000   
35725  70       0  211231      2400      15      03     0011    0000    0000   
35726  70       0  211231      2400      15      04     0002    0000    0000   

           9         10        11      