# Clean Traffic Data

In [1]:
import pandas as pd
import numpy as np

# Import data

In [2]:
## Import road traffic data
# Where the per-road traffic CSVs live, and which roads to load.
file_path = '../data/processed/'
roads = ['N1', 'N102', 'N104', 'N105', 'N106', 'N2', 'N204', 'N207', 'N208']

# Maps road name -> DataFrame read from "<road>_traffic.csv".
# A road whose file is missing is reported and left out of the mapping.
traffic_data = {}
for road in roads:
    file_name = f"{file_path}{road}_traffic.csv"
    try:
        road_frame = pd.read_csv(file_name)
    except FileNotFoundError:
        print(f"File {file_name} not found.")
    else:
        traffic_data[road] = road_frame

traffic_data

{'N1':     Link no                                            Name   S_LRP  S_Offset  \
 0     N1-1L        Jatrabari - Int.with Z1101 (Left) (Left)    LRPS         0   
 1     N1-1R       Jatrabari - Int.with Z1101 (Left) (Right)    LRPS         0   
 2     N1-2L   Int.with Z1101 - Signboard (Left) R111 (Left)    LRPS       822   
 3     N1-2R  Int.with Z1101 - Signboard (Left) R111 (Right)    LRPS       822   
 4     N1-3L          Signboard - Shimrail (Left)R110 (Left)    LRPS      4175   
 ..      ...                                             ...     ...       ...   
 100   N1-65          Coxsbazar Link Road N110-Maricha Z1009  LRP386       724   
 101   N1-66             Maricha Z1009-Ukhia Dakbanglo Z1503  LRP403       217   
 102   N1-67            Ukhia Dakbanglo Z1503 - Gundum Z1504  LRP413       300   
 103   N1-68              GundumGundum Z1504 - Whykong Z1133  LRP420       900   
 104   N1-69                          Whykong Z1133 - Teknaf  LRP433       521   
 
      S_

# Clean

In [3]:
def _clean_road_traffic(raw_df):
    """Return a cleaned copy of one road's traffic table.

    The input frame is not modified. Steps, in order:

    1. Drop rows whose 'Link no' ends in 'R'.
    2. Strip a single trailing 'L' from the remaining 'Link no' values.
    3. Split 'Link no' on '-': the first part becomes a new leading 'Road'
       column, the last part replaces 'Link no' (e.g. 'N1-2L' -> 'N1', '2').
    4. Drop rows duplicated on ('S_Chainage', 'E_Chainage'), keeping the last.
    5. Drop rows where 'S_Chainage' equals 'E_Chainage'.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Must contain the columns 'Link no', 'S_Chainage' and 'E_Chainage'.

    Returns
    -------
    pandas.DataFrame
        The cleaned table, with 'Road' inserted as the first column.
    """
    # Explicit copy: the filtered frame owns its data, so the column
    # assignments below cannot trigger SettingWithCopyWarning or leak back
    # into the raw table.
    keep_mask = ~raw_df['Link no'].str.endswith('R')
    cleaned = raw_df[keep_mask].copy()

    # Anchor the pattern at the end of the string: a plain replace of 'L'
    # would remove every 'L' in the identifier, not just the suffix.
    link_ids = cleaned['Link no'].str.replace(r'L$', '', regex=True)

    # '<road>-<link>' -> road name (first part) and link number (last part).
    link_parts = link_ids.str.split('-')
    cleaned['Link no'] = link_parts.str[-1]
    cleaned.insert(0, 'Road', link_parts.str[0])

    # When several rows cover the same chainage interval, keep the last one.
    cleaned = cleaned.drop_duplicates(subset=['S_Chainage', 'E_Chainage'], keep='last')

    # Remove rows whose start and end chainage are identical.
    cleaned = cleaned[cleaned['S_Chainage'] != cleaned['E_Chainage']]

    return cleaned


# Output directory for the cleaned CSVs (loop-invariant, so set once).
file_path = '../data/processed/'

# Maps road name -> cleaned DataFrame.
traffic_data_cleaned = {}

for road, raw_road_df in traffic_data.items():
    df_road = _clean_road_traffic(raw_road_df)

    # Save the cleaned DataFrame to the dictionary
    traffic_data_cleaned[road] = df_road

    # Save the cleaned data to "<road>_traffic_cleaned.csv" (index omitted)
    cleaned_file_name = f"{file_path}{road}_traffic_cleaned.csv"
    df_road.to_csv(cleaned_file_name, index=False)

In [4]:
# Spot-check the cleaning step: display the cleaned table for road 'N1'.
# Raises KeyError if no cleaned table was produced for 'N1'.
traffic_data_cleaned['N1']

Unnamed: 0,Road,Link no,Name,S_LRP,S_Offset,S_Chainage,E_LRP,E_Offset,E_Chainage,Length (km),...,Car,Auto Rickshaw,Motor Cycle,Bi-Cycle,Cycle Rickshaw,Cart,Motorized,Non Motorized,Total AADT,(AADT)
0,N1,1,Jatrabari - Int.with Z1101 (Left) (Left),LRPS,0,0.000,LRPS,822,0.822,0.822,...,1851.0,2980.0,398.0,232.0,889.0,0.0,18236.0,1121.0,19357.0,19357.0
2,N1,2,Int.with Z1101 - Signboard (Left) R111 (Left),LRPS,822,0.822,LRPS,4175,4.175,3.353,...,2608.0,2508.0,436.0,213.0,1088.0,0.0,20236.0,1301.0,21537.0,21537.0
4,N1,3,Signboard - Shimrail (Left)R110 (Left),LRPS,4175,4.175,LRPS,7181,7.181,3.006,...,1690.0,2266.0,1087.0,75.0,1198.0,0.0,16288.0,1273.0,17561.0,17561.0
6,N1,4,Shimrail - Katchpur (Left)N2 (Left),LRPS,7181,7.181,LRP009,260,8.763,1.582,...,1579.0,3154.0,1162.0,211.0,1077.0,0.0,16001.0,1288.0,17289.0,17289.0
8,N1,5,Katchpur - Madanpur (Left)N105 (Left),LRP009,260,8.763,LRP012,439,11.936,3.173,...,1579.0,3154.0,1154.0,211.0,1077.0,0.0,22591.0,1288.0,23879.0,23879.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,N1,65,Coxsbazar Link Road N110-Maricha Z1009,LRP386,724,381.481,LRP403,217,398.205,16.724,...,142.0,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5820.0
101,N1,66,Maricha Z1009-Ukhia Dakbanglo Z1503,LRP403,217,398.205,LRP413,300,408.434,10.229,...,142.0,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5820.0
102,N1,67,Ukhia Dakbanglo Z1503 - Gundum Z1504,LRP413,300,408.434,LRP420,900,416.044,7.610,...,142.0,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5820.0
103,N1,68,GundumGundum Z1504 - Whykong Z1133,LRP420,900,416.044,LRP433,521,428.908,12.864,...,142.0,2476.0,419.0,97.0,1042.0,0.0,4681.0,1139.0,5820.0,5820.0
