# Import data

In [1]:
import pandas as pd

In [2]:
# Load Roads_InfoAboutEachLRP.csv from the original-data folder
dir = 'infrastructure/original/'  # NOTE(review): this name shadows the builtin dir()
filename = 'Roads_InfoAboutEachLRP.csv'
df_roadinfo = pd.read_csv(f'{dir}{filename}')

In [3]:
# Load the bridge overview workbook (BMMS_overview.xlsx) from the original-data folder
dir = 'infrastructure/original/'  # NOTE(review): this name shadows the builtin dir()
filename = 'BMMS_overview.xlsx'
df_bridges = pd.read_excel(f'{dir}{filename}')

In [4]:
# Load the road LRP table (CORRECT_ROAD_ATM.csv) from the working directory
dir = ''  # empty prefix: the file is read relative to the current directory
filename = 'CORRECT_ROAD_ATM.csv' # replace this with the interpolated data
df_roads = pd.read_csv(f'{dir}{filename}')

# Clean bridge data (by Yao)

In [5]:
# Swap 'lat' and 'lon' if both are out of bounds

# Inclusive bounds used to decide whether a latitude / longitude value is plausible
LAT_MIN = 20
LAT_MAX = 28
LON_MIN = 88
LON_MAX = 93

def swap_coordinates(row):
    """Swap row['lat'] and row['lon'] when both values are out of bounds.

    A value is in bounds when it lies inside [LAT_MIN, LAT_MAX] (for 'lat')
    or [LON_MIN, LON_MAX] (for 'lon'), limits included. If at least one of
    the two is in bounds the row is returned untouched. The swap is done on
    the passed-in row itself, which is also the return value.
    """
    # Guard clause: one plausible coordinate is enough to leave the row alone
    if (LAT_MIN <= row['lat'] <= LAT_MAX) or (LON_MIN <= row['lon'] <= LON_MAX):
        return row

    old_lat = row['lat']
    row['lat'] = row['lon']
    row['lon'] = old_lat
    return row

def clean_bridge_data(df_bridges):
    """Return a cleaned copy of the bridge table; the input frame is not modified.

    Steps, in order:
      1. Drop rows whose 'lat' or 'lon' is missing or exactly 0.
      2. Add 'road_LRPName' ('road' + '_' + 'LRPName') and 'null_num'
         (the number of null cells in the row).
      3. Keep one row per 'road_LRPName': the row with the fewest nulls.
      4. Swap 'lat' and 'lon' on rows where both values fall outside
         [LAT_MIN, LAT_MAX] and [LON_MIN, LON_MAX] respectively.

    Parameters
    ----------
    df_bridges : pandas.DataFrame
        Bridge records with at least the columns 'road', 'LRPName', 'lat'
        and 'lon'.

    Returns
    -------
    pandas.DataFrame
        The cleaned rows, still carrying the helper columns 'road_LRPName'
        and 'null_num', with their original index labels.
    """
    # 1. Coordinates must be present and non-zero. dropna and the boolean
    #    filter both return new frames; the explicit copy makes the column
    #    assignments below safe.
    bridge_new = df_bridges.dropna(subset=['lat', 'lon'])
    has_coords = (bridge_new['lat'] != 0) & (bridge_new['lon'] != 0)
    bridge_new = bridge_new[has_coords].copy()

    # 2. Helper columns used for de-duplication.
    bridge_new['road_LRPName'] = bridge_new['road'] + '_' + bridge_new['LRPName']
    bridge_new['null_num'] = bridge_new.isnull().sum(axis=1)

    # 3. One row per road_LRPName: the one with the fewest null cells.
    #    NOTE(review): a row whose 'road' or 'LRPName' is missing gets a NaN
    #    key, and groupby leaves NaN keys out by default, so such rows are
    #    dropped here - confirm that this is intended.
    best_rows = bridge_new.groupby('road_LRPName')['null_num'].idxmin()
    bridge_new = bridge_new.loc[best_rows]

    # 4. Swap lat/lon where BOTH are out of bounds. This is the same rule as
    #    swap_coordinates, applied to whole columns instead of once per row.
    lat_ok = (bridge_new['lat'] >= LAT_MIN) & (bridge_new['lat'] <= LAT_MAX)
    lon_ok = (bridge_new['lon'] >= LON_MIN) & (bridge_new['lon'] <= LON_MAX)
    both_out = ~lat_ok & ~lon_ok
    # to_numpy() strips the column labels so the values are assigned by
    # position (lon -> lat, lat -> lon) instead of being re-aligned by name.
    bridge_new.loc[both_out, ['lat', 'lon']] = (
        bridge_new.loc[both_out, ['lon', 'lat']].to_numpy()
    )

    return bridge_new

In [6]:
# Run the cleaning function on the raw bridge table and display the result
cleaned_bridges = clean_bridge_data(df_bridges)
cleaned_bridges

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,...,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc,road_LRPName,null_num
1869,N102,2.059,Box Culvert,LRP002a,Brorechon,1.50,A,114920,Comilla (Mainamati)-Brahmanbaria (Sarail) Road,2.059,...,1.0,Comilla,Comilla,Comilla,Gouripur,23.493750,91.106472,bcs1,N102_LRP002a,0
1870,N102,2.363,Box Culvert,LRP002b,SHABAR BAZAR,1.60,A,101250,Comilla (Mainamati)-Brahmanbaria (Sarail) Road,2.363,...,1.0,Comilla,Comilla,Comilla,Gouripur,23.496389,91.105944,bcs1,N102_LRP002b,0
1871,N102,2.511,Box Culvert,LRP002c,ZUMUR BOX CULVERT,1.40,A,101252,Comilla (Mainamati)-Brahmanbaria (Sarail) Road,2.511,...,1.0,Comilla,Comilla,Comilla,Gouripur,23.497722,91.105639,bcs1,N102_LRP002c,0
15844,N102,4.249,Slab Culvert,LRP004a,DEAB POR SLAB CULVERT,2.50,B,101256,Comilla (Mainamati)-Brahmanbaria (Sarail) Road,4.249,...,1.0,Comilla,Comilla,Comilla,Gouripur,23.511250,91.098194,bcs1,N102_LRP004a,0
16838,N102,7.036,Box Culvert,LRP007a,RAMPUR,3.75,C,101262,Comilla (Mainamati)-Brahmanbaria (Sarail) Road,7.036,...,1.0,Comilla,Comilla,Comilla,Gouripur,23.532417,91.083528,bcs1,N102_LRP007a,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15394,Z8948,1.010,Box Culvert,LRP001a,.,2.00,A,116834,Fakirhat-Khasherhat Road,1.010,...,1.0,Barisal,Barisal,Bhola,Bhola,22.419167,90.787500,bcs1,Z8948_LRP001a,0
15396,Z8948,2.070,Steel Beam & RCC Slab,LRP002a,.,18.50,A,116835,Fakirhat-Khasherhat Road,2.070,...,3.0,Barisal,Barisal,Bhola,Bhola,22.414167,90.791667,bcs1,Z8948_LRP002a,0
15397,Z8948,2.100,Steel Beam & RCC Slab,LRP002c,.,18.50,A,116836,Fakirhat-Khasherhat Road,2.100,...,3.0,Barisal,Barisal,Bhola,Bhola,22.414167,90.791944,bcs1,Z8948_LRP002c,0
15392,Z8948,0.306,Box Culvert,LRPSa,.,2.00,A,116832,Fakirhat-Khasherhat Road,0.306,...,1.0,Barisal,Barisal,Bhola,Bhola,22.425278,90.787778,bcs1,Z8948_LRPSa,0


## Save excel file

In [None]:
# Write the preliminary cleaned bridge table to an Excel workbook
dir = 'infrastructure/cleaned/'  # NOTE(review): this name shadows the builtin dir()
filename = 'BMMS_overview_cleaned_prelim.xlsx'
cleaned_bridges.to_excel(f'{dir}{filename}', sheet_name='BMMS_overview', index=False)

# Remove bridges located beyond the end of their road (after the last LRP before LRPE)

In [7]:
# For every road: the 'LRPE' marker (None when the road has none) ...
lrpe_dict = {}

# ... and the LRP directly before 'LRPE' (or the road's last LRP when it has no 'LRPE')
lrp_before_lrpe = {}

# groupby(sort=False) visits the roads in order of first appearance
for road, road_rows in df_roads.groupby('road', sort=False):
    lrps = road_rows['lrp'].tolist()

    if 'LRPE' in lrps:
        # Position of the first 'LRPE' inside this road's own rows. The index
        # label of df_roads must not be used here: it counts rows of the whole
        # table, not rows of this road.
        lrpe_position = lrps.index('LRPE')
        lrpe_dict[road] = 'LRPE'
        # If 'LRPE' is the road's first row, nothing precedes it
        lrp_before_lrpe[road] = lrps[lrpe_position - 1] if lrpe_position > 0 else None
    else:
        # Remember the last LRP if there is no LRPE
        lrp_before_lrpe[road] = lrps[-1]
        lrpe_dict[road] = None  # Add a placeholder for LRPE

# Combine the dictionaries into a dataframe (one row per road)
roads_in_order = list(lrpe_dict)
df_lrpe_road = pd.DataFrame({
    'road': roads_in_order,
    'LRP_before_E': [lrp_before_lrpe[road] for road in roads_in_order],
    'LRPE': [lrpe_dict[road] for road in roads_in_order],
})

df_lrpe_road


Unnamed: 0,road,LRP_before_E,LRPE
0,N1,LRP467,LRPE
1,N101,LRP006,LRPE
2,N102,LRP082a,LRPE
3,N103,LRP005,LRPE
4,N104,LRP049c,LRPE
...,...,...,...
872,Z8910,LRP039b,LRPE
873,Z8913,LRP025a,
874,Z8915,LRP016,LRPE
875,Z8916,LRP011b,LRPE


In [8]:
def custom_sort(df):
    """Return a copy of ``df`` sorted by road and LRP name, 'LRPS' rows first.

    Within each road, rows whose 'LRPName' contains 'LRPS' come first; the
    remaining rows follow. Inside both groups the order is the plain text
    order of 'LRPName'. A missing 'LRPName' is treated as "not LRPS".

    The input frame is left untouched: the temporary sort key lives only on
    the returned copy and is removed again before returning.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'road' and 'LRPName'.

    Returns
    -------
    pandas.DataFrame
        The sorted rows with the same columns as ``df``.
    """
    is_start_lrp = df['LRPName'].str.contains('LRPS', regex=False, na=False)
    return (
        df.assign(lrp_order=(~is_start_lrp).astype(int))  # 0 = LRPS rows, 1 = the rest
          .sort_values(by=['road', 'lrp_order', 'LRPName'])
          .drop(columns=['lrp_order'])
    )

# Sort the cleaned bridges per road, with the 'LRPS' rows first
cleaned_bridges_sorted = custom_sort(cleaned_bridges)

In [9]:
def remove_rows_after_lrp_before_e(df, df_lrpe):
    """Drop, per road, the rows that come after that road's 'LRP_before_E' row.

    ``df`` is walked in its current row order. For each road listed in
    ``df_lrpe``, the first row whose 'LRPName' equals the road's
    'LRP_before_E' is the cut-off: that row and all earlier rows of the road
    are kept, every later row of the same road is removed. Roads without a
    matching row (or with a missing 'LRP_before_E') are left unchanged.

    "After" means later in row order. Index labels are not consulted, so the
    result is correct even when the labels are not in ascending order.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with the columns 'road' and 'LRPName', already in the order
        that defines "before" and "after".
    df_lrpe : pandas.DataFrame
        One row per road with the columns 'road' and 'LRP_before_E'.

    Returns
    -------
    pandas.DataFrame
        The kept rows of ``df`` in their original order, with their original
        index labels.
    """
    # road -> LRP name that marks the cut-off; roads with a missing value are skipped
    usable = df_lrpe.dropna(subset=['LRP_before_E'])
    cutoff_lrp = dict(zip(usable['road'], usable['LRP_before_E']))

    roads_past_cutoff = set()
    keep_positions = []
    for position, (road, lrp_name) in enumerate(zip(df['road'], df['LRPName'])):
        if road in roads_past_cutoff:
            continue  # this row lies after the road's cut-off row
        keep_positions.append(position)
        if road in cutoff_lrp and lrp_name == cutoff_lrp[road]:
            roads_past_cutoff.add(road)

    # Select by position so that unordered or repeated index labels cannot
    # affect which rows are kept.
    return df.iloc[keep_positions]

# Apply the function to cleaned_bridges_sorted, using the per-road cut-offs in df_lrpe_road
df_bridges_no_lrp_after_e = remove_rows_after_lrp_before_e(cleaned_bridges_sorted, df_lrpe_road)
df_bridges_no_lrp_after_e

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,...,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc,road_LRPName,null_num
0,N1,1.800,Box Culvert,LRP001a,.,11.3,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,...,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702889,90.450389,bcs1,N1_LRP001a,0
1,N1,4.925,Box Culvert,LRP004b,.,6.6,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,...,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.693611,90.478833,bcs1,N1_LRP004b,0
13420,N1,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.0,C,101102,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,...,8.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702083,90.515917,bcs1,N1_LRP008b,0
15398,N1,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.0,B,101106,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.543,...,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.702056,90.528194,bcs1,N1_LRP010a,0
3,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,...,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.699833,90.530722,bcs1,N1_LRP010b,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15392,Z8948,0.306,Box Culvert,LRPSa,.,2.0,A,116832,Fakirhat-Khasherhat Road,0.306,...,1.0,Barisal,Barisal,Bhola,Bhola,22.425278,90.787778,bcs1,Z8948_LRPSa,0
15393,Z8948,0.640,Box Culvert,LRPSb,.,2.0,A,116833,Fakirhat-Khasherhat Road,0.640,...,1.0,Barisal,Barisal,Bhola,Bhola,22.422778,90.787222,bcs1,Z8948_LRPSb,0
15394,Z8948,1.010,Box Culvert,LRP001a,.,2.0,A,116834,Fakirhat-Khasherhat Road,1.010,...,1.0,Barisal,Barisal,Bhola,Bhola,22.419167,90.787500,bcs1,Z8948_LRP001a,0
15396,Z8948,2.070,Steel Beam & RCC Slab,LRP002a,.,18.5,A,116835,Fakirhat-Khasherhat Road,2.070,...,3.0,Barisal,Barisal,Bhola,Bhola,22.414167,90.791667,bcs1,Z8948_LRP002a,0


In [10]:
# Drop the two helper columns ('road_LRPName', 'null_num') and renumber the rows from 0
df_final_bridges = (
    df_bridges_no_lrp_after_e
    .drop(columns=['road_LRPName', 'null_num'])
    .reset_index(drop=True)
)

In [11]:
import numpy as np

# For every road in df_final_bridges, compare its LRP names with those of the
# same road in cleaned_bridges and print both lists (plus the road's
# LRP_before_E) whenever they are not identical.
# NOTE(review): np.array_equal is order-sensitive, so a road is also reported
# when both frames hold the same LRPs in a different order - confirm intended.
for road in df_final_bridges['road'].unique():
    lrps_cleaned = cleaned_bridges.loc[cleaned_bridges['road'] == road, 'LRPName'].values
    lrps_final = df_final_bridges.loc[df_final_bridges['road'] == road, 'LRPName'].values

    if np.array_equal(lrps_cleaned, lrps_final):
        continue

    print(df_lrpe_road.loc[df_lrpe_road['road'] == road, "LRP_before_E"])
    print(f'Road: {road}')
    print(f'Cleaned LRPs: {lrps_cleaned}')
    print(f'Final LRPs: {lrps_final}')
    print()

2    LRP082a
Name: LRP_before_E, dtype: object
Road: N102
Cleaned LRPs: ['LRP002a' 'LRP002b' 'LRP002c' 'LRP004a' 'LRP007a' 'LRP008a' 'LRP009a'
 'LRP010c' 'LRP011a' 'LRP011b' 'LRP012a' 'LRP015a' 'LRP017a' 'LRP018a'
 'LRP019a' 'LRP019b' 'LRP021a' 'LRP021b' 'LRP021c' 'LRP022a' 'LRP023b'
 'LRP024b' 'LRP025b' 'LRP026a' 'LRP027a' 'LRP028a' 'LRP028c' 'LRP029a'
 'LRP029b' 'LRP029d' 'LRP030a' 'LRP031a' 'LRP031b' 'LRP031c' 'LRP032a'
 'LRP033a' 'LRP033c' 'LRP034a' 'LRP034b' 'LRP035a' 'LRP035c' 'LRP036a'
 'LRP036b' 'LRP036d' 'LRP037a' 'LRP037c' 'LRP037d' 'LRP038b' 'LRP038d'
 'LRP039a' 'LRP040a' 'LRP041a' 'LRP043a' 'LRP044a' 'LRP044c' 'LRP045a'
 'LRP046a' 'LRP047a' 'LRP048a' 'LRP049a' 'LRP050a' 'LRP050c' 'LRP051b'
 'LRP052a' 'LRP053a' 'LRP055a' 'LRP055c' 'LRP056b' 'LRP058a' 'LRP059a'
 'LRP059b' 'LRP060a' 'LRP062b' 'LRP062c' 'LRP063a' 'LRP064b' 'LRP065a'
 'LRP066a' 'LRP067a' 'LRP067c' 'LRP069a' 'LRP070a' 'LRP073e' 'LRP077a'
 'LRP078a' 'LRP078b' 'LRP079a' 'LRP080a' 'LRP082a' 'LRPSc']
Final LRPs: ['LR

## Save excel file

In [12]:
# Write the bridge table, with the rows after each road's end removed, to an Excel workbook
dir = 'infrastructure/cleaned/'  # NOTE(review): this name shadows the builtin dir()
# NOTE(review): the file name contains 'LPRE' and a double underscore - possibly typos; left as is
filename = 'BMMS_overview__cleaned_bridges_after_LPRE_removed.xlsx'
df_final_bridges.to_excel(f'{dir}{filename}', sheet_name='BMMS_overview', index=False)

# Adjust bridge lat/lon to the nearest road point

In [13]:
# Display the road LRP table that the bridges are matched against
df_roads

Unnamed: 0,road,lrp,lat,lon
0,N1,LRPS,23.706028,90.443333
1,N1,LRPSa,23.704403,90.446903
2,N1,LRPSb,23.702778,90.450472
3,N1,LRP001,23.702722,90.455914
4,N1,LRP002,23.702667,90.461356
...,...,...,...,...
52205,Z8943,LRP007,22.429286,90.783722
52206,Z8943,LRP008,22.429768,90.779305
52207,Z8943,LRP008a,22.430249,90.774888
52208,Z8943,LRP008b,22.430249,90.774860


In [14]:
# Display the bridge table before its coordinates are adjusted
df_final_bridges

Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,width,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc
0,N1,1.800,Box Culvert,LRP001a,.,11.3,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,19.50,2005.0,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702889,90.450389,bcs1
1,N1,4.925,Box Culvert,LRP004b,.,6.6,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,35.40,2006.0,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.693611,90.478833,bcs1
2,N1,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.0,C,101102,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,14.65,1986.0,8.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702083,90.515917,bcs1
3,N1,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.0,B,101106,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.543,21.70,1987.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.702056,90.528194,bcs1
4,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,12.20,1992.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.699833,90.530722,bcs1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17778,Z8948,0.306,Box Culvert,LRPSa,.,2.0,A,116832,Fakirhat-Khasherhat Road,0.306,10.00,1998.0,1.0,Barisal,Barisal,Bhola,Bhola,22.425278,90.787778,bcs1
17779,Z8948,0.640,Box Culvert,LRPSb,.,2.0,A,116833,Fakirhat-Khasherhat Road,0.640,10.00,1998.0,1.0,Barisal,Barisal,Bhola,Bhola,22.422778,90.787222,bcs1
17780,Z8948,1.010,Box Culvert,LRP001a,.,2.0,A,116834,Fakirhat-Khasherhat Road,1.010,9.70,1995.0,1.0,Barisal,Barisal,Bhola,Bhola,22.419167,90.787500,bcs1
17781,Z8948,2.070,Steel Beam & RCC Slab,LRP002a,.,18.5,A,116835,Fakirhat-Khasherhat Road,2.070,3.00,1994.0,3.0,Barisal,Barisal,Bhola,Bhola,22.414167,90.791667,bcs1


In [15]:
import geopandas as gpd
from shapely.geometry import Point
from scipy.spatial import cKDTree

def adjust_bridge_coordinates_simple(df_bridges, df_roads):
    """Snap every bridge to the nearest recorded point of its own road.

    For each road that occurs in ``df_bridges``, a k-d tree is built from the
    (lon, lat) pairs of that road in ``df_roads``. All bridges of the road are
    then looked up in one batched query and their 'lat' / 'lon' are replaced
    by the coordinates of the closest road point. "Closest" is the straight
    Euclidean distance on the raw lon/lat numbers (the default metric of
    ``cKDTree.query``), not a distance along the earth's surface.

    Bridges whose road has no rows in ``df_roads`` keep their coordinates.
    Rows are addressed by position, so the result does not depend on the
    index labels of ``df_bridges`` being unique.

    Parameters
    ----------
    df_bridges : pandas.DataFrame
        Bridges with the columns 'road', 'lat' and 'lon'. Not modified.
    df_roads : pandas.DataFrame
        Road points with the columns 'road', 'lat' and 'lon'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df_bridges`` with adjusted 'lat' and 'lon'.
    """
    adjusted_bridges = df_bridges.copy()
    lat_col = adjusted_bridges.columns.get_loc('lat')
    lon_col = adjusted_bridges.columns.get_loc('lon')

    # Coordinates of all bridges as a plain (n, 2) array of [lon, lat]
    all_bridge_points = df_bridges[['lon', 'lat']].to_numpy(dtype=float)

    for road in df_bridges['road'].unique():
        road_rows = df_roads[df_roads['road'] == road]
        if road_rows.empty:
            continue  # no road points to snap to

        road_points = road_rows[['lon', 'lat']].to_numpy(dtype=float)
        tree = cKDTree(road_points)

        # Row positions (not labels) of this road's bridges
        bridge_positions = (df_bridges['road'] == road).to_numpy().nonzero()[0]

        # One query for all bridges of the road; `nearest` holds, per bridge,
        # the row number of the closest entry in road_points
        _, nearest = tree.query(all_bridge_points[bridge_positions])

        adjusted_bridges.iloc[bridge_positions, lon_col] = road_points[nearest, 0]
        adjusted_bridges.iloc[bridge_positions, lat_col] = road_points[nearest, 1]

    return adjusted_bridges

# Snap the final bridges onto the road points in df_roads and display the result
df_final_bridges_adjusted = adjust_bridge_coordinates_simple(df_final_bridges, df_roads)
df_final_bridges_adjusted


Unnamed: 0,road,km,type,LRPName,name,length,condition,structureNr,roadName,chainage,width,constructionYear,spans,zone,circle,division,sub-division,lat,lon,EstimatedLoc
0,N1,1.800,Box Culvert,LRP001a,.,11.3,A,117861,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,1.800,19.50,2005.0,2.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702778,90.450472,bcs1
1,N1,4.925,Box Culvert,LRP004b,.,6.6,A,117862,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,4.925,35.40,2006.0,1.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702500,90.477683,bcs1
2,N1,8.976,PC Girder Bridge,LRP008b,KANCHPUR PC GIRDER BRIDGE,397.0,C,101102,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,8.976,14.65,1986.0,8.0,Dhaka,Dhaka,Narayanganj,Narayanganj-1,23.702111,90.515778,bcs1
3,N1,10.543,Box Culvert,LRP010a,KATCHPUR BOX CULVERT,8.0,B,101106,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.543,21.70,1987.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.701528,90.528194,bcs1
4,N1,10.880,Box Culvert,LRP010b,NOYAPARA CULVERT,6.3,A,112531,Dhaka (Jatrabari)-Comilla (Mainamati)-Chittago...,10.880,12.20,1992.0,2.0,Dhaka,Dhaka,Narayanganj,Vitikandi,23.699861,90.530722,bcs1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17778,Z8948,0.306,Box Culvert,LRPSa,.,2.0,A,116832,Fakirhat-Khasherhat Road,0.306,10.00,1998.0,1.0,Barisal,Barisal,Bhola,Bhola,22.425278,90.787778,bcs1
17779,Z8948,0.640,Box Culvert,LRPSb,.,2.0,A,116833,Fakirhat-Khasherhat Road,0.640,10.00,1998.0,1.0,Barisal,Barisal,Bhola,Bhola,22.422778,90.787222,bcs1
17780,Z8948,1.010,Box Culvert,LRP001a,.,2.0,A,116834,Fakirhat-Khasherhat Road,1.010,9.70,1995.0,1.0,Barisal,Barisal,Bhola,Bhola,22.419167,90.787500,bcs1
17781,Z8948,2.070,Steel Beam & RCC Slab,LRP002a,.,18.5,A,116835,Fakirhat-Khasherhat Road,2.070,3.00,1994.0,3.0,Barisal,Barisal,Bhola,Bhola,22.414167,90.791667,bcs1


## Save excel file: Final

In [16]:
# Write the final bridge table (coordinates adjusted) to an Excel workbook
dir = 'infrastructure/cleaned/'  # NOTE(review): this name shadows the builtin dir()
filename = 'BMMS_overview_cleaned_final.xlsx'
df_final_bridges_adjusted.to_excel(f'{dir}{filename}', sheet_name='BMMS_overview', index=False)