# G16 Code for creating demo data

## 0. Load data

In [19]:
import pandas as pd
import numpy as np
import sys
# Show the prefix of the running Python installation, to confirm which environment executes this notebook
print(sys.prefix)

c:\Users\Move\anaconda3\envs\epa133a


In [20]:
# Load the bridge data from the raw-data folder.
# The folder is stored in `raw_dir` so this cell does not shadow the builtin dir().
raw_dir = '../data/raw/'
filename = 'BMMS_overview.xlsx'
df_bridges = pd.read_excel(raw_dir + filename)

In [21]:
#df_bridges.head()

In [22]:
# Load the road data from the raw-data folder.
# The folder is stored in `raw_dir` so this cell does not shadow the builtin dir().
raw_dir = '../data/raw/'
filename = '_roads3.csv'  # TODO: replace this with the interpolated data
df_roads = pd.read_csv(raw_dir + filename)

In [23]:
#df_roads.head()

In [24]:
# Report (rows, columns) of both inputs: bridges first, then roads, one tuple per line
print(df_bridges.shape, df_roads.shape, sep='\n')

(20415, 20)
(51348, 8)


## 1. Remove duplicates from BMMS_overview.xlsx and _roads3.csv

## 2. Combine data from BMMS_overview.xlsx & _roads3.csv

In [25]:
# Give the bridge LRP column the same name the roads dataframe uses: 'LRPName' -> 'lrp'
df_bridges.rename(
    columns=lambda col: 'lrp' if col == 'LRPName' else col,
    inplace=True,
)


In [26]:
# Every record of the bridges dataframe is a bridge
df_bridges['model_type'] = 'bridge'

# Road records are classified by the last character of their 'lrp' value:
#   - a numeric last character, or 'S' / 'E'  -> 'link'
#   - any other last character                -> 'bridge'
# Missing 'lrp' values (NaN) do not satisfy the comparison below and stay 'link'.
lrp_suffix = df_roads['lrp'].str[-1]
has_bridge_suffix = lrp_suffix.str.isnumeric().eq(False) & ~lrp_suffix.isin(['S', 'E'])
df_roads['model_type'] = np.where(has_bridge_suffix, 'bridge', 'link')


In [27]:
#df_roads

In [28]:
# Keep an independent snapshot of the bridges dataframe as it is before combining
df_bridges_original = df_bridges.copy(deep=True)

# Stack the bridge rows on top of the road rows in a single dataframe;
# a column that exists in only one of the two is filled with NaN for the other
df_concat = pd.concat([df_bridges, df_roads], axis=0, join='outer')

In [29]:
#df_concat.head()

In [30]:
# Order the rows by road, and by increasing chainage within each road
df_concat = df_concat.sort_values(['road', 'chainage'], ascending=[True, True])

In [31]:
#df_concat.head()

## 3. Build the demo dataframe

In [32]:
# Keep every row that is not a bridge, and only those bridges that have a recorded condition
is_bridge = df_concat['model_type'] == 'bridge'
has_condition = df_concat['condition'].notna()
df_concat = df_concat[~is_bridge | has_condition]

In [33]:
# Build the demo dataframe: one output row per row of df_concat, numbered per road.
#
# The per-road work uses vectorised groupby operations instead of re-filtering
# df_concat for every road and walking the result with iterrows(), which scanned
# the whole table once per road. Rows, row order and values are unchanged.

# Rows without a road value cannot be matched to any road and are left out
road_rows = df_concat.dropna(subset=['road'])

# Keep each road's rows together: roads in order of first appearance, rows in
# their existing order within a road. The fresh 0..n-1 index also removes the
# duplicate labels left over from concatenating the two input dataframes.
road_order = np.argsort(pd.factorize(road_rows['road'])[0], kind='stable')
road_rows = road_rows.iloc[road_order].reset_index(drop=True)

by_road = road_rows.groupby('road', sort=False)
position = by_road.cumcount()  # 0, 1, 2, ... within each road
is_first = position == 0
is_last = by_road.cumcount(ascending=False) == 0

# The first row of a road becomes 'source' and the last row 'sink'; a road with
# a single row is only a 'source'. All rows in between keep their own type.
model_type = road_rows['model_type'].mask(is_last, 'sink').mask(is_first, 'source')

# Length = chainage difference to the previous row of the same road;
# the first row of a road has no predecessor and gets length 0
length = by_road['chainage'].diff().mask(is_first, 0)

df_demo = pd.DataFrame({
    'road': road_rows['road'],
    'id': road_rows['road'].astype(str) + '_' + position.astype(str),
    'model_type': model_type,
    'name': road_rows['name'],
    'lat': road_rows['lat'],
    'lon': road_rows['lon'],
    'length': length,
    'condition': road_rows['condition'],
})

In [34]:
#df_demo.head()

In [35]:
# Report the size of the demo dataframe as (rows, columns)
print((len(df_demo.index), len(df_demo.columns)))

(41657, 8)


## 4. Save demo file as a csv

In [36]:
# Write the demo dataframe to a csv file in the processed-data folder, without the index column.
# The folder is stored in `processed_dir` so this cell does not shadow the builtin dir().
processed_dir = '../data/processed/'
filename = 'demo_100.csv'
df_demo.to_csv(processed_dir + filename, index=False)