In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [2]:
data = pd.read_csv('bridge_digital_twin_dataset.csv')

In [3]:
data.head()

Unnamed: 0,Timestamp,Strain_microstrain,Deflection_mm,Vibration_ms2,Tilt_deg,Displacement_mm,Crack_Propagation_mm,Corrosion_Level_percent,Cable_Member_Tension_kN,Bearing_Joint_Forces_kN,...,Soil_Saturation_percent,Landslide_Ground_Movement,Simulated_Slope_Displacement_mm,High_Winds_Storms,Simulated_Wind_Load_Pressure_kPa,Abnormal_Traffic_Load_Surges,Simulated_Localized_Stress_Index,Energy_Harvesting_Potential_W,Estimated_Repair_Cost_USD_incremental,Carbon_Footprint_tCO2e_incremental
0,2023-01-01 00:00:00,,12.663104,1.275955,0.58734,22.576518,-8.3e-05,0.00291,404.394906,233.052345,...,9.888758,0.0,0.022592,0.0,0.211946,0.0,0.215354,0.081403,1.343588,0.001348
1,2023-01-01 00:01:00,599.936448,12.235297,0.716857,0.657122,21.202649,9.2e-05,0.004601,367.821051,237.545735,...,9.920331,0.0,0.014647,0.0,0.231188,0.0,0.218747,0.107097,1.397207,0.001299
2,2023-01-01 00:02:00,573.878472,12.447144,0.834753,0.615011,,-0.000377,0.005115,402.393742,239.164832,...,,0.0,0.0,0.0,0.199192,0.0,0.213449,0.141938,1.318876,0.001378
3,2023-01-01 00:03:00,550.457302,12.554883,1.004917,0.617999,21.551945,-0.000646,0.005505,356.024238,237.645586,...,43.526026,0.0,0.0,0.0,0.198359,0.0,0.20819,0.192431,1.351773,0.001379
4,2023-01-01 00:04:00,572.82996,12.598818,1.176253,0.633534,21.711292,0.000247,-0.003415,413.33828,232.310381,...,44.138185,0.0,0.0,0.0,0.18811,0.0,0.26124,0.261609,,0.001267


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43200 entries, 0 to 43199
Data columns (total 54 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Timestamp                              43200 non-null  object 
 1   Strain_microstrain                     41379 non-null  float64
 2   Deflection_mm                          41467 non-null  float64
 3   Vibration_ms2                          41478 non-null  float64
 4   Tilt_deg                               41447 non-null  float64
 5   Displacement_mm                        41473 non-null  float64
 6   Crack_Propagation_mm                   41494 non-null  float64
 7   Corrosion_Level_percent                41531 non-null  float64
 8   Cable_Member_Tension_kN                41459 non-null  float64
 9   Bearing_Joint_Forces_kN                41515 non-null  float64
 10  Fatigue_Accumulation_au                41496 non-null  float64
 11  Mo

In [5]:
data.isna().sum()

Timestamp                                    0
Strain_microstrain                        1821
Deflection_mm                             1733
Vibration_ms2                             1722
Tilt_deg                                  1753
Displacement_mm                           1727
Crack_Propagation_mm                      1706
Corrosion_Level_percent                   1669
Cable_Member_Tension_kN                   1741
Bearing_Joint_Forces_kN                   1685
Fatigue_Accumulation_au                   1704
Modal_Frequency_Hz                        1719
Temperature_C                             1743
Humidity_percent                          1783
Wind_Speed_ms                             1726
Wind_Direction_deg                        1768
Precipitation_mmh                         1687
Water_Level_m                             1687
Seismic_Activity_ms2                      1693
Solar_Radiation_Wm2                       1728
Air_Quality_Index_AQI                     1739
Soil_Settleme

In [6]:
bridge_data = data[[
    "Strain_microstrain",
    "Deflection_mm",
    "Displacement_mm",
    "Localized_Strain_Hotspot",
    "Vibration_ms2",
    "Seismic_Activity_ms2",
    "Impact_Events_g",
    "Modal_Frequency_Hz",
    "Energy_Dissipation_au",
    "Temperature_C",
    "Structural_Health_Index_SHI"
]]
bridge_data

Unnamed: 0,Strain_microstrain,Deflection_mm,Displacement_mm,Localized_Strain_Hotspot,Vibration_ms2,Seismic_Activity_ms2,Impact_Events_g,Modal_Frequency_Hz,Energy_Dissipation_au,Temperature_C,Structural_Health_Index_SHI
0,,12.663104,22.576518,0.0,1.275955,-5.732291e-06,4.631398e-06,1.924556,0.016281,9.970055,0.859543
1,599.936448,12.235297,21.202649,0.0,0.716857,7.162631e-06,1.555199e-06,1.779174,0.021419,10.143691,0.863882
2,573.878472,12.447144,,0.0,0.834753,-1.061253e-06,1.015520e-05,1.934703,0.028388,10.119537,0.862827
3,550.457302,12.554883,21.551945,0.0,1.004917,5.572270e-06,4.094442e-06,1.916426,0.038486,11.240298,0.861611
4,572.829960,12.598818,21.711292,0.0,1.176253,6.600767e-08,-2.288449e-06,1.893920,0.052322,9.195454,0.857407
...,...,...,...,...,...,...,...,...,...,...,...
43195,571.243037,12.455028,20.842984,0.0,1.014705,3.784471e-06,1.605901e-06,1.828899,0.103265,10.606595,0.857921
43196,567.188954,12.526918,19.966430,0.0,0.977208,-2.434413e-06,,1.909683,0.100680,10.175937,0.858214
43197,564.514503,12.085239,21.452594,0.0,0.985721,-5.141186e-06,-1.394526e-05,1.918500,0.103688,10.000999,0.861265
43198,535.732467,12.585243,21.658293,0.0,1.053336,-3.070826e-06,-2.136437e-07,1.994553,0.108462,10.007617,0.859143


In [7]:
bridge_data.shape

(43200, 11)

In [8]:
bridge_data.isnull().sum()

Strain_microstrain             1821
Deflection_mm                  1733
Displacement_mm                1727
Localized_Strain_Hotspot       1754
Vibration_ms2                  1722
Seismic_Activity_ms2           1693
Impact_Events_g                1718
Modal_Frequency_Hz             1719
Energy_Dissipation_au          1694
Temperature_C                  1743
Structural_Health_Index_SHI    1700
dtype: int64

In [9]:
input_data = bridge_data.drop(columns=['Structural_Health_Index_SHI'])
X = input_data.fillna(input_data.mean())
Y = bridge_data['Structural_Health_Index_SHI']
Y = Y.fillna(Y.mean())


In [10]:
standardScaler = StandardScaler()
standard_X = standardScaler.fit_transform(X)
standard_X

array([[-3.30094121e-16, -6.62564791e-01, -7.96750993e-02, ...,
         2.97633571e-01, -2.07537932e-01, -6.48342660e-01],
       [-7.31733502e-01, -7.76753113e-01, -9.41061163e-01, ...,
        -1.18205667e+00, -2.03387467e-01, -6.25949177e-01],
       [-8.07393839e-01, -7.20207959e-01, -2.22747403e-15, ...,
         4.00906337e-01, -1.97759550e-01, -6.29064312e-01],
       ...,
       [-8.34582485e-01, -8.16806131e-01, -7.84350697e-01, ...,
         2.35995341e-01, -1.36942264e-01, -6.44351909e-01],
       [-9.18152237e-01, -6.83347122e-01, -6.55382238e-01, ...,
         1.01004562e+00, -1.33086368e-01, -6.43498443e-01],
       [-8.39923991e-01, -6.06023052e-01, -7.60994714e-01, ...,
         9.37925919e-01, -1.33089784e-01, -6.64617765e-01]])

In [11]:
x_train, x_test, y_train, y_test = train_test_split(X,Y,random_state=42,test_size=0.2)

For training model

In [12]:
model = RandomForestRegressor(n_estimators=200, max_depth=15, random_state=42)

In [13]:
y_train.isna().sum()

np.int64(0)

In [14]:
model.fit(x_train, y_train)

In [25]:
x_test

Unnamed: 0,Strain_microstrain,Deflection_mm,Displacement_mm,Localized_Strain_Hotspot,Vibration_ms2,Seismic_Activity_ms2,Impact_Events_g,Modal_Frequency_Hz,Energy_Dissipation_au,Temperature_C
15158,984.050457,16.082198,23.959808,0.0,1.466651,-2.700144e-06,1.967864e-06,1.818377,0.171439,24.518991
17980,921.898400,16.458668,22.754658,0.0,1.534001,1.333243e-06,-2.709368e-06,1.872317,0.169074,21.810472
14207,457.858002,8.727753,21.931385,0.0,0.888302,-2.520297e-06,1.993567e-06,1.852048,0.096654,21.826191
42340,1331.478746,22.716674,26.261156,0.0,1.423917,2.651300e-06,-6.523587e-07,1.972264,0.296117,12.910188
22155,1417.765843,22.353035,24.250625,0.0,1.888987,7.116124e-06,-1.007927e-05,1.978931,0.291616,12.088959
...,...,...,...,...,...,...,...,...,...,...
165,620.900854,15.265018,20.164991,0.0,0.753349,-9.940584e-07,9.497596e-06,1.950076,0.099567,5.322817
30402,639.356585,15.545345,21.851614,0.0,1.086448,-9.804755e-06,4.009954e-06,1.985870,0.116922,1.155645
8902,693.217783,16.837735,21.746068,0.0,0.764998,-1.236812e-06,5.512265e-06,1.953407,0.273241,8.903091
19293,1296.507880,22.029483,25.012460,0.0,1.749846,-2.699651e-06,6.947533e-06,1.895801,0.281288,15.654178


In [15]:
y_prediction = model.predict(x_test)

In [16]:
mae = mean_absolute_error(y_test, y_prediction) #mean absolute error
rmse = np.sqrt(mean_squared_error(y_test, y_prediction)) #root mean squared error
r2 = r2_score(y_test, y_prediction) #r2 score

In [17]:
print(mae, rmse, r2)

0.0038702093093097123 0.011372574594139123 0.955024525277861
