In [43]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [29]:
data = pd.read_csv('bridge_digital_twin_dataset.csv')

In [30]:
data.head()

Unnamed: 0,Timestamp,Strain_microstrain,Deflection_mm,Vibration_ms2,Tilt_deg,Displacement_mm,Crack_Propagation_mm,Corrosion_Level_percent,Cable_Member_Tension_kN,Bearing_Joint_Forces_kN,...,Soil_Saturation_percent,Landslide_Ground_Movement,Simulated_Slope_Displacement_mm,High_Winds_Storms,Simulated_Wind_Load_Pressure_kPa,Abnormal_Traffic_Load_Surges,Simulated_Localized_Stress_Index,Energy_Harvesting_Potential_W,Estimated_Repair_Cost_USD_incremental,Carbon_Footprint_tCO2e_incremental
0,2023-01-01 00:00:00,,12.663104,1.275955,0.58734,22.576518,-8.3e-05,0.00291,404.394906,233.052345,...,9.888758,0.0,0.022592,0.0,0.211946,0.0,0.215354,0.081403,1.343588,0.001348
1,2023-01-01 00:01:00,599.936448,12.235297,0.716857,0.657122,21.202649,9.2e-05,0.004601,367.821051,237.545735,...,9.920331,0.0,0.014647,0.0,0.231188,0.0,0.218747,0.107097,1.397207,0.001299
2,2023-01-01 00:02:00,573.878472,12.447144,0.834753,0.615011,,-0.000377,0.005115,402.393742,239.164832,...,,0.0,0.0,0.0,0.199192,0.0,0.213449,0.141938,1.318876,0.001378
3,2023-01-01 00:03:00,550.457302,12.554883,1.004917,0.617999,21.551945,-0.000646,0.005505,356.024238,237.645586,...,43.526026,0.0,0.0,0.0,0.198359,0.0,0.20819,0.192431,1.351773,0.001379
4,2023-01-01 00:04:00,572.82996,12.598818,1.176253,0.633534,21.711292,0.000247,-0.003415,413.33828,232.310381,...,44.138185,0.0,0.0,0.0,0.18811,0.0,0.26124,0.261609,,0.001267


In [31]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43200 entries, 0 to 43199
Data columns (total 54 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Timestamp                              43200 non-null  object 
 1   Strain_microstrain                     41379 non-null  float64
 2   Deflection_mm                          41467 non-null  float64
 3   Vibration_ms2                          41478 non-null  float64
 4   Tilt_deg                               41447 non-null  float64
 5   Displacement_mm                        41473 non-null  float64
 6   Crack_Propagation_mm                   41494 non-null  float64
 7   Corrosion_Level_percent                41531 non-null  float64
 8   Cable_Member_Tension_kN                41459 non-null  float64
 9   Bearing_Joint_Forces_kN                41515 non-null  float64
 10  Fatigue_Accumulation_au                41496 non-null  float64
 11  Mo

In [32]:
data.isna().sum()

Timestamp                                    0
Strain_microstrain                        1821
Deflection_mm                             1733
Vibration_ms2                             1722
Tilt_deg                                  1753
Displacement_mm                           1727
Crack_Propagation_mm                      1706
Corrosion_Level_percent                   1669
Cable_Member_Tension_kN                   1741
Bearing_Joint_Forces_kN                   1685
Fatigue_Accumulation_au                   1704
Modal_Frequency_Hz                        1719
Temperature_C                             1743
Humidity_percent                          1783
Wind_Speed_ms                             1726
Wind_Direction_deg                        1768
Precipitation_mmh                         1687
Water_Level_m                             1687
Seismic_Activity_ms2                      1693
Solar_Radiation_Wm2                       1728
Air_Quality_Index_AQI                     1739
Soil_Settleme

In [33]:
bridge_data = data[[
    "Strain_microstrain",
    "Vibration_ms2",
    "Temperature_C",
    "Humidity_percent"
]]

bridge_data

Unnamed: 0,Strain_microstrain,Vibration_ms2,Temperature_C,Humidity_percent
0,,1.275955,9.970055,68.515014
1,599.936448,0.716857,10.143691,70.492071
2,573.878472,0.834753,10.119537,70.307717
3,550.457302,1.004917,11.240298,66.929768
4,572.829960,1.176253,9.195454,68.196398
...,...,...,...,...
43195,571.243037,1.014705,10.606595,66.855889
43196,567.188954,0.977208,10.175937,
43197,564.514503,0.985721,10.000999,68.366226
43198,535.732467,1.053336,10.007617,66.964964


In [34]:
bridge_data.shape

(43200, 4)

In [35]:
bridge_data.isnull().sum()

Strain_microstrain    1821
Vibration_ms2         1722
Temperature_C         1743
Humidity_percent      1783
dtype: int64

In [36]:
X = bridge_data.fillna(bridge_data.mean())
Y = data['Structural_Health_Index_SHI']
y = Y.fillna(Y.mean())


In [37]:
pipeline = Pipeline([("scaler",StandardScaler()),("model",RandomForestRegressor(n_estimators=200, max_depth=17, min_samples_leaf=3, random_state=42))])

In [38]:
x_train, x_test, y_train, y_test = train_test_split(X,y,random_state=42,test_size=0.2)

For training model

In [39]:
pipeline.fit(x_train, y_train)

In [40]:
y_prediction = pipeline.predict(x_test)

In [41]:
mae = mean_absolute_error(y_test, y_prediction) #mean absolute error
rmse = np.sqrt(mean_squared_error(y_test, y_prediction)) #root mean squared error
r2 = r2_score(y_test, y_prediction) #r2 score

In [42]:
print(mae, rmse, r2)

0.006877107894533844 0.01380043168911023 0.9337717146527251


In [44]:
joblib.dump(pipeline, 'bridge_model.pkl')

['bridge_model.pkl']