In [37]:
import os
# Move into the project root so the relative paths used by later cells
# ("gis/processed/...", "data/processed/...") resolve.
# The location can be overridden per machine through the DELHI_RISK_PROJECT_ROOT
# environment variable; when it is unset, the original local checkout path is used.
PROJECT_ROOT = os.environ.get(
    "DELHI_RISK_PROJECT_ROOT",
    r"C:\Users\hp\Documents\ML_Projects\Delhi_Infrastructure_Risk_AI",
)
os.chdir(PROJECT_ROOT)



In [38]:
import geopandas as gpd
import pandas as pd
import numpy as np


In [59]:
# Load the processed road / flyover geometries into a GeoDataFrame.
ROADS_PATH = "gis/processed/delhi_ncr_roads_flyovers.geojson"
roads = gpd.read_file(ROADS_PATH)

# A file with zero features loads without error, and every later cell then runs
# on an empty frame and produces empty results. Make that visible right here
# instead of discovering it in the final describe() output.
if len(roads) == 0:
    print(f"WARNING: {ROADS_PATH} contains no features; road-derived features will be empty.")

print(roads.shape)


(0, 1)


In [60]:

# Report the frame's dimensions, then show its first five rows as the cell result.
print(f"roads shape: {roads.shape}")
roads.head(5)

roads shape: (0, 1)


Unnamed: 0,geometry


In [40]:
# Discard rows whose geometry is missing, then renumber the remaining rows
# with a fresh 0-based index (the old index is not kept as a column).
roads = roads.dropna(subset=["geometry"]).reset_index(drop=True)


In [41]:
# Use each row's current index label as its asset identifier.
roads = roads.assign(asset_id=roads.index)


In [42]:
# Display the column labels currently present on the roads frame.
roads.columns


Index(['geometry', 'asset_id'], dtype='object')

In [45]:
# EPSG:32643 is the UTM zone used here for Delhi; its units are meters,
# so geometry lengths taken after reprojection are in meters.
roads = roads.to_crs(epsg=32643)

segment_length_m = roads.geometry.length
roads["length_m"] = segment_length_m
roads["length_km"] = segment_length_m / 1000


In [47]:
# Seed NumPy's global generator so the random draws below are repeatable.
np.random.seed(42)

# Randomly flag each road as a flyover (1) or not (0):
# roughly 15% of rows get 1, the remaining ~85% get 0.
flyover_labels = [0, 1]
flyover_weights = [0.85, 0.15]
roads["is_flyover"] = np.random.choice(
    flyover_labels, size=len(roads), p=flyover_weights
)


In [48]:
CURRENT_YEAR = 2026

n_roads = len(roads)

# Draw both candidate year arrays up front, flyover years first, so NumPy's
# global random stream is consumed in the same order on every run.
# randint's upper bound is exclusive: 1998..2015 and 1985..2021.
flyover_years = np.random.randint(1998, 2016, size=n_roads)
other_years = np.random.randint(1985, 2022, size=n_roads)

# Rows flagged as flyovers take the flyover draw, all others the general draw.
flyover_mask = roads["is_flyover"] == 1
roads["construction_year"] = np.where(flyover_mask, flyover_years, other_years)

# SAI: age in years, scaled by the largest age in the frame.
age = CURRENT_YEAR - roads["construction_year"]
roads["age_years"] = age
roads["SAI"] = age / age.max()



In [49]:
# TSI: each road's length in km divided by the longest road's length in km.
longest_km = roads["length_km"].max()
roads["TSI"] = roads["length_km"] / longest_km


In [50]:
# Draw a last-maintenance year in 2012..2023 for every road
# (randint's upper bound is exclusive).
drawn_maintenance_year = np.random.randint(2012, 2024, size=len(roads))

# An asset cannot have been maintained before it was built. Construction years
# run up to 2021 while the draw starts at 2012, so clamp the drawn year so that
# it is never earlier than the row's construction year.
roads["last_maintenance_year"] = np.maximum(
    drawn_maintenance_year,
    roads["construction_year"].to_numpy(),
)

# MNS: years since last maintenance, scaled by the largest such gap in the frame.
years_since_maintenance = CURRENT_YEAR - roads["last_maintenance_year"]
roads["MNS"] = years_since_maintenance / years_since_maintenance.max()


In [51]:
# Draw a Poisson(lam=3) complaint count for every road.
roads["complaints"] = np.random.poisson(lam=3, size=len(roads))

# CDI: complaint count scaled by the largest count in the frame.
# A Poisson draw can be all zeros on a small frame; dividing by a zero maximum
# would turn every CDI into NaN (0/0), so fall back to 0.0 in that case.
# (On an empty frame the maximum is NaN, which also takes the fallback branch.)
peak_complaints = roads["complaints"].max()
if peak_complaints > 0:
    roads["CDI"] = roads["complaints"] / peak_complaints
else:
    roads["CDI"] = 0.0


In [56]:
# FEI: road length in km multiplied by a per-road random factor drawn uniformly
# from [0.6, 1.4), then scaled by the largest resulting value in the frame.
exposure_factor = np.random.uniform(0.6, 1.4, size=len(roads))
raw_exposure = roads["length_km"] * exposure_factor

roads["FEI"] = raw_exposure / raw_exposure.max()


In [57]:
# Columns carried into the engineered-features table: the identifier,
# the flyover flag, and the five index columns computed above.
feature_columns = ["asset_id", "is_flyover", "SAI", "TSI", "FEI", "MNS", "CDI"]

# Copy so later edits to `features` do not touch `roads`.
features = roads[feature_columns].copy()

features.describe()


Unnamed: 0,asset_id,is_flyover,SAI,TSI,FEI,MNS,CDI
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,,,,,,,
std,,,,,,,
min,,,,,,,
25%,,,,,,,
50%,,,,,,,
75%,,,,,,,
max,,,,,,,


In [58]:
# Make sure the output folder exists, then write the features table
# as CSV without the index column.
output_dir = "data/processed"
output_csv = "data/processed/engineered_features.csv"

os.makedirs(output_dir, exist_ok=True)
features.to_csv(output_csv, index=False)


In [61]:
import pandas as pd
import numpy as np
import os

# Seed NumPy's global generator so the synthetic table is repeatable.
np.random.seed(42)

N_ASSETS = 5000   # number of synthetic assets to generate

# Beta(a, b) shape parameters for each index column. The dict order is the
# order in which the random draws are made, so it must not be rearranged.
BETA_SHAPES = {
    "SAI": (2, 5),      # Structural Age Index
    "TSI": (2, 3),      # Traffic Stress Index
    "FEI": (3, 3),      # Flood Exposure Index
    "MNS": (2, 4),      # Maintenance Neglect Score
    "CDI": (1.5, 5),    # Citizen Distress Index
}

synthetic_columns = {
    "asset_id": range(N_ASSETS),
    # Flyover flag (~15% of rows are 1); drawn before any of the beta columns.
    "is_flyover": np.random.choice([0, 1], size=N_ASSETS, p=[0.85, 0.15]),
}
for column_name, (shape_a, shape_b) in BETA_SHAPES.items():
    synthetic_columns[column_name] = np.random.beta(shape_a, shape_b, size=N_ASSETS)

df = pd.DataFrame(synthetic_columns)

df.describe()


Unnamed: 0,asset_id,is_flyover,SAI,TSI,FEI,MNS,CDI
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,2499.5,0.146,0.284383,0.398912,0.501878,0.331966,0.228951
std,1443.520003,0.353142,0.156957,0.198738,0.189519,0.179146,0.15221
min,0.0,0.0,0.003584,0.001095,0.03564,0.003366,0.000728
25%,1249.75,0.0,0.162888,0.242274,0.3608,0.192579,0.108439
50%,2499.5,0.0,0.264593,0.388066,0.503,0.311809,0.2002
75%,3749.25,0.0,0.385802,0.544758,0.642007,0.452294,0.327633
max,4999.0,1.0,0.833373,0.959279,0.972012,0.896641,0.788733


In [62]:
# Make sure the output folder exists, write the synthetic table as CSV
# without the index column, then show the table's dimensions.
processed_dir = "data/processed"
os.makedirs(processed_dir, exist_ok=True)

features_csv = "data/processed/engineered_features.csv"
df.to_csv(features_csv, index=False)

df.shape


(5000, 7)