import pandas as pd import numpy as np import matplotlib.pyplot as plt import random from datetime import datetime, timedelta
def generate_infrastructure_data(num_assets=100): """Generates synthetic infrastructure data.""" data = { 'asset_id': [f'Asset_{i:03d}' for i in range(num_assets)], 'type': np.random.choice(['Bridge', 'Road Segment'], num_assets), 'age_years': np.random.randint(5, 70, num_assets), 'construction_material': np.random.choice(['Concrete', 'Steel', 'Asphalt', 'Composite'], num_assets), 'last_inspection_date': [ (datetime.now() - timedelta(days=random.randint(30, 1000))).strftime('%Y-%m-%d') for _ in range(num_assets) ], 'condition_rating_initial': np.random.randint(70, 100, num_assets), # Initial condition (e.g., 1-100 scale, 100 is perfect) 'latitude': np.random.uniform(21.0, 22.0, num_assets), # Example latitude range for Chhattisgarh 'longitude': np.random.uniform(81.0, 82.0, num_assets) # Example longitude range for Chhattisgarh } df = pd.DataFrame(data)
# Simulate deterioration over time
df['condition_rating_current'] = df.apply(
lambda row: max(10, row['condition_rating_initial'] - (row['age_years'] * np.random.uniform(0.5, 1.5))),
axis=1
)
# Introduce some random noise/variability
df['condition_rating_current'] = df['condition_rating_current'].apply(
lambda x: max(10, min(100, x + np.random.normal(0, 5)))
).astype(int)
return df
def generate_traffic_data(infrastructure_df): """Generates synthetic traffic data based on asset type.""" data = { 'asset_id': infrastructure_df['asset_id'].tolist(), 'traffic_volume_daily': [ random.randint(5000, 50000) if asset_type == 'Road Segment' else random.randint(1000, 20000) for asset_type in infrastructure_df['type'] ] } return pd.DataFrame(data)
def generate_weather_data(infrastructure_df): """Generates synthetic weather exposure data.""" data = { 'asset_id': infrastructure_df['asset_id'].tolist(), 'freeze_thaw_cycles_avg_annual': np.random.randint(0, 20, len(infrastructure_df)), 'heavy_rain_days_avg_annual': np.random.randint(5, 30, len(infrastructure_df)) } return pd.DataFrame(data)
infrastructure_df = generate_infrastructure_data() traffic_df = generate_traffic_data(infrastructure_df) weather_df = generate_weather_data(infrastructure_df)
print("--- Simulated Infrastructure Data (first 5 rows) ---") print(infrastructure_df.head()) print("\n--- Simulated Traffic Data (first 5 rows) ---") print(traffic_df.head()) print("\n--- Simulated Weather Data (first 5 rows) ---") print(weather_df.head())
infrastructure_df['last_inspection_date'] = pd.to_datetime(infrastructure_df['last_inspection_date'])
infrastructure_df['days_since_last_inspection'] = (datetime.now() - infrastructure_df['last_inspection_date']).dt.days
df_master = infrastructure_df.merge(traffic_df, on='asset_id', how='left') df_master = df_master.merge(weather_df, on='asset_id', how='left')
print("\n--- Merged and Cleaned Data (first 5 rows) ---") print(df_master.head()) print(df_master.info())
print("\n--- Average Condition Rating by Age Group and Material ---") df_master['age_group'] = pd.cut(df_master['age_years'], bins=[0, 10, 20, 30, 40, 50, 60, 70, 80], labels=['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '70+']) avg_condition_by_age_material = df_master.groupby(['age_group', 'construction_material'])['condition_rating_current'].mean().unstack() print(avg_condition_by_age_material)
We'll predict 'condition_rating_current' based on other features using basic linear regression (Normal Equation).
features = ['age_years', 'traffic_volume_daily', 'freeze_thaw_cycles_avg_annual', 'heavy_rain_days_avg_annual'] target = 'condition_rating_current'
X = df_master[features].values y = df_master[target].values
X_b = np.c_[np.ones((len(X), 1)), X] # Add x0 = 1 to each instance
try: theta = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y intercept = theta[0] coefficients = theta[1:]
# Make predictions (for the entire dataset for simplicity in this example)
y_pred = X_b @ theta
# Calculate Mean Squared Error manually
mse = np.mean((y_pred - y)**2)
print(f"\n--- Regression Analysis Results (NumPy) ---")
print(f"Mean Squared Error: {mse:.2f}")
print(f"Model Intercept: {intercept:.2f}")
print(f"Model Coefficients (for {features}): {coefficients}")
# Interpretation of coefficients:
# A negative coefficient for 'age_years' would indicate that as age increases, condition decreases (as expected).
# Similarly for traffic and weather factors.
except np.linalg.LinAlgError: print("\n--- Regression Analysis Results (NumPy) ---") print("Could not perform linear regression: Singular matrix encountered. This might happen with highly correlated features or insufficient data.")
Factors: Condition (lower is worse), Age (higher is worse), Traffic Volume (higher means more critical),
df_master['normalized_condition'] = 1 - (df_master['condition_rating_current'] / 100) # 0=perfect, 1=worst df_master['normalized_age'] = df_master['age_years'] / df_master['age_years'].max() df_master['normalized_traffic'] = df_master['traffic_volume_daily'] / df_master['traffic_volume_daily'].max() df_master['normalized_days_since_inspection'] = df_master['days_since_last_inspection'] / df_master['days_since_last_inspection'].max()
weights = { 'condition': 0.4, 'age': 0.2, 'traffic': 0.3, 'days_since_inspection': 0.1 }
df_master['priority_score'] = ( df_master['normalized_condition'] * weights['condition'] + df_master['normalized_age'] * weights['age'] + df_master['normalized_traffic'] * weights['traffic'] + df_master['normalized_days_since_inspection'] * weights['days_since_inspection'] )
df_master = df_master.sort_values(by='priority_score', ascending=False)
print("\n--- Top 10 Prioritized Infrastructure Assets ---") print(df_master[['asset_id', 'type', 'age_years', 'condition_rating_current', 'traffic_volume_daily', 'priority_score']].head(10))
plt.style.use('seaborn-v0_8-darkgrid') # Modern looking plots
plt.figure(figsize=(12, 7)) for material in df_master['construction_material'].unique(): subset = df_master[df_master['construction_material'] == material] plt.scatter(subset['age_years'], subset['condition_rating_current'], label=material, alpha=0.7, s=50) plt.title('Condition Rating vs. Age by Construction Material', fontsize=16) plt.xlabel('Age (Years)', fontsize=12) plt.ylabel('Current Condition Rating (1-100)', fontsize=12) plt.legend(title='Material') plt.grid(True) plt.tight_layout() plt.show()
df_master['deterioration'] = df_master['condition_rating_initial'] - df_master['condition_rating_current'] avg_deterioration_by_material = df_master.groupby('construction_material')['deterioration'].mean().sort_values(ascending=False)
plt.figure(figsize=(10, 6)) avg_deterioration_by_material.plot(kind='bar', color='skyblue') plt.title('Average Deterioration by Construction Material', fontsize=16) plt.xlabel('Construction Material', fontsize=12) plt.ylabel('Average Deterioration (Points)', fontsize=12) plt.xticks(rotation=45, ha='right') plt.grid(axis='y') plt.tight_layout() plt.show()
plt.figure(figsize=(10, 6)) plt.hist(df_master['condition_rating_current'], bins=10, edgecolor='black', alpha=0.7, color='lightcoral') plt.title('Distribution of Current Condition Ratings', fontsize=16) plt.xlabel('Condition Rating (1-100)', fontsize=12) plt.ylabel('Number of Assets', fontsize=12) plt.grid(axis='y') plt.tight_layout() plt.show()
plt.figure(figsize=(12, 7)) top_10_priority = df_master.head(10).set_index('asset_id') top_10_priority['priority_score'].plot(kind='barh', color='darkorange') plt.title('Top 10 Infrastructure Assets by Priority Score', fontsize=16) plt.xlabel('Priority Score (Higher = More Urgent)', fontsize=12) plt.ylabel('Asset ID', fontsize=12) plt.gca().invert_yaxis() # Highest priority at the top plt.grid(axis='x') plt.tight_layout() plt.show()
print("\n--- Actionable Insights ---") print(f"Total assets analyzed: {len(df_master)}") print(f"Average current condition rating: {df_master['condition_rating_current'].mean():.2f}") print(f"Material with highest average deterioration: {avg_deterioration_by_material.index[0]} (Avg Deterioration: {avg_deterioration_by_material.iloc[0]:.2f} points)")
critical_condition_threshold = 50 critical_assets = df_master[df_master['condition_rating_current'] < critical_condition_threshold] print(f"\nNumber of assets with condition below {critical_condition_threshold}: {len(critical_assets)}") if not critical_assets.empty: print("Consider immediate inspection/repair for these assets:") print(critical_assets[['asset_id', 'type', 'condition_rating_current', 'priority_score']].head())
inspection_due_threshold_days = 365 * 2 # 2 years high_priority_needing_inspection = df_master[ (df_master['priority_score'] > df_master['priority_score'].quantile(0.8)) & # Top 20% priority (df_master['days_since_last_inspection'] > inspection_due_threshold_days) ] print(f"\nHigh priority assets needing inspection (last inspected > {inspection_due_threshold_days} days ago): {len(high_priority_needing_inspection)}") if not high_priority_needing_inspection.empty: print("Prioritize inspections for these assets:") print(high_priority_needing_inspection[['asset_id', 'type', 'condition_rating_current', 'days_since_last_inspection', 'priority_score']].head())# ποΈ Smart Infrastructure Asset Management System (SIAMS)
The Smart Infrastructure Asset Management System (SIAMS) is a comprehensive Python-based tool designed to analyze, monitor, and prioritize infrastructure assets for maintenance and repair. This system leverages data analytics, machine learning, and visualization techniques to provide actionable insights for infrastructure management decisions.
- Data Integration: Combines infrastructure, traffic, and weather data
- Predictive Analytics: Uses regression analysis to predict asset deterioration
- Priority Scoring: Multi-criteria algorithm for maintenance prioritization
- Visual Analytics: Interactive charts and graphs for data insights
- Automated Reporting: Generates actionable maintenance recommendations
βββββββββββββββββββββββ βββββββββββββββββββββββ βββββββββββββββββββββββ
β Data Sources β β Processing Engine β β Output & Reports β
βββββββββββββββββββββββ€ βββββββββββββββββββββββ€ βββββββββββββββββββββββ€
β β’ Infrastructure DB βββββΆβ β’ Data Cleaning βββββΆβ β’ Priority Rankings β
β β’ Traffic Sensors β β β’ Regression Model β β β’ Visual Dashboards β
β β’ Weather Stations β β β’ Priority Algorithmβ β β’ Maintenance Plans β
β β’ Inspection Recordsβ β β’ Risk Assessment β β β’ Cost Estimates β
βββββββββββββββββββββββ βββββββββββββββββββββββ βββββββββββββββββββββββ
- Eliminates guesswork in maintenance planning
- Provides quantitative basis for budget allocation
- Reduces reactive maintenance costs by up to 30%
- Identifies assets at risk before failure occurs
- Extends asset lifespan through timely interventions
- Optimizes maintenance schedules
- Prioritizes high-impact repairs first
- Maximizes ROI on maintenance investments
- Improves overall infrastructure reliability
- Integrates multiple data sources
- Considers traffic impact, weather exposure, and asset condition
- Provides holistic view of infrastructure health
- Handles hundreds to thousands of assets
- Adaptable to different infrastructure types
- Cloud-ready architecture
# Required Python packages
pip install pandas numpy matplotlib seaborn- Clone or download the project
- Install dependencies
- Run the main script
cd "d:\PDS\pds project"
python sdas.py# Example: Delhi-Mumbai Expressway Bridge Network
assets = [
"NH-48_Bridge_001": "Yamuna River Bridge",
"NH-48_Bridge_015": "Sabarmati River Crossing",
"NH-48_Bridge_028": "Narmada River Bridge"
]# Example: Bangalore City Road Segments
road_segments = [
"BLR_RD_001": "Outer Ring Road (ORR) - Electronic City",
"BLR_RD_045": "Hosur Road - Silk Board Junction",
"BLR_RD_089": "Airport Road - Hebbal Flyover"
]# Example: Delhi Metro Network
metro_assets = [
"DM_TUN_001": "Blue Line Tunnel - Rajiv Chowk to CP",
"DM_STN_045": "Kashmere Gate Station Platform 2",
"DM_BRG_012": "Yamuna Bank Bridge Section"
]Asset: Mumbai-Pune Highway Bridge KM-45
βββ Condition Score: 45/100 (Critical)
βββ Traffic Impact: 35,000 vehicles/day (High)
βββ Age Factor: 35 years (Aging)
βββ Weather Exposure: High monsoon impact
βββ Priority Score: 0.87 (Immediate Action Required)
- Immediate (0-30 days): 15 critical assets requiring urgent repair
- Short-term (1-6 months): 45 assets needing scheduled maintenance
- Long-term (6-12 months): 85 assets for preventive maintenance
Prepare your data in the following format:
| Field | Type | Example | Description |
|---|---|---|---|
| asset_id | String | "NH1_BRG_001" | Unique identifier |
| type | String | "Bridge" | Asset category |
| age_years | Integer | 25 | Years since construction |
| construction_material | String | "Concrete" | Primary material |
| condition_rating_current | Integer | 65 | Current condition (1-100) |
| latitude | Float | 28.6139 | GPS coordinates |
| longitude | Float | 77.2090 | GPS coordinates |
| Field | Type | Example | Description |
|---|---|---|---|
| asset_id | String | "NH1_BRG_001" | Matching asset ID |
| traffic_volume_daily | Integer | 25000 | Vehicles per day |
| Field | Type | Example | Description |
|---|---|---|---|
| asset_id | String | "NH1_BRG_001" | Matching asset ID |
| freeze_thaw_cycles_avg_annual | Integer | 15 | Annual freeze-thaw cycles |
| heavy_rain_days_avg_annual | Integer | 45 | Heavy rain days per year |
Modify the priority weights based on your requirements:
weights = {
'condition': 0.4, # 40% - Asset condition
'age': 0.2, # 20% - Asset age
'traffic': 0.3, # 30% - Traffic impact
'days_since_inspection': 0.1 # 10% - Inspection urgency
}Execute the script to generate:
- Priority rankings
- Deterioration predictions
- Maintenance schedules
- Visual reports
=== TOP 10 CRITICAL ASSETS ===
1. Delhi_Ring_Road_Seg_089 | Score: 0.92 | Condition: 32 | Age: 45 years
2. Mumbai_Highway_Br_156 | Score: 0.88 | Condition: 38 | Age: 38 years
3. Bangalore_ORR_Seg_234 | Score: 0.85 | Condition: 41 | Age: 28 years
IMMEDIATE REPAIRS (0-30 days): βΉ2.5 Crores
SHORT-TERM MAINTENANCE (1-6 months): βΉ8.2 Crores
PREVENTIVE MAINTENANCE (6-12 months): βΉ15.7 Crores
TOTAL ANNUAL BUDGET REQUIRED: βΉ26.4 Crores
HIGH RISK ASSETS: 23 (Critical condition + High traffic)
MEDIUM RISK ASSETS: 67 (Moderate deterioration)
LOW RISK ASSETS: 156 (Good condition, regular maintenance)
The system generates several types of visualizations:
- Condition vs Age Scatter Plot: Shows deterioration patterns by material
- Priority Score Bar Chart: Highlights top maintenance priorities
- Material Deterioration Analysis: Compares performance across materials
- Condition Distribution Histogram: Shows overall infrastructure health
# Extend asset types
asset_types = ['Bridge', 'Road Segment', 'Tunnel', 'Flyover', 'Underpass']# Adjust condition rating scale
condition_scale = {
'Excellent': 90-100,
'Good': 70-89,
'Fair': 50-69,
'Poor': 30-49,
'Critical': 0-29
}def custom_priority_score(row):
"""Custom priority calculation based on specific requirements"""
base_score = (
row['condition_weight'] * 0.4 +
row['traffic_weight'] * 0.3 +
row['age_weight'] * 0.2 +
row['strategic_importance'] * 0.1
)
return base_score- Connect with QGIS or ArcGIS for spatial analysis
- Overlay with satellite imagery
- Route optimization for inspection teams
# Example: Real-time sensor data integration
def integrate_sensor_data(asset_id):
"""Fetch real-time condition data from IoT sensors"""
sensor_data = fetch_from_iot_platform(asset_id)
return sensor_data['vibration'], sensor_data['strain']- Field inspection data capture
- Photo documentation
- GPS-enabled asset tracking
-
Memory Error with Large Datasets
# Process in chunks chunk_size = 1000 for chunk in pd.read_csv('large_dataset.csv', chunksize=chunk_size): process_chunk(chunk)
-
Missing Data Handling
# Fill missing values appropriately df['condition_rating_current'].fillna(df['condition_rating_current'].median(), inplace=True)
-
Performance Optimization
# Use vectorized operations df['priority_score'] = np.where( df['condition_rating_current'] < 50, df['base_priority'] * 1.5, df['base_priority'] )
For technical support or feature requests:
- Email: infrastructure.analytics@domain.com
- Documentation: [Project Wiki]
- Issues: [GitHub Issues]
This project is licensed under the MIT License - see the LICENSE file for details.
Last updated: July 2025 Version: 2.0