In [None]:
import pandas as pd
import random
import numpy as np
from datetime import datetime, timedelta

# Load base files
waste_segregation_data = pd.read_csv("data/Pune SolidWasteSegregation_0.csv")
waste_collection_data = pd.read_csv("data/Pune SolidWasteCollectionEquipment_0.csv")

# Parameters for dummy data generation
num_days = 30  # Number of days to simulate

# Generate random dates
start_date = datetime(2024, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(num_days)]

# Function to simulate waste generation based on various factors
def simulate_waste(weight_mean, weight_std):
    return max(0, round(random.gauss(weight_mean, weight_std), 2))  # Ensure non-negative values

# Generate dummy waste prediction data based on the base file
waste_prediction_data = []
for date in dates:
    for _, row in waste_segregation_data.iterrows():
        ward_name = row["Ward name"]
        num_households = row["Number of HH"]
        for household_id in range(1, int(num_households * 0.01) + 1):  # Simulating 1% of households
            weight_mean = row["Waste quantity (Tonnes Per Day)"] * 1000 / num_households  # Convert tonnes to kg per household
            weight_std = weight_mean * 0.2  # Assume 20% variability
            waste_weight = simulate_waste(weight_mean, weight_std)
            waste_prediction_data.append({
                "date": date,
                "ward_name": ward_name,
                "household_id": f"{ward_name}-{household_id}",
                "waste_weight_kg": waste_weight
            })

# Convert to DataFrame
waste_prediction_df = pd.DataFrame(waste_prediction_data)   

# Generate dummy distance matrix for route optimization based on the base file
households = [f"{row['Ward name']}-{i}" for _, row in waste_segregation_data.iterrows() for i in range(1, int(row["Number of HH"] * 0.01) + 1)]
distance_matrix = pd.DataFrame(
    np.random.randint(1, 10, size=(len(households), len(households))),
    index=households,
    columns=households
)

# Save the generated data to CSV files
waste_prediction_df.to_csv("dataset/dummy_waste_prediction.csv", index=False)
distance_matrix.to_csv("dataset/dummy_distance_matrix.csv")

print("Dummy data generation based on base files completed.")


In [7]:
waste_prediction_df.to_csv("dataset/dummy_waste_prediction.csv", index=False)
distance_matrix.to_csv("dataset/dummy_distance_matrix.csv")

KeyboardInterrupt: 

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import networkx as nx
import itertools

# Load generated data
waste_data = pd.read_csv("dataset/dummy_waste_prediction.csv")
distance_matrix = pd.read_csv("dataset/dummy_distance_matrix.csv", index_col=0)

# Preprocess data for ML model
waste_data["date"] = pd.to_datetime(waste_data["date"])
waste_data["day_of_week"] = waste_data["date"].dt.dayofweek  # Add day of the week feature

# Features and target variable
X = waste_data[["day_of_week", "ward_name"]]
y = waste_data["waste_weight_kg"]

# One-hot encode categorical feature (ward_name)
X = pd.get_dummies(X, columns=["ward_name"], drop_first=True)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the ML model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate
predictions = model.predict(X_test)
rmse = mean_squared_error(y_test, predictions, squared=False)
print(f"RMSE for waste prediction model: {rmse:.2f} kg")

RMSE for waste prediction model: 0.09 kg




In [14]:
def optimize_routes(distance_matrix, waste_data, truck_capacity):
    households = waste_data["household_id"].tolist()
    distances = distance_matrix.loc[households, households].to_numpy()
    
    G = nx.Graph()
    for i, j in itertools.combinations(range(len(households)), 2):
        G.add_edge(households[i], households[j], weight=distances[i, j])

    # TSP solver with capacity constraint
    selected_routes = []
    remaining_capacity = truck_capacity
    current_route = []
    current_weight = 0

    for household, weight in zip(waste_data["household_id"], waste_data["waste_weight_kg"]):
        if current_weight + weight <= remaining_capacity:
            current_route.append(household)
            current_weight += weight
        else:
            selected_routes.append(current_route)
            current_route = [household]
            current_weight = weight
            remaining_capacity = truck_capacity

    if current_route:
        selected_routes.append(current_route)

    # Generate optimized routes based on distances
    optimized_routes = []
    for route in selected_routes:
        route_distances = distance_matrix.loc[route, route].to_numpy()
        route_G = nx.Graph()
        for i, j in itertools.combinations(range(len(route)), 2):
            route_G.add_edge(route[i], route[j], weight=route_distances[i, j])
        tsp_path = nx.approximation.greedy_tsp(route_G, cycle=True)
        optimized_routes.append(tsp_path)

    return optimized_routes

# Predict waste for a specific day and ward
future_data = waste_data[waste_data["date"] == waste_data["date"].max()]
predicted_waste = model.predict(X)
future_data["predicted_waste"] = predicted_waste

# Optimize routes
truck_capacity = 1000  # kg
routes = optimize_routes(distance_matrix, future_data, truck_capacity)

print("Optimized routes:")
for i, route in enumerate(routes):
    print(f"Route {i + 1}: {route}")


ValueError: Length of values (1287600) does not match length of index (42920)