In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import random


# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)


# Create a fake building dataset for Chandigarh
# Each building has an ID, lat, lon, type, and block_size factor (affecting order volume)
buildings_data = [
   {"building_id": 1, "latitude": 30.7333, "longitude": 76.7794, "type": "residential", "block_size": 1.0},
   {"building_id": 2, "latitude": 30.7350, "longitude": 76.7800, "type": "residential", "block_size": 1.2},
   {"building_id": 3, "latitude": 30.7400, "longitude": 76.7850, "type": "commercial",  "block_size": 1.5},
   {"building_id": 4, "latitude": 30.7500, "longitude": 76.8000, "type": "office",      "block_size": 2.0},
   {"building_id": 5, "latitude": 30.7100, "longitude": 76.7600, "type": "residential", "block_size": 0.8}
]


buildings_df = pd.DataFrame(buildings_data)


orders = []
order_id = 1

In [2]:
# Define a base order factor per building type
order_factor = {"residential": 50, "commercial": 100, "office": 150}


# For each building, generate a number of orders based on its type and block_size
for _, building in buildings_df.iterrows():
   # Use Poisson distribution to simulate the number of orders for this building
   lam = order_factor[building["type"]] * building["block_size"]
   num_orders = np.random.poisson(lam=lam)
  
   for _ in range(num_orders):
       # Generate a random order time within a day (e.g., 2023-01-01)
       start_time = datetime(2023, 1, 1)
       random_seconds = random.randint(0, 86400 - 1)
       order_time = start_time + timedelta(seconds=random_seconds)
      
       # Simulate an order amount between ₹50 and ₹500
       order_amount = round(random.uniform(50, 500), 2)
      
       orders.append({
           "order_id": order_id,
           "building_id": building["building_id"],
           "latitude": building["latitude"],
           "longitude": building["longitude"],
           "order_time": order_time,
           "order_amount": order_amount,
           "building_type": building["type"]
       })
       order_id += 1


# Create a DataFrame for the orders and display a preview
orders_df = pd.DataFrame(orders)
print(orders_df.head())


# Optionally, save the generated orders to a CSV file
orders_df.to_csv("fake_orders_by_building_chandigarh.csv", index=False)

   order_id  building_id  latitude  longitude          order_time  \
0         1            1   30.7333    76.7794 2023-01-01 23:16:50   
1         2            1   30.7333    76.7794 2023-01-01 10:00:48   
2         3            1   30.7333    76.7794 2023-01-01 05:04:49   
3         4            1   30.7333    76.7794 2023-01-01 19:51:22   
4         5            1   30.7333    76.7794 2023-01-01 15:21:42   

   order_amount building_type  
0        100.10   residential  
1        160.20   residential  
2        381.41   residential  
3         89.12   residential  
4         64.30   residential  


    id         type  people                                           geometry
0  NaN  residential     500  POLYGON ((76.74023 30.65874, 76.73988 30.65878...
1  2.0  residential     800  POLYGON ((76.73956 30.66019, 76.73961 30.66127...
2  3.0  residential     400  POLYGON ((76.74207 30.66088, 76.74189 30.66056...
3  4.0  residential     450  POLYGON ((76.74486 30.65965, 76.74513 30.65971...
4  5.0  residential     500  POLYGON ((76.74451 30.65935, 76.74472 30.65943...
Index(['id', 'type', 'people', 'geometry'], dtype='object')



  gdf["centroid"] = gdf["geometry"].centroid


In [10]:
import numpy as np
import pandas as pd
import geopandas as gpd
from datetime import datetime, timedelta
import random

# Set random seed for reproducibility
np.random.seed(42)
random.seed(42)

# Load the building shapefile
shp_file = "qgisshp1/data.shp"  # Update with actual path
gdf = gpd.read_file(shp_file)

# Ensure 'id' is properly formatted
gdf["id"] = gdf["id"].fillna(-1).astype(int)

# Reproject to a projected CRS (UTM Zone 43N for Chandigarh) for accurate centroids
gdf = gdf.to_crs(epsg=32643)

# Compute centroids in projected CRS
gdf["centroid"] = gdf["geometry"].centroid

# Convert back to geographic CRS (WGS 84) for latitude/longitude
gdf = gdf.to_crs(epsg=4326)
gdf["latitude"] = gdf["centroid"].y
gdf["longitude"] = gdf["centroid"].x

# Drop the centroid geometry column to keep it clean
gdf = gdf.drop(columns=["centroid"])

# Define base order factors for each building type
order_factor = {"residential": 50, "commercial": 100, "office": 150}

# Generate order data
orders = []
order_id = 1

for _, building in gdf.iterrows():
    building_type = building["type"]
    num_people = building["people"]
    
    # Set order volume proportional to the number of people
    lam = order_factor.get(building_type, 50) * (num_people / 500)  # Normalize by 500 people
    num_orders = np.random.poisson(lam=lam)
    
    for _ in range(num_orders):
        # Generate a random order time within a day (2023-01-01)
        start_time = datetime(2023, 1, 1)
        random_seconds = random.randint(0, 86400 - 1)
        order_time = start_time + timedelta(seconds=random_seconds)

        # Generate a random order amount between ₹50 and ₹500
        order_amount = round(random.uniform(50, 500), 2)

        orders.append({
            "order_id": order_id,
            "building_id": building["id"],
            "latitude": building["latitude"],
            "longitude": building["longitude"],
            "order_time": order_time,
            "order_amount": order_amount,
            "building_type": building_type
        })
        order_id += 1

# Convert orders to DataFrame
orders_df = pd.DataFrame(orders)

# Save to CSV
orders_df.to_csv("generated_orders_chandigarh.csv", index=False)

# Display preview
print(orders_df.head())


   order_id  building_id      latitude      longitude          order_time  \
0         1           -1  3.393137e+06  666704.617019 2023-01-01 23:16:50   
1         2           -1  3.393137e+06  666704.617019 2023-01-01 10:00:48   
2         3           -1  3.393137e+06  666704.617019 2023-01-01 05:04:49   
3         4           -1  3.393137e+06  666704.617019 2023-01-01 19:51:22   
4         5           -1  3.393137e+06  666704.617019 2023-01-01 15:21:42   

   order_amount building_type  
0        100.10   residential  
1        160.20   residential  
2        381.41   residential  
3         89.12   residential  
4         64.30   residential  


In [11]:
# Next Line of Action could varying the numbers of Orders in respect to the building type and time
# Creatin better shp files
# Looking for more papers to guidce us in sampling data