In [2]:
# generate_dataset.py
import pandas as pd
import random
from datetime import datetime, timedelta

# Constants
start_date = datetime.strptime('2024-04-01', '%Y-%m-%d')
num_days = 365  # number of consecutive days generated, starting at start_date
output_path = 'data.csv'  # single source of truth for the CSV location and the message
vegetables = ['Tomato', 'Brinjal', 'Onion', 'Carrot', 'Beans',
              'Cabbage', 'Cauliflower', 'Potato', 'Chilli',
              'Pumpkin', 'Radish', 'Spinach', 'Coriander']
# Each season maps to an inclusive [(start_month, start_day), (end_month, end_day)] range.
seasons = {
    'Summer': [(4, 1), (6, 30)],
    'Monsoon': [(7, 1), (9, 30)],
    'Winter': [(10, 1), (12, 31)],
    'Spring': [(1, 1), (3, 31)]
}
festival_dates = ['2024-04-14', '2024-08-15', '2024-10-02', '2024-11-12', '2025-01-14']
strike_dates = ['2024-05-05', '2024-09-10', '2024-12-20']
# Base price added to every generated price for the given vegetable.
price_baselines = {
    'Tomato': 20, 'Brinjal': 25, 'Onion': 30, 'Carrot': 35, 'Beans': 28,
    'Cabbage': 18, 'Cauliflower': 26, 'Potato': 22, 'Chilli': 40,
    'Pumpkin': 16, 'Radish': 19, 'Spinach': 15, 'Coriander': 12
}


def season_for(month, day):
    """Return the name of the season whose inclusive range contains (month, day).

    The ranges come from the module-level ``seasons`` mapping. None of them
    wraps around the year end, so a lexicographic (month, day) tuple
    comparison is sufficient.

    Raises:
        ValueError: if no range in ``seasons`` covers the given date.
    """
    for name, (start, end) in seasons.items():
        if start <= (month, day) <= end:
            return name
    raise ValueError(f"No season defined for month={month}, day={day}")


# Generate data
data = []
for day_offset in range(num_days):
    current_date = start_date + timedelta(days=day_offset)
    date_str = current_date.strftime('%Y-%m-%d')
    season = season_for(current_date.month, current_date.day)
    day_of_week = current_date.strftime('%A')

    # Weather and calendar events describe the day, not an individual
    # vegetable, so they are drawn once per date and shared by all its rows.
    temp = random.uniform(25, 38) if season == 'Summer' else random.uniform(20, 30)
    rainfall = random.uniform(0, 40) if season == 'Monsoon' else random.uniform(0, 5)
    humidity = random.uniform(50, 90)
    festival = 'Yes' if date_str in festival_dates else 'No'
    strike = 'Yes' if date_str in strike_dates else 'No'
    heavy_rain = rainfall > 30

    for veg in vegetables:
        supply = round(random.uniform(5, 20), 2)
        demand = round(random.uniform(40, 100), 2)

        # Price rises as supply falls below 20 and as demand grows.
        price = price_baselines[veg] + (20 - supply) * 0.7 + demand * 0.2
        if festival == 'Yes':
            price += 5
        if strike == 'Yes':
            price += 3
        if heavy_rain:
            # The price above uses the pre-rain supply; heavy rain then cuts
            # the recorded supply by 30% and adds a flat surcharge. Re-round
            # so the column keeps two decimals on every row.
            supply = round(supply * 0.7, 2)
            price += 2

        data.append([
            date_str, veg, round(price, 2), round(temp, 2), round(rainfall, 2),
            round(humidity, 2), festival, strike, supply, demand, season, day_of_week
        ])

# Save to CSV
columns = ['date', 'vegetable', 'price_per_kg', 'temperature_c', 'rainfall_mm', 'humidity_percent',
           'festival', 'transport_strike', 'supply_quantity_ton', 'demand_index', 'season', 'day_of_week']
df = pd.DataFrame(data, columns=columns)
df.to_csv(output_path, index=False)

# Report the path that was actually written.
print(f"✅ Dataset saved as {output_path}")


✅ Dataset saved as tamil_nadu_panruti_vegetable_prices.csv
