In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Seed the global RNGs so a fresh-kernel run reproduces the same dataset.
random.seed(42)
np.random.seed(42)

# Date range for 2024 (a leap year, so 366 daily timestamps)
start_date = datetime.strptime("2024-01-01", "%Y-%m-%d")
end_date = datetime.strptime("2024-12-31", "%Y-%m-%d")
date_range = pd.date_range(start_date, end_date)

# Festival days for 2024, in chronological order. On these days the generator
# uses deeper discounts and a higher units-sold range (see the loop below).
festival_days = [
    "2024-01-15",  # Pongal
    "2024-03-25",  # Holi
    "2024-04-10",  # Eid al-Fitr
    "2024-06-17",  # Eid al-Adha
    "2024-08-19",  # Raksha Bandhan
    "2024-08-26",  # Janmashtami
    "2024-09-07",  # Ganesh Chaturthi
    "2024-09-15",  # Onam (Thiruvonam); previously listed as 2024-08-28, which is not the 2024 date
    "2024-10-03",  # Navratri Start
    "2024-11-01",  # Diwali  NOTE(review): observed on 2024-10-31 in some regions -- confirm
    "2024-12-25"   # Christmas
]
# Set view of the list above for O(1) membership tests inside the loop.
festival_day_set = frozenset(festival_days)

# Mobile models as (brand, model, storage variant)
mobiles = [
    ("Samsung", "Galaxy M14", "64GB"),
    ("Samsung", "Galaxy A15", "128GB"),
    ("Redmi", "Note 12", "128GB"),
    ("Redmi", "12C", "64GB"),
    ("Realme", "Narzo 60", "128GB"),
    ("Realme", "C55", "64GB"),
    ("iQOO", "Z7 5G", "128GB"),
    ("iQOO", "Neo 7 Pro", "256GB"),
    ("OnePlus", "Nord CE 3 Lite", "128GB"),
    ("OnePlus", "Nord 3 5G", "256GB"),
    ("Motorola", "G73 5G", "128GB"),
    ("Motorola", "Edge 40 Neo", "128GB"),
    ("Vivo", "T2 5G", "128GB"),
    ("Vivo", "Y27", "64GB"),
    ("Oppo", "A78", "128GB"),
    ("Oppo", "Reno8 T", "128GB"),
    ("Poco", "X5 Pro", "256GB"),
    ("Poco", "M6 Pro", "128GB"),
    ("Infinix", "Zero 5G 2023", "128GB"),
    ("Tecno", "Spark 10", "64GB"),
]

# Generate dataset: one row per (day, model).
data = []
for date in date_range:
    date_str = date.strftime("%Y-%m-%d")
    is_festival = date_str in festival_day_set

    # Festival days: discounts of 10-30% and 10..70 units; other days: 0-10% and 1..40 units.
    discount_options = [10, 20, 30] if is_festival else [0, 5, 10]
    min_units, max_units = (10, 70) if is_festival else (1, 40)

    for product_no, (brand, model, storage) in enumerate(mobiles, start=100):
        # NOTE(review): base_price is re-drawn for every row, so the same model's
        # price is not stable from one day to the next -- confirm this is intended.
        base_price = random.randint(10000, 30000)
        discount = random.choice(discount_options)
        price = int(base_price * (1 - discount / 100))  # selling price after discount, truncated
        stock = random.randint(30, 150)
        # Never sell more than is in stock (stock >= 30, so the range is always valid).
        units_sold = random.randint(min_units, min(stock, max_units))
        revenue = price * units_sold

        data.append({
            "date": date_str,
            "product_id": f"M{product_no}",
            "product_name": model,
            "brand": brand,
            "storage_variant": storage,
            "category": "Mobile",
            "price": price,
            "units_sold": units_sold,
            "revenue": revenue,
            "stock": stock,
            "discount": discount
            # is_festival is deliberately not written out (dropped for Prophet)
        })

# Save as CSV
df = pd.DataFrame(data)
df.to_csv("mobile_sales_2024.csv", index=False)
print("✅ Dataset 'mobile_sales_2024.csv' generated with", len(df), "rows (ready for Prophet).")


✅ Dataset 'mobile_sales_2024.csv' generated with 7320 rows (ready for Prophet).


In [2]:
import pandas as pd
import numpy as np
import random

# Load the sales data (make sure it's generated first)
sales_df = pd.read_csv("mobile_sales_2024.csv")

# Use a private, explicitly seeded generator instead of the global `random`
# state, so this cell writes the same CSV no matter how many times it is
# re-run or which cells ran before it. The seed differs from the one used for
# the sales data (42) so the two random streams are not copies of each other.
BEHAVIOR_SEED = 43
behavior_rng = random.Random(BEHAVIOR_SEED)

# Generate behavior data: one funnel record per sales row.
behavior_data = []

for sale in sales_df.itertuples(index=False):
    purchases = sale.units_sold

    # Behavior logic (tweakable): each stage is drawn relative to the previous one.
    views = behavior_rng.randint(purchases * 10, purchases * 30)
    clicks = behavior_rng.randint(int(views * 0.1), int(views * 0.4))
    # NOTE(review): the lower bound (20% of clicks) can fall below `purchases`,
    # so add_to_cart < purchases is possible -- confirm that is acceptable.
    add_to_cart = behavior_rng.randint(int(clicks * 0.2), int(clicks * 0.8))
    bounce_rate = round(behavior_rng.uniform(20, 80), 2)  # percent, independent of the funnel

    behavior_data.append({
        "date": sale.date,
        "product_id": sale.product_id,
        "views": views,
        "clicks": clicks,
        "add_to_cart": add_to_cart,
        "purchases": purchases,
        "bounce_rate": bounce_rate
    })

# Save to DataFrame
behavior_df = pd.DataFrame(behavior_data)

# Export to CSV
behavior_df.to_csv("customer_behavior_2024.csv", index=False)
print("✅ Customer behavior dataset 'customer_behavior_2024.csv' generated with", len(behavior_df), "rows.")


✅ Customer behavior dataset 'customer_behavior_2024.csv' generated with 7320 rows.


In [3]:
import pandas as pd
import numpy as np
import random

# Load your sales data to get matching product_ids and dates
sales_df = pd.read_csv("mobile_sales_2024.csv")

# Use a private, explicitly seeded generator instead of the global `random`
# state, so this cell writes the same CSV no matter how many times it is
# re-run or which cells ran before it. The seed differs from the one used for
# the sales data (42) so the two random streams are not copies of each other.
COMPETITOR_SEED = 44
competitor_rng = random.Random(COMPETITOR_SEED)

competitor_data = []

for sale in sales_df.itertuples(index=False):
    # The "price" column holds our selling price for that day (already discounted),
    # which is what competitor prices are scattered around.
    our_price = sale.price

    # Simulate competitor prices with slight variation; int() truncates the result.
    flipkart_price = int(our_price * competitor_rng.uniform(0.95, 1.10))
    amazon_price = int(our_price * competitor_rng.uniform(0.90, 1.12))
    myntra_price = int(our_price * competitor_rng.uniform(0.92, 1.08))

    competitor_data.append({
        "date": sale.date,
        "product_id": sale.product_id,
        "product_name": sale.product_name,
        "flipkart_price": flipkart_price,
        "amazon_price": amazon_price,
        "myntra_price": myntra_price
    })

# Save it
competitor_df = pd.DataFrame(competitor_data)
competitor_df.to_csv("competitor_prices_2024.csv", index=False)

print("✅ Competitor pricing dataset 'competitor_prices_2024.csv' generated with", len(competitor_df), "rows.")


✅ Competitor pricing dataset 'competitor_prices_2024.csv' generated with 7320 rows.
