In [6]:
pip install Faker mysql-connector-python streamlit

Collecting Faker
  Downloading Faker-36.1.1-py3-none-any.whl.metadata (15 kB)
Collecting mysql-connector-python
  Using cached mysql_connector_python-9.2.0-cp313-cp313-win_amd64.whl.metadata (6.2 kB)
Collecting streamlit
  Using cached streamlit-1.42.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting tzdata (from Faker)
  Using cached tzdata-2025.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Using cached altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.0.0 (from streamlit)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<6,>=4.0 (from streamlit)
  Using cached cachetools-5.5.1-py3-none-any.whl.metadata (5.4 kB)
Collecting click<9,>=7.0 (from streamlit)
  Using cached click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting numpy<3,>=1.23 (from streamlit)
  Using cached numpy-2.2.3-cp313-cp313-win_amd64.whl.metadata (60 kB)
Collecting pandas<3,>=1.4.0 (from streamlit)
  Using cached pan


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import random  # Import random at the top of your file (if you haven't already)

# Inside generate_customers()
is_premium = random.choice([0, 1]) # Or, is_premium = int(random.choice([True, False]))

# Inside generate_restaurants()
is_active = random.choice([0, 1]) # Or, is_active = int(random.choice([True, False]))

In [8]:
# generate_data.py
import faker
import random
import mysql.connector
from datetime import datetime, timedelta


In [9]:
# Database Configuration
DB_CONFIG = {
    'host': '127.0.0.1',  # Or the correct host if your MySQL server is remote
    'user': 'root',        # Replace with your MySQL username
    'password': '12345678', # Replace with your MySQL password
    'database': 'zomato_db',  # The database you created
}

In [10]:
# Initialize Faker
fake = faker.Faker()

In [11]:
# Function to Connect to the Database
def connect_to_db():
    try:
        mydb = mysql.connector.connect(**DB_CONFIG)
        mycursor = mydb.cursor()
        return mydb, mycursor
    except mysql.connector.Error as err:
        print(f"Error connecting to database: {err}")
        return None, None

In [12]:
# Customer Data Generation
def generate_customers(num_customers=100):
    customers = []
    for _ in range(num_customers):
        customer_id = fake.uuid4()  # Using UUID for unique IDs
        name = fake.name()
        email = fake.email()
        phone = fake.phone_number()
        location = fake.address()
        signup_date = fake.date_this_decade()
        is_premium = fake.random_element(elements=(True, False))
        preferred_cuisine = fake.random_element(elements=('Indian', 'Chinese', 'Italian', 'Mexican', 'Japanese'))
        total_orders = fake.random_int(min=0, max=50)
        average_rating = round(fake.random_number(digits=1) + 1, 1)  # Ratings from 1.0 to 5.0
        customers.append((customer_id, name, email, phone, location, signup_date, is_premium, preferred_cuisine, total_orders, average_rating))
    return customers


In [13]:
# Restaurant Data Generation
def generate_restaurants(num_restaurants=20):
    restaurants = []
    for _ in range(num_restaurants):
        restaurant_id = fake.uuid4()
        name = fake.company() + " Restaurant"
        cuisine_type = fake.random_element(elements=('Indian', 'Chinese', 'Italian', 'Mexican', 'Japanese'))
        location = fake.address()
        owner_name = fake.name()
        average_delivery_time = fake.random_int(min=15, max=60) # Minutes
        contact_number = fake.phone_number()
        rating = round(fake.random_number(digits=1) + 1, 1)  # Ratings from 1.0 to 5.0
        total_orders = fake.random_int(min=0, max=1000)
        is_active = fake.random_element(elements=(True, False))
        restaurants.append((restaurant_id, name, cuisine_type, location, owner_name, average_delivery_time, contact_number, rating, total_orders, is_active))
    return restaurants

In [14]:
# Order Data Generation
def generate_orders(num_orders=200):
    orders = []
    customers = get_customer_ids()
    restaurants = get_restaurant_ids()

    if not customers or not restaurants:
        print("No customers or restaurants found.  Skipping order generation.")
        return orders  # Return an empty list if there are no customers or restaurants

    for _ in range(num_orders):
        order_id = fake.uuid4()
        customer_id = fake.random_element(elements=customers)  # Random customer ID
        restaurant_id = fake.random_element(elements=restaurants) # Random restaurant ID
        order_date = fake.date_time_between(start_date='-1y', end_date='now')
        delivery_time = order_date + timedelta(minutes=fake.random_int(min=15, max=90))  # Delivery time after order time
        status = fake.random_element(elements=('Pending', 'Delivered', 'Cancelled'))
        total_amount = round(fake.random_number(digits=2, as_int=False) * fake.random_int(min=10, max=100), 2)
        payment_mode = fake.random_element(elements=('Credit Card', 'Cash', 'UPI'))
        discount_applied = round(fake.random_number(digits=2, as_int=False) * total_amount * 0.1, 2)  # Up to 10% discount
        feedback_rating = round(fake.random_number(digits=1) + 1, 1) if status == 'Delivered' else None # Only rate delivered orders
        orders.append((order_id, customer_id, restaurant_id, order_date, delivery_time, status, total_amount, payment_mode, discount_applied, feedback_rating))
    return orders

In [15]:
# Delivery Data Generation
def generate_deliveries(num_deliveries=200):
    deliveries = []
    orders = get_order_ids()

    if not orders:
        print("No orders found. Skipping delivery generation.")
        return deliveries

    for _ in range(num_deliveries):
        delivery_id = fake.uuid4()
        order_id = fake.random_element(elements=orders)
        delivery_person_id = fake.uuid4()  # Assuming you don't have delivery person table yet.  Make this a UUID.
        delivery_status = fake.random_element(elements=('On the way', 'Delivered', 'Delayed'))
        distance = round(fake.random_number(digits=2, as_int=False) * 5, 2)  # Distance in km, up to 5km
        delivery_time = fake.random_int(min=15, max=90)  # Delivery time in minutes
        estimated_time = delivery_time + fake.random_int(min=0, max=15) # Estimated time might be slightly longer
        delivery_fee = round(fake.random_number(digits=2, as_int=False) * 2, 2)  # Delivery fee up to $2
        vehicle_type = fake.random_element(elements=('Bike', 'Car', 'Scooter'))
        deliveries.append((delivery_id, order_id, delivery_person_id, delivery_status, distance, delivery_time, estimated_time, delivery_fee, vehicle_type))
    return deliveries


In [16]:
# Get Customer IDs
def get_customer_ids():
    mydb, mycursor = connect_to_db()
    if not mycursor:
        return []

    try:
        mycursor.execute("SELECT customer_id FROM customers")
        customer_ids = [row[0] for row in mycursor.fetchall()]
        return customer_ids
    except mysql.connector.Error as err:
        print(f"Error fetching customer IDs: {err}")
        return []
    finally:
        if mydb:
            mydb.close()

In [17]:
# Get Restaurant IDs
def get_restaurant_ids():
    mydb, mycursor = connect_to_db()
    if not mycursor:
        return []

    try:
        mycursor.execute("SELECT restaurant_id FROM restaurants")
        restaurant_ids = [row[0] for row in mycursor.fetchall()]
        return restaurant_ids
    except mysql.connector.Error as err:
        print(f"Error fetching restaurant IDs: {err}")
        return []
    finally:
        if mydb:
            mydb.close()

In [18]:
# Get Order IDs
def get_order_ids():
    mydb, mycursor = connect_to_db()
    if not mycursor:
        return []

    try:
        mycursor.execute("SELECT order_id FROM orders")
        order_ids = [row[0] for row in mycursor.fetchall()]
        return order_ids
    except mysql.connector.Error as err:
        print(f"Error fetching order IDs: {err}")
        return []
    finally:
        if mydb:
            mydb.close()

In [19]:
# Function to Create Tables
def create_tables():
    mydb, mycursor = connect_to_db()
    if not mycursor:
        return

    try:
        # Customers Table
        mycursor.execute("""
            CREATE TABLE IF NOT EXISTS customers (
                customer_id VARCHAR(36) PRIMARY KEY,
                name VARCHAR(255),
                email VARCHAR(255),
                phone VARCHAR(20),
                location TEXT,
                signup_date DATE,
                is_premium BOOLEAN,
                preferred_cuisine VARCHAR(50),
                total_orders INT,
                average_rating DECIMAL(3, 1)
            )
        """)

        # Restaurants Table
        mycursor.execute("""
            CREATE TABLE IF NOT EXISTS restaurants (
                restaurant_id VARCHAR(36) PRIMARY KEY,
                name VARCHAR(255),
                cuisine_type VARCHAR(50),
                location TEXT,
                owner_name VARCHAR(255),
                average_delivery_time INT,
                contact_number VARCHAR(20),
                rating DECIMAL(3, 1),
                total_orders INT,
                is_active BOOLEAN
            )
        """)

        # Orders Table
        mycursor.execute("""
            CREATE TABLE IF NOT EXISTS orders (
                order_id VARCHAR(36) PRIMARY KEY,
                customer_id VARCHAR(36),
                restaurant_id VARCHAR(36),
                order_date DATETIME,
                delivery_time DATETIME,
                status VARCHAR(50),
                total_amount DECIMAL(10, 2),
                payment_mode VARCHAR(50),
                discount_applied DECIMAL(10, 2),
                feedback_rating DECIMAL(3, 1),
                FOREIGN KEY (customer_id) REFERENCES customers(customer_id),
                FOREIGN KEY (restaurant_id) REFERENCES restaurants(restaurant_id)
            )
        """)

        # Deliveries Table
        mycursor.execute("""
            CREATE TABLE IF NOT EXISTS deliveries (
                delivery_id VARCHAR(36) PRIMARY KEY,
                order_id VARCHAR(36),
                delivery_person_id VARCHAR(36),
                delivery_status VARCHAR(50),
                distance DECIMAL(5, 2),
                delivery_time INT,
                estimated_time INT,
                delivery_fee DECIMAL(5, 2),
                vehicle_type VARCHAR(50),
                FOREIGN KEY (order_id) REFERENCES orders(order_id)
            )
        """)

        mydb.commit()  # Commit the changes
        print("Tables created successfully!")

    except mysql.connector.Error as err:
        print(f"Error creating tables: {err}")
    finally:
        if mydb:
            mydb.close()

# Function to Insert Data
def insert_data(table_name, data, column_count):
    mydb, mycursor = connect_to_db()
    if not mycursor:
        return

    try:
        placeholders = ', '.join(['%s'] * column_count) # Create placeholders like %s, %s, %s...
        sql = f"INSERT INTO {table_name} VALUES ({placeholders})"
        mycursor.executemany(sql, data)
        mydb.commit()
        print(f"Data inserted into {table_name} successfully.")
    except mysql.connector.Error as err:
        print(f"Error inserting data into {table_name}: {err}")
    finally:
        if mydb:
            mydb.close()


In [20]:
# Main execution
if __name__ == "__main__":
    create_tables() # Create the tables first

    # Generate and Insert Data
    customers_data = generate_customers()
    insert_data("customers", customers_data, 10)

    restaurants_data = generate_restaurants()
    insert_data("restaurants", restaurants_data, 10)

    orders_data = generate_orders()
    insert_data("orders", orders_data, 10) # Should be 10

    deliveries_data = generate_deliveries()
    insert_data("deliveries", deliveries_data, 9) # Should be 9

Tables created successfully!
Error inserting data into customers: 1136 (21S01): Column count doesn't match value count at row 1
Error inserting data into restaurants: 1136 (21S01): Column count doesn't match value count at row 1
No customers or restaurants found.  Skipping order generation.
Data inserted into orders successfully.
No orders found. Skipping delivery generation.
Data inserted into deliveries successfully.
