In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Business categories mapped to the subcategories a customer may be assigned.
_BUSINESS_CATEGORIES = {
    'Restaurant': [
        'Fine Dining',
        'Casual Dining',
        'Fast Casual',
        'Quick Service',
        'Family Restaurant',
        'Ethnic Restaurant',
        'Steakhouse',
        'Seafood Restaurant',
        'Pizza Restaurant',
    ],
    'Hotel': [
        'Luxury Hotel',
        'Business Hotel',
        'Resort',
        'Boutique Hotel',
        'Extended Stay Hotel',
    ],
    'Bakery': [
        'Artisan Bakery',
        'Retail Bakery',
        'Wholesale Bakery',
        'Pastry Shop',
        'Cafe-Bakery',
    ],
    'Institution': [
        'School Cafeteria',
        'University Dining',
        'Hospital',
        'Nursing Home',
        'Corporate Cafeteria',
    ],
    'Catering': [
        'Corporate Catering',
        'Wedding Catering',
        'Event Catering',
        'Food Truck',
        'Meal Delivery Service',
    ],
}

# Product catalog keyed by product category.
# NOTE(review): generate_foodservice_data() does not read this catalog; the
# 'recommended_products' column it emits holds category names, not the
# product names listed here. Kept as reference data -- confirm whether the
# column was meant to be drawn from these entries.
_PRODUCT_CATALOG = {
    'Vegetables': {
        'products': [
            {'name': 'Mixed Lettuce', 'price': 25.99, 'unit': 'Case'},
            {'name': 'Roma Tomatoes', 'price': 29.99, 'unit': 'Case'},
            {'name': 'Yellow Onions', 'price': 19.99, 'unit': '50lb'},
            {'name': 'Russet Potatoes', 'price': 34.99, 'unit': '50lb'},
            {'name': 'Bell Peppers', 'price': 32.99, 'unit': 'Case'},
            {'name': 'Carrots', 'price': 22.99, 'unit': '25lb'},
            {'name': 'Celery', 'price': 27.99, 'unit': 'Case'},
            {'name': 'Mushrooms', 'price': 35.99, 'unit': 'Case'},
        ]
    },
    'Fruits': {
        'products': [
            {'name': 'Bananas', 'price': 24.99, 'unit': 'Case'},
            {'name': 'Apples', 'price': 45.99, 'unit': 'Case'},
            {'name': 'Oranges', 'price': 39.99, 'unit': 'Case'},
            {'name': 'Lemons', 'price': 34.99, 'unit': 'Case'},
            {'name': 'Limes', 'price': 32.99, 'unit': 'Case'},
            {'name': 'Berries Mix', 'price': 49.99, 'unit': 'Case'},
            {'name': 'Fresh Melons', 'price': 29.99, 'unit': 'Case'},
        ]
    },
    'Dairy': {
        'products': [
            {'name': 'Whole Milk', 'price': 29.99, 'unit': 'Case'},
            {'name': 'Heavy Cream', 'price': 39.99, 'unit': 'Case'},
            {'name': 'Butter', 'price': 89.99, 'unit': 'Case'},
            {'name': 'Cheddar Cheese', 'price': 69.99, 'unit': 'Case'},
            {'name': 'Mozzarella', 'price': 74.99, 'unit': 'Case'},
            {'name': 'Yogurt', 'price': 34.99, 'unit': 'Case'},
            {'name': 'Sour Cream', 'price': 29.99, 'unit': 'Case'},
            {'name': 'Cream Cheese', 'price': 44.99, 'unit': 'Case'},
        ]
    },
    'Grains': {
        'products': [
            {'name': 'All-Purpose Flour', 'price': 29.99, 'unit': '50lb'},
            {'name': 'White Rice', 'price': 34.99, 'unit': '50lb'},
            {'name': 'Brown Rice', 'price': 39.99, 'unit': '50lb'},
            {'name': 'Pasta Assortment', 'price': 45.99, 'unit': 'Case'},
            {'name': 'Quinoa', 'price': 89.99, 'unit': '25lb'},
            {'name': 'Bread Flour', 'price': 32.99, 'unit': '50lb'},
            {'name': 'Oats', 'price': 29.99, 'unit': '50lb'},
        ]
    },
    'Meat': {
        'products': [
            {'name': 'Chicken Breast', 'price': 89.99, 'unit': 'Case'},
            {'name': 'Ground Beef', 'price': 129.99, 'unit': 'Case'},
            {'name': 'Pork Loin', 'price': 99.99, 'unit': 'Case'},
            {'name': 'Bacon', 'price': 79.99, 'unit': 'Case'},
            {'name': 'Turkey Breast', 'price': 84.99, 'unit': 'Case'},
            {'name': 'Beef Strips', 'price': 149.99, 'unit': 'Case'},
            {'name': 'Sausage Links', 'price': 69.99, 'unit': 'Case'},
        ]
    },
    'Seafood': {
        'products': [
            {'name': 'Salmon Fillets', 'price': 199.99, 'unit': 'Case'},
            {'name': 'Shrimp 16/20', 'price': 159.99, 'unit': 'Case'},
            {'name': 'Cod Fillets', 'price': 139.99, 'unit': 'Case'},
            {'name': 'Tuna Steaks', 'price': 189.99, 'unit': 'Case'},
            {'name': 'Mussels', 'price': 79.99, 'unit': 'Case'},
            {'name': 'Tilapia Fillets', 'price': 109.99, 'unit': 'Case'},
        ]
    },
    'Frozen Foods': {
        'products': [
            {'name': 'Mixed Vegetables', 'price': 45.99, 'unit': 'Case'},
            {'name': 'French Fries', 'price': 39.99, 'unit': 'Case'},
            {'name': 'Ice Cream', 'price': 49.99, 'unit': 'Case'},
            {'name': 'Frozen Dough', 'price': 54.99, 'unit': 'Case'},
            {'name': 'Frozen Berries', 'price': 59.99, 'unit': 'Case'},
            {'name': 'Pizza Bases', 'price': 44.99, 'unit': 'Case'},
        ]
    },
    'Beverages': {
        'products': [
            {'name': 'Soft Drinks', 'price': 24.99, 'unit': 'Case'},
            {'name': 'Coffee Beans', 'price': 89.99, 'unit': 'Case'},
            {'name': 'Tea Assortment', 'price': 39.99, 'unit': 'Case'},
            {'name': 'Fruit Juices', 'price': 34.99, 'unit': 'Case'},
            {'name': 'Bottled Water', 'price': 19.99, 'unit': 'Case'},
            {'name': 'Sports Drinks', 'price': 29.99, 'unit': 'Case'},
        ]
    },
}

# Sampling bounds per business scale. 'employees' and 'daily_customers' are
# randint bounds (upper bound exclusive); 'order_multiplier' is the uniform
# range applied to the base order value.
_SCALE_PROFILES = {
    'Small': {
        'employees': (5, 20),
        'daily_customers': (20, 100),
        'order_multiplier': (0.8, 1.2),
    },
    'Medium': {
        'employees': (20, 50),
        'daily_customers': (100, 300),
        'order_multiplier': (1.2, 1.8),
    },
    'Large': {
        'employees': (50, 150),
        'daily_customers': (300, 1000),
        'order_multiplier': (1.8, 2.5),
    },
}

# Column order of the generated DataFrame; also used to give an empty result
# the same schema as a populated one.
_CUSTOMER_COLUMNS = [
    'customer_id',
    'business_name',
    'category',
    'subcategory',
    'scale',
    'location',
    'years_in_business',
    'employees',
    'estimated_daily_customers',
    'avg_order_size',
    'order_frequency',
    'last_order_date',
    'total_orders_ytd',
    'storage_capacity',
    'sustainability_focus',
    'quality_preference',
    'recommended_products',
    'primary_product_categories',
]


def _relevant_product_categories(category, subcategory):
    """Return the product categories associated with a business type."""
    if category == 'Restaurant':
        if 'Fine Dining' in subcategory:
            return ['Vegetables', 'Fruits', 'Meat', 'Seafood', 'Dairy']
        # Substring match: of the current subcategories only 'Fast Casual'
        # contains 'Fast'.
        if 'Fast' in subcategory:
            return ['Meat', 'Frozen Foods', 'Beverages', 'Dairy']
        return ['Vegetables', 'Meat', 'Dairy', 'Grains']
    if category == 'Bakery':
        return ['Grains', 'Dairy', 'Fruits']
    if category == 'Hotel':
        return ['Vegetables', 'Fruits', 'Meat', 'Seafood', 'Dairy', 'Beverages']
    if category == 'Institution':
        return ['Vegetables', 'Fruits', 'Dairy', 'Grains']
    # Catering (and any category not listed above).
    return ['Vegetables', 'Meat', 'Seafood', 'Beverages']


def generate_foodservice_data(num_customers=100, seed=None):
    """Generate a synthetic table of foodservice customers.

    Each row is one randomly generated customer: a business category and
    subcategory, a scale with matching employee / daily-customer counts, an
    average order size derived from those, several independently sampled
    categorical attributes, and the product categories tied to the business
    type.

    Args:
        num_customers: Number of customer rows to generate. ``0`` yields an
            empty DataFrame that still carries the full set of columns.
        seed: Optional integer seed. When given, all random draws come from
            a private ``numpy.random.RandomState(seed)``, so repeated calls
            with the same seed produce the same rows and NumPy's global
            random state is left untouched. When ``None`` (the default) the
            global ``numpy.random`` state is used.

    Returns:
        pandas.DataFrame with one row per customer and the columns listed in
        ``_CUSTOMER_COLUMNS``. ``last_order_date`` is always the date on
        which the function is called, so it is not controlled by ``seed``.
    """
    # The np.random module and RandomState expose the same choice / randint /
    # uniform API, so either one can serve as the source of randomness.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Taken once so every row of a single call carries the same date.
    last_order_date = datetime.now().strftime('%Y-%m-%d')
    category_names = list(_BUSINESS_CATEGORIES)

    customers = []
    for i in range(num_customers):
        # NOTE: the order of the random draws below is kept stable so that
        # callers who seed NumPy's global state get the same sequence.
        category = rng.choice(category_names)
        subcategory = rng.choice(_BUSINESS_CATEGORIES[category])
        scale = rng.choice(['Small', 'Medium', 'Large'])

        location = rng.choice(['Urban', 'Suburban', 'Rural'])
        years_in_business = rng.randint(1, 30)

        profile = _SCALE_PROFILES[scale]
        employees = rng.randint(*profile['employees'])
        daily_customers = rng.randint(*profile['daily_customers'])

        num_orders = rng.randint(10, 50)

        # Average order size scales with daily customers and business scale.
        base_order = daily_customers * rng.uniform(10, 20)
        avg_order_size = base_order * rng.uniform(*profile['order_multiplier'])

        relevant_categories = _relevant_product_categories(category, subcategory)

        customers.append({
            'customer_id': f'CUST_{i+1:04d}',
            'business_name': f'Business_{i+1}',
            'category': category,
            'subcategory': subcategory,
            'scale': scale,
            'location': location,
            'years_in_business': years_in_business,
            'employees': employees,
            'estimated_daily_customers': daily_customers,
            'avg_order_size': round(avg_order_size, 2),
            'order_frequency': rng.choice(['Weekly', 'Bi-weekly', 'Monthly']),
            'last_order_date': last_order_date,
            'total_orders_ytd': num_orders,
            'storage_capacity': rng.choice(['Limited', 'Medium', 'Large']),
            'sustainability_focus': rng.choice([True, False]),
            'quality_preference': rng.choice(['Premium', 'Standard', 'Economy']),
            # Three distinct category names sampled from the relevant ones.
            'recommended_products': ', '.join(
                rng.choice(relevant_categories, 3, replace=False)
            ),
            'primary_product_categories': ', '.join(relevant_categories),
        })

    # Passing the columns explicitly keeps the schema stable even when no
    # rows were generated.
    return pd.DataFrame(customers, columns=_CUSTOMER_COLUMNS)

# Generate sample dataset
df = generate_foodservice_data(100)

# Display data info and first few rows
print("\nDataset Info:")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# it is called directly; wrapping it in print() emitted a stray "None" line.
df.info()
print("\nSample Records:")
print(df.head())

# Save to CSV (written to the current working directory, without the index)
df.to_csv('foodservice_customer_data.csv', index=False)


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 18 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   customer_id                 100 non-null    object 
 1   business_name               100 non-null    object 
 2   category                    100 non-null    object 
 3   subcategory                 100 non-null    object 
 4   scale                       100 non-null    object 
 5   location                    100 non-null    object 
 6   years_in_business           100 non-null    int64  
 7   employees                   100 non-null    int64  
 8   estimated_daily_customers   100 non-null    int64  
 9   avg_order_size              100 non-null    float64
 10  order_frequency             100 non-null    object 
 11  last_order_date             100 non-null    object 
 12  total_orders_ytd            100 non-null    int64  
 13  storage_capacity     