In [None]:
# Seed both random number generators from a single constant so the notebook
# is reproducible and the two seeds cannot drift apart when the value is edited.
SEED = 101
random.seed(SEED)     # Python's built-in `random` module
np.random.seed(SEED)  # NumPy's global (legacy) random state

In [None]:
def generate_restaurant_data(n_samples=1000, random_state=None):
    """Build a synthetic restaurant-ratings dataset.

    Each row gets a random location, cuisine, review count, price range and
    opening year, plus a ``Ratings`` score derived from those features with
    Gaussian noise added.

    Parameters
    ----------
    n_samples : int, default 1000
        Number of rows to generate.
    random_state : int, numpy.random.RandomState or None, default None
        Source of randomness. ``None`` draws from NumPy's global random state
        (so a preceding ``np.random.seed(...)`` controls the output, as
        before). An int seeds a private ``RandomState``; an existing
        ``RandomState`` instance is used as-is. Passing a seed makes the call
        reproducible regardless of what ran earlier in the notebook.

    Returns
    -------
    pandas.DataFrame
        Columns ``Location``, ``Cuisine``, ``Number_of_Reviews``,
        ``Price_Range``, ``Opening_Year`` and ``Ratings``. Ratings lie in
        ``[1, 4.7]``: scores are clipped to ``[1, 5]`` and anything above 4.7
        is then redrawn uniformly from ``[4.0, 4.7)``.
    """
    # Pick the random source. The np.random module exposes the same
    # choice/randint/normal/uniform functions as a RandomState instance, so
    # the default path consumes the global stream exactly as it always did.
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    locations = ['New York', 'San Francisco', 'Boston', 'Chicago', 'Miami']
    cuisines = ['Chinese', 'Japanese', 'Italian', 'Indian', 'Mexican']
    price_ranges = ['Cheap', 'Moderate', 'Expensive', 'Very Expensive']

    # NOTE: the order of the draws below is part of the reproducibility
    # contract -- reordering them changes the data produced for a given seed.
    data = {
        'Location': rng.choice(locations, n_samples),
        'Cuisine': rng.choice(cuisines, n_samples),
        # randint's upper bound is exclusive: reviews in 10..999
        'Number_of_Reviews': rng.randint(10, 1000, n_samples),
        'Price_Range': rng.choice(price_ranges, n_samples),
        # exclusive upper bound again: opening years 1984..2023
        'Opening_Year': rng.randint(1984, 2024, n_samples)
    }

    df = pd.DataFrame(data)

    # Deterministic part of the rating: a constant of 4, a log-scaled bonus
    # for review volume, fixed offsets per location / cuisine / price tier,
    # and a 0.01 penalty per year since opening (relative to 2024).
    base_rating = (
        4 + np.log(df['Number_of_Reviews']) * 0.15 +
        np.where(df['Location'].isin(['New York', 'San Francisco']), 0.3, -0.1) +
        np.where(df['Cuisine'].isin(['Italian', 'Japanese']), 0.4,
                 np.where(df['Cuisine'].isin(['Mexican', 'Indian']), 0.2, -0.1)) +
        np.where(df['Price_Range'].isin(['Expensive', 'Very Expensive']), 0.3,
                 np.where(df['Price_Range'] == 'Moderate', 0.1, -0.2)) +
        (2024 - df['Opening_Year']) * -0.01
    )

    # Gaussian noise (sigma = 0.7), then clamp to the valid 1-5 star range.
    noise = rng.normal(0, 0.7, n_samples)
    df['Ratings'] = (base_rating + noise).clip(1, 5)

    # Reduce the share of near-perfect scores: every rating above 4.7 is
    # replaced with a fresh uniform draw from [4.0, 4.7). The mask is built
    # once and reused for both the count and the assignment.
    too_high = df['Ratings'] > 4.7
    df.loc[too_high, 'Ratings'] = rng.uniform(4.0, 4.7, size=int(too_high.sum()))

    return df

In [None]:
 # Build the synthetic dataset (1000 rows, the generator's default size) and
 # persist it as CSV; index=False keeps the DataFrame's row index out of the file.
 restaurant_data = generate_restaurant_data(n_samples=1000)
 restaurant_data.to_csv(
     path_or_buf='restaurant_ratings_dataset.csv',
     index=False,
 )