In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset with error handling.
# NOTE: this is an absolute, machine-specific path; change DATA_PATH when the
# notebook is run anywhere else.
DATA_PATH = "C:/Users/anuro/OneDrive/Desktop/CBL/deliver.csv"
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() does not abort the
    # running cell, so the statements below would still execute.
    raise FileNotFoundError(
        f"File not found: {DATA_PATH}. Please check the file path."
    ) from err

# Fix typo in column name (no-op if the misspelled column is absent)
data = data.rename(columns={"Restaurnat Rating": "Restaurant Rating"})

# Check for required columns and stop the cell with an explicit error that
# names exactly what is missing and what the file actually contains.
required_columns = ["Frequently ordered Meal category", "Restaurant Rating"]
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise ValueError(
        f"One or more required columns are missing: {missing_columns}. "
        f"Available columns: {list(data.columns)}"
    )

# Handle NaN values: drop rows with NaN in any of the key columns
data = data.dropna(subset=required_columns)

# --- Graph 1: Bar Chart - Distribution of Frequently Ordered Meal Categories ---
# Number of rows per meal category, drawn as one bar per category and written
# to 'meal_category_distribution.png'.
meal_counts = data['Frequently ordered Meal category'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
meal_counts.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Distribution of Frequently Ordered Meal Categories')
ax.set_xlabel('Meal Category')
ax.set_ylabel('Number of Orders')
# Tilt the category labels so long names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig('meal_category_distribution.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# --- Graph 2: Histogram - Distribution of Restaurant Ratings ---
# Unit-wide bins centred on each integer rating (0.5-1.5, 1.5-2.5, ...), so
# every bar sits directly above its tick. The previous bins=5 over range (1, 5)
# produced 0.8-wide bins whose bars drifted away from the 1..5 tick marks.
# Assumes ratings lie on a 1-5 scale (as the axis label states); fractional
# ratings are counted in the bin of the nearest integer.
rating_bin_edges = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]

fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(data['Restaurant Rating'], bins=rating_bin_edges,
        color='lightgreen', edgecolor='black')
ax.set_title('Distribution of Restaurant Ratings')
ax.set_xlabel('Rating (1-5)')
ax.set_ylabel('Frequency')
ax.set_xticks([1, 2, 3, 4, 5])
ax.grid(axis='y', alpha=0.75)
fig.tight_layout()
fig.savefig('restaurant_rating_distribution.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# --- Graph 3: Bar Chart - Average Restaurant Rating by Meal Category ---
# Mean rating per meal category, ordered from highest to lowest, written to
# 'avg_rating_by_category.png'.
ratings_by_category = data.groupby('Frequently ordered Meal category')['Restaurant Rating']
avg_ratings = ratings_by_category.mean().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
avg_ratings.plot(kind='bar', color='coral', ax=ax)
ax.set_title('Average Restaurant Rating by Meal Category')
ax.set_xlabel('Meal Category')
ax.set_ylabel('Average Rating')
# Fixed y-range so the bars are read against the full 0-5 scale.
ax.set_ylim(0, 5)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig('avg_rating_by_category.png')
plt.close(fig)

# --- Graph 4: Heatmap - Sample User-Item Rating Matrix ---
# Long-format (user_id, item, rating) table. Built with vectorized column
# operations instead of a Python-level iterrows() loop over every row; the row
# index label of `data` serves as the user id.
ratings_df = (
    data[['Frequently ordered Meal category', 'Restaurant Rating']]
    .rename(columns={'Frequently ordered Meal category': 'item',
                     'Restaurant Rating': 'rating'})
    .rename_axis('user_id')
    .reset_index()
)
# Same data as a list of (user_id, item, rating) tuples.
user_item_rating = list(ratings_df.itertuples(index=False, name=None))

# Only the first 10 rows are pivoted. Each source row is its own "user", so
# every matrix row holds a single rating and the remaining cells are NaN.
sample_data = ratings_df.head(10)
matrix = sample_data.pivot(index='user_id', columns='item', values='rating')

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(matrix, annot=True, cmap='YlGnBu', cbar=True, fmt='.1f', ax=ax)
ax.set_title('Sample User-Item Rating Matrix')
ax.set_xlabel('Meal Category')
ax.set_ylabel('User ID')
fig.tight_layout()
fig.savefig('user_item_matrix.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# Confirmation message listing every PNG file written by the plotting steps.
saved_message = (
    "Graphs saved as PNG files: 'meal_category_distribution.png', 'restaurant_rating_distribution.png', "
    "'avg_rating_by_category.png', 'user_item_matrix.png'"
)
print(saved_message)

Error: One or more required columns are missing.


KeyError: ['Frequently ordered Meal category']

: 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset with error handling.
# NOTE: this is an absolute, machine-specific path; change DATA_PATH when the
# notebook is run anywhere else.
DATA_PATH = "C:/Users/anuro/OneDrive/Desktop/CBL/deliver.csv"
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() does not abort the
    # running cell, so the statements below would still execute.
    raise FileNotFoundError(
        f"File not found: {DATA_PATH}. Please check the file path."
    ) from err

# Fix typo in column name (no-op if the misspelled column is absent)
data = data.rename(columns={"Restaurnat Rating": "Restaurant Rating"})

# Check for required columns and stop the cell with an explicit error that
# names exactly what is missing and what the file actually contains.
required_columns = ["Frequently ordered Meal category", "Restaurant Rating"]
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise ValueError(
        f"One or more required columns are missing: {missing_columns}. "
        f"Available columns: {list(data.columns)}"
    )

# Clean the key columns in one chain (no item assignment on a derived frame):
#   1. drop rows with missing values in the key columns,
#   2. coerce 'Restaurant Rating' to numeric (unparseable values become NaN),
#   3. drop the rows whose rating could not be converted.
data = (
    data.dropna(subset=required_columns)
    .assign(**{
        "Restaurant Rating": lambda frame: pd.to_numeric(
            frame["Restaurant Rating"], errors="coerce"
        )
    })
    .dropna(subset=["Restaurant Rating"])
)

# --- Graph 1: Bar Chart - Distribution of Frequently Ordered Meal Categories ---
# Number of rows per meal category, drawn as one bar per category and written
# to 'meal_category_distribution.png'.
meal_counts = data['Frequently ordered Meal category'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
meal_counts.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Distribution of Frequently Ordered Meal Categories')
ax.set_xlabel('Meal Category')
ax.set_ylabel('Number of Orders')
# Tilt the category labels so long names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig('meal_category_distribution.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# --- Graph 2: Histogram - Distribution of Restaurant Ratings ---
# Unit-wide bins centred on each integer rating (0.5-1.5, 1.5-2.5, ...), so
# every bar sits directly above its tick. The previous bins=5 over range (1, 5)
# produced 0.8-wide bins whose bars drifted away from the 1..5 tick marks.
# Assumes ratings lie on a 1-5 scale (as the axis label states); fractional
# ratings are counted in the bin of the nearest integer.
rating_bin_edges = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]

fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(data['Restaurant Rating'], bins=rating_bin_edges,
        color='lightgreen', edgecolor='black')
ax.set_title('Distribution of Restaurant Ratings')
ax.set_xlabel('Rating (1-5)')
ax.set_ylabel('Frequency')
ax.set_xticks([1, 2, 3, 4, 5])
ax.grid(axis='y', alpha=0.75)
fig.tight_layout()
fig.savefig('restaurant_rating_distribution.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# --- Graph 3: Bar Chart - Average Restaurant Rating by Meal Category ---
# Mean rating per meal category, ordered from highest to lowest, written to
# 'avg_rating_by_category.png'.
ratings_by_category = data.groupby('Frequently ordered Meal category')['Restaurant Rating']
avg_ratings = ratings_by_category.mean().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
avg_ratings.plot(kind='bar', color='coral', ax=ax)
ax.set_title('Average Restaurant Rating by Meal Category')
ax.set_xlabel('Meal Category')
ax.set_ylabel('Average Rating')
# Fixed y-range so the bars are read against the full 0-5 scale.
ax.set_ylim(0, 5)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
fig.savefig('avg_rating_by_category.png')
plt.close(fig)

# --- Graph 4: Heatmap - Sample User-Item Rating Matrix ---
# Long-format (user_id, item, rating) table. Built with vectorized column
# operations instead of a Python-level iterrows() loop over every row; the row
# index label of `data` serves as the user id.
ratings_df = (
    data[['Frequently ordered Meal category', 'Restaurant Rating']]
    .rename(columns={'Frequently ordered Meal category': 'item',
                     'Restaurant Rating': 'rating'})
    .rename_axis('user_id')
    .reset_index()
)
# Same data as a list of (user_id, item, rating) tuples.
user_item_rating = list(ratings_df.itertuples(index=False, name=None))

# Create a user-item matrix for the first 10 users. Each source row is its own
# "user", so every matrix row holds a single rating and the rest are NaN.
sample_data = ratings_df.head(10)
matrix = sample_data.pivot(index='user_id', columns='item', values='rating')

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(matrix, annot=True, cmap='YlGnBu', cbar=True, fmt='.1f', ax=ax)
ax.set_title('Sample User-Item Rating Matrix')
ax.set_xlabel('Meal Category')
ax.set_ylabel('User ID')
fig.tight_layout()
fig.savefig('user_item_matrix.png')
# Close the figure so it is not rendered inline and its memory is released.
plt.close(fig)

# Confirmation message listing every PNG file written by the plotting steps.
saved_message = (
    "Graphs saved as PNG files: 'meal_category_distribution.png', 'restaurant_rating_distribution.png', "
    "'avg_rating_by_category.png', 'user_item_matrix.png'"
)
print(saved_message)