In [None]:
from google.colab import drive
# Attach Google Drive to the Colab runtime under a named mount point.
# force_remount=True asks Colab to mount again even if a mount already exists.
gdrive_mount_point = "/content/gdrive"
drive.mount(gdrive_mount_point, force_remount=True)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the business dataset from the mounted Drive folder
df = pd.read_csv('/content/gdrive/MyDrive/ICE-V2-Dataset/Datasets/data_35business_data.csv')

# Weekday labels in calendar order, Monday first
# (assumes day_of_week holds full English day names -- TODO confirm against the CSV)
natural_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Build the per-weekday summary in one chain:
#   'count'               -> number of non-null is_holiday entries for the day
#   'sum'                 -> total of is_holiday for the day
#                            (presumably boolean / 0-1 flags, i.e. the number of holidays)
#   'percentage_holidays' -> sum / count * 100
# The final reindex puts the rows in weekday order; a weekday that never occurs
# in the data ends up as a row of NaN values.
grouped = (
    df.groupby('day_of_week')['is_holiday']
    .agg(['count', 'sum'])
    .assign(percentage_holidays=lambda stats: stats['sum'] / stats['count'] * 100)
    .reindex(natural_order)
)

# Create a bar chart
fig, ax = plt.subplots(figsize=(12, 6))

# Draw two bars per weekday anchored at the same tick: with align='edge' the
# positive-width blue bar (total count) extends to the right of the tick and
# the negative-width orange bar (holidays) extends to the left.
grouped['count'].plot(kind='bar', ax=ax, color='blue', width=0.2, align='edge')
grouped['sum'].plot(kind='bar', ax=ax, color='orange', width=-0.2, align='edge')

ax.set_xticklabels(grouped.index, rotation=0)
ax.set_xlabel('Day of Week')
ax.set_ylabel('Counts')
ax.set_title('Occurrences and Holidays by Day of Week')

# The axes were created empty above, so its patches are exactly the bars drawn
# by the two plot calls: first one bar per row of `grouped` for 'count', then
# one bar per row for 'sum', both in row order. Pairing bars with rows by
# position (instead of searching `grouped` for a matching bar height) keeps the
# labels correct when two weekdays share the same holiday total.
bars = list(ax.patches)
n_days = len(grouped)
count_bars = bars[:n_days]
holiday_bars = bars[n_days:2 * n_days]

# Shared placement for all value labels: centered, 5 points above the bar top.
label_style = dict(ha='center', va='bottom', xytext=(0, 5), textcoords='offset points')

# Label each blue bar with the total number of records for that weekday.
for bar, total in zip(count_bars, grouped['count']):
    if pd.isna(total):
        # Weekday absent from the data (NaN row after reindex): nothing to label.
        continue
    ax.annotate(f'{total:.0f}', (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                **label_style)

# Label each orange bar with that weekday's holiday percentage, written vertically.
for bar, percentage in zip(holiday_bars, grouped['percentage_holidays']):
    if pd.isna(percentage):
        # No data for this weekday, or a 0/0 percentage: skip instead of failing.
        continue
    ax.annotate(f'{percentage:.2f}%', (bar.get_x() + bar.get_width() / 2., bar.get_height()),
                rotation='vertical', **label_style)

# Add a legend (labels are given in the order the bars were drawn)
ax.legend(['Total Count', 'Total Holidays'])

# Reset the x-limits so every weekday's pair of bars is fully visible
ax.set_xlim(-0.5, len(natural_order) - 0.5)

plt.show()