In [None]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the cleaned tourism workbook into a DataFrame. The path is relative to
# the notebook's working directory and the file is parsed with openpyxl.
df = pd.read_excel(
    io="Cleaned Tourism Data.xlsx",
    engine="openpyxl",
)

# Quick structural check: column names, dtypes and non-null counts.
df.info()

Price Distribution

In [None]:

# Histogram of tour prices with a kernel-density curve drawn on top.
# histplot returns the Axes it drew on, so labels are set on that object.
price_ax = sns.histplot(df['Price (GHS)'], kde=True, color='skyblue')
price_ax.set_title("Distribution of Tour Prices")
price_ax.set_xlabel("Price (GHS)")
price_ax.set_ylabel("Frequency")
plt.show()


Most Popular Destinations

In [None]:
# Ten most frequently booked destinations; value_counts() sorts in descending
# order, so head(10) keeps the ten largest counts.
top_destinations = df['Destination'].value_counts().head(10)

plt.figure(figsize=(10, 6))

# seaborn >= 0.13 deprecates passing `palette` without `hue`, so the
# destination is also supplied as `hue`. dodge=False keeps a single full-width
# bar per destination instead of reserving a slot for every hue level.
ax = sns.barplot(
    x=top_destinations.values,
    y=top_destinations.index,
    hue=top_destinations.index,
    palette='viridis',
    dodge=False,
)

# Some seaborn versions add a legend whenever `hue` is set; here it would only
# repeat the y-axis labels, so remove it if one was created.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

plt.title('Top 10 Destinations')
plt.xlabel('Number of Bookings')
plt.ylabel('Destination')
plt.tight_layout()
plt.show()




Booking Channels and Booking Types

In [None]:
# One row per (Booking Channel, Booking Type) pair, with the number of
# bookings in that pair stored in a 'Count' column.
channel_type_counts = (
    df.groupby(['Booking Channel', 'Booking Type'])
    .size()
    .reset_index(name='Count')
)

# Grouped bar chart: channels on the x-axis, one coloured bar per booking type.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=channel_type_counts,
    x='Booking Channel',
    y='Count',
    hue='Booking Type',
    palette='coolwarm',
    ax=ax,
)

ax.set_title('Bookings by Channel and Type')
ax.set_xlabel('Booking Channel')
ax.set_ylabel('Number of Bookings')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
# Parse the 'Date' column as datetimes; errors='coerce' turns values that
# cannot be parsed into NaT instead of raising.
parsed_dates = pd.to_datetime(df['Date'], errors='coerce')
df['Date'] = parsed_dates

# Full month name (e.g. 'January') for each booking, derived from the parsed date.
df['Month'] = parsed_dates.dt.strftime('%B')


Monthly Revenue Trend

In [None]:
# Months to show on the chart, in the order they should appear on the x-axis.
month_order = ['January', 'February', 'October', 'November', 'December']

# pd.Categorical turns any month that is missing from month_order into NaN,
# which would silently drop those bookings from the totals below, so report
# them up front. Month names are rebuilt from 'Date' (already converted to
# datetime earlier in the notebook) so the check still works when this cell is
# re-run after 'Month' has been made categorical.
month_names = df['Date'].dt.strftime('%B')
unlisted_months = sorted(set(month_names.dropna()) - set(month_order))
if unlisted_months:
    print(
        f"Warning: bookings in {unlisted_months} are not in month_order "
        "and are excluded from the monthly revenue chart."
    )

df['Month'] = pd.Categorical(month_names, categories=month_order, ordered=True)

# Total revenue per month. observed=False keeps every month in month_order
# (a month with no bookings sums to 0) and spells out the behaviour that newer
# pandas versions warn about when grouping on a categorical without it.
monthly_revenue = (
    df.groupby('Month', observed=False)['Price (GHS)']
    .sum()
    .reset_index()
)

# plt and sns come from the import cell at the top of the notebook; they are
# not re-imported here.
plt.figure(figsize=(10, 6))
sns.lineplot(data=monthly_revenue, x='Month', y='Price (GHS)', marker='o')
plt.title('Monthly Revenue')
plt.xlabel('Month')
plt.ylabel('Revenue (GHS)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


Best Selling Tour Packages

In [None]:
# Number of bookings per tour package, most-booked first.
tour_package_counts = df['Tour Package'].value_counts()

# Slice the ten most-booked packages once and reuse the slice for both axes.
top_packages = tour_package_counts.head(10)

plt.figure(figsize=(12, 6))

# seaborn >= 0.13 deprecates passing `palette` without `hue`, so the package
# name is also supplied as `hue`. dodge=False keeps a single full-width bar
# per package instead of reserving a slot for every hue level.
ax = sns.barplot(
    x=top_packages.index,
    y=top_packages.values,
    hue=top_packages.index,
    palette='cubehelix',
    dodge=False,
)

# Some seaborn versions add a legend whenever `hue` is set; here it would only
# repeat the x-axis labels, so remove it if one was created.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

plt.title('Top 10 Tour Packages by Number of Bookings')
plt.xlabel('Tour Package')
plt.ylabel('Number of Bookings')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()





Revenue by Season

In [None]:
# Total revenue for each season.
season_revenue = df.groupby('Season')['Price (GHS)'].sum()

# Pie chart of each season's share of revenue, labelled with one-decimal
# percentages.
# NOTE(review): only two wedge colours are supplied; presumably the data has
# two seasons - confirm.
fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    season_revenue,
    labels=season_revenue.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=['#8da0cb', '#fc8d62'],
)
ax.set_title('Revenue by Season')
ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.tight_layout()
plt.show()



Customer Counts per Season

In [None]:

# Number of rows recorded for each season, ordered by season label.
season_counts = df['Season'].value_counts().sort_index()

# Bar chart with one bar per season.
fig, ax = plt.subplots(figsize=(8, 6))
season_counts.plot.bar(color='#66c2a5', ax=ax)

ax.set_title('Count of Customers')
ax.set_xlabel('Season')
ax.set_ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
