In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# set visual style for plots
sns.set(style="whitegrid")
%matplotlib inline

In [None]:
# Read the chocolate sales records into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv(filepath_or_buffer='Chocolate_Sales.csv')

# Preview the top of the table (5 rows, which is also head()'s default).
df.head(n=5)

In [None]:
# Normalise column types so the numeric and date-based steps below can use them.
#
# 'Amount': drop the '$' sign and ',' separators, then cast to float.
# The pattern is written as a raw string because '\$' inside a normal string
# literal is an invalid escape sequence, which Python reports with a warning.
# The regex that pandas receives is unchanged.
df['Amount'] = df['Amount'].replace(r'[\$,]', '', regex=True).astype(float)

# 'Date': parse into datetime values; no explicit format is given, so pandas
# infers it from the data.
df['Date'] = pd.to_datetime(df['Date'])

# Show the resulting dtypes to confirm both conversions took effect.
df.dtypes

In [None]:
# Tally the missing entries in each column and print the per-column counts.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Descriptive statistics from describe() with its default settings,
# displayed as the cell's output.
df.describe()

In [None]:
# Total revenue generated by each country.
#
# The totals are computed with pandas first, so every bar is exactly the summed
# 'Amount' for that country. Asking barplot to do the sum through `estimator`
# on the raw rows also draws bootstrapped error bars on top of the totals, which
# is not what a plain "total revenue" chart is meant to show.
# sort=False keeps the countries in order of first appearance, i.e. the same
# bar order seaborn produces from the raw rows.
revenue_by_country = df.groupby('Country', sort=False, as_index=False)['Amount'].sum()

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=revenue_by_country, x='Country', y='Amount', ax=ax)
ax.set_title('Total Revenue by Country')
ax.set_xlabel('Country')
ax.set_ylabel('Total Amount')
# Tilt the country names so long labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Histogram (20 bins) of 'Boxes Shipped' with a KDE curve overlaid, to show how
# the shipment sizes are spread.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Boxes Shipped'], bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Boxes Shipped')
plt.show()

In [None]:
# Rank products by the total number of boxes shipped, largest first.
product_analysis = (
    df
    .groupby('Product')['Boxes Shipped']
    .sum()
    .sort_values(ascending=False)
)
print(product_analysis)