In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the raw sales data. The path is relative, so BigMartSalesData.csv
# must be present in the current working directory.
df = pd.read_csv('BigMartSalesData.csv')

# --- Data Preparation ---

# Parse the invoice timestamps so the .dt accessor can be used below
# (assumes 'InvoiceDate' is the date column).
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# Keep only the rows dated in 2011. The explicit .copy() makes df_2011 an
# independent frame: adding the 'Month' column below then neither raises
# pandas' SettingWithCopyWarning nor depends on view-vs-copy semantics.
df_2011 = df[df['InvoiceDate'].dt.year == 2011].copy()

# Month name (e.g. 'January') used as the grouping key for monthly totals
df_2011['Month'] = df_2011['InvoiceDate'].dt.month_name()

# 1. Total Sales per Month (Line plot)

# Calendar order of the month names. groupby returns its keys sorted
# alphabetically, so the totals are re-aligned to this order before plotting.
months_order = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

# Sum SalesAmount per month name, then put the result in calendar order.
sales_per_month = (
    df_2011.groupby('Month')['SalesAmount']
    .sum()
    .reindex(months_order)
)

# Line plot of the monthly totals, one marker per month.
line_fig, line_ax = plt.subplots(figsize=(10, 6))
line_ax.plot(sales_per_month.index, sales_per_month.to_numpy(), marker='o')
line_ax.set_title('Total Sales Per Month for Year 2011 (Line Plot)')
line_ax.set_xlabel('Month')
line_ax.set_ylabel('Total Sales')
line_ax.tick_params(axis='x', labelrotation=45)  # tilt the month names
line_ax.grid(True)
line_fig.tight_layout()
plt.show()

# Report the month whose total is the smallest
lowest_sales_month = sales_per_month.idxmin()
print(f"Month with lowest sales: {lowest_sales_month}")

# 2. Total Sales per Month (Bar chart with values on bars)
#
# Draws the monthly totals held in sales_per_month as bars and writes each
# total, formatted with thousands separators, just above its bar.

plt.figure(figsize=(10,6))
bars = plt.bar(sales_per_month.index, sales_per_month.values, color='skyblue')
plt.title('Total Sales Per Month for Year 2011 (Bar Chart)')
plt.xlabel('Month')
plt.ylabel('Total Sales')
plt.xticks(rotation=45)

# Enhancement: Show value on top of each bar
for bar in bars:
    height = bar.get_height()
    # A month absent from the data is NaN after the calendar-order reindex.
    # int(NaN) raises ValueError, so such bars are simply left unlabelled
    # instead of aborting the whole chart.
    if pd.isna(height):
        continue
    plt.text(bar.get_x() + bar.get_width()/2, height, f'{int(height):,}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Bar chart vs line plot?
print("Bar chart better visualizes discrete monthly sales amounts and exact values, helpful for top management to compare months clearly.")

# 3. Pie Chart for Sales by Country

# Total SalesAmount per country for the 2011 rows
sales_by_country = df_2011.groupby('Country')['SalesAmount'].sum()

# One wedge per country, labelled with the country name and its percentage
# share (one decimal place).
pie_fig, pie_ax = plt.subplots(figsize=(8, 8))
pie_ax.pie(
    sales_by_country,
    labels=sales_by_country.index,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
pie_ax.set_title('Country Wise Sales Contribution for 2011')
pie_ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

# Report the country with the largest total
highest_sales_country = sales_by_country.idxmax()
print(f"Country with highest sales contribution: {highest_sales_country}")

# 4. Scatter Plot of Invoice Amounts

# Assumes the 'InvoiceAmount' column holds the invoice totals.
invoice_amounts = df_2011['InvoiceAmount']

# Plot every amount against its positional index (0, 1, 2, ...) within the
# 2011 data.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(range(len(invoice_amounts)), invoice_amounts, c='orange', alpha=0.5)
scatter_ax.set_title('Scatter Plot of Invoice Amounts in 2011')
scatter_ax.set_xlabel('Invoice Index')
scatter_ax.set_ylabel('Invoice Amount')
scatter_ax.grid(True)
plt.show()

# Summary statistics that help locate where the amounts are concentrated
print(f"Invoice amount stats: min={df_2011['InvoiceAmount'].min()}, max={df_2011['InvoiceAmount'].max()}, mean={df_2011['InvoiceAmount'].mean()}")

# Optional: a 50-bin histogram shows the concentration more directly
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
hist_ax.hist(invoice_amounts, bins=50, color='green', alpha=0.7)
hist_ax.set_title('Histogram of Invoice Amounts')
hist_ax.set_xlabel('Invoice Amount')
hist_ax.set_ylabel('Frequency')
plt.show()
