In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Pandas display configuration for the whole notebook, set through the
# attribute-style options API:
#   - max_columns = None : never elide columns when printing a frame
#   - width = 1000       : allow very wide console lines before wrapping
#   - float_format       : render floats with exactly two decimals
pd.options.display.max_columns = None
pd.options.display.width = 1000
pd.options.display.float_format = '{:.2f}'.format

In [None]:
def setup_directories():
    """Make sure the output folders exist and hand back their paths.

    Creates ``output`` and ``output/plots`` relative to the current working
    directory. Folders that already exist are left untouched.

    Returns:
        tuple: ``(output_dir, plots_dir)`` as ``pathlib.Path`` objects.
    """
    root = Path("output")
    plots = root / "plots"
    # Parent first: neither mkdir call is asked to create missing parents.
    for folder in (root, plots):
        folder.mkdir(exist_ok=True)
    return root, plots

In [None]:
def load_and_inspect_data(csv_path):
    """Read a CSV into a DataFrame and print a short overview of it.

    The file is first read with pandas' default encoding. If that raises
    ``UnicodeDecodeError`` the read is retried once with ``cp1252``.

    Args:
        csv_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        pandas.DataFrame: The loaded data, unmodified.

    Side effects:
        Prints the frame's shape, column names and per-column null counts.
    """
    try:
        frame = pd.read_csv(csv_path)
    except UnicodeDecodeError:
        # Default decoding failed; retry with the Windows-1252 code page.
        frame = pd.read_csv(csv_path, encoding='cp1252')

    print("\nData Overview:")
    print(f"Shape: {frame.shape}")
    column_names = frame.columns.tolist()
    print("Columns:", column_names)
    print("\nMissing Values:")
    nulls_per_column = frame.isnull().sum()
    print(nulls_per_column)
    return frame

In [None]:
def clean_and_preprocess(df):
    """Return a cleaned copy of the raw transactions frame.

    Steps, in order:
      1. Drop rows where ``CustomerID`` or ``InvoiceNo`` is null.
      2. Parse ``InvoiceDate`` with ``pd.to_datetime``.
      3. Keep only rows with ``Quantity > 0`` and ``UnitPrice > 0``.
      4. Add ``TotalPrice = Quantity * UnitPrice``.
      5. Cast ``CustomerID`` to ``str``.

    The input frame is never modified. Explicit ``.copy()`` calls are made
    before column assignment so pandas does not emit
    ``SettingWithCopyWarning`` for writing into a derived frame.

    Args:
        df: DataFrame containing at least the columns ``CustomerID``,
            ``InvoiceNo``, ``InvoiceDate``, ``Quantity`` and ``UnitPrice``.

    Returns:
        pandas.DataFrame: A new frame with the cleaned rows and the extra
        ``TotalPrice`` column.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    print("\nCleaning and preprocessing data...")

    cleaned = df.dropna(subset=["CustomerID", "InvoiceNo"]).copy()
    cleaned["InvoiceDate"] = pd.to_datetime(cleaned["InvoiceDate"])

    is_valid_sale = (cleaned["Quantity"] > 0) & (cleaned["UnitPrice"] > 0)
    cleaned = cleaned.loc[is_valid_sale].copy()

    cleaned["TotalPrice"] = cleaned["Quantity"] * cleaned["UnitPrice"]
    # Null CustomerIDs were already dropped above, so no fill value is needed.
    # NOTE(review): if CustomerID was parsed as float, the resulting strings
    # keep a trailing ".0" -- confirm that is the intended ID format.
    cleaned["CustomerID"] = cleaned["CustomerID"].astype(str)
    return cleaned

In [None]:
def basic_analysis(df):
    """Print summary statistics and the ten best-selling products.

    Args:
        df: DataFrame with a ``Description`` column and a ``TotalPrice``
            column; ``describe()`` is run over the whole frame.

    Returns:
        None. Everything is written to standard output.
    """
    print("\nBasic Statistics:")
    summary = df.describe()
    print(summary)

    print("\nTop 10 Products by Sales:")
    revenue_by_product = df.groupby("Description")["TotalPrice"].sum()
    best_sellers = revenue_by_product.sort_values(ascending=False).head(10)
    print(best_sellers)

In [None]:
def _save_series_plot(series, path, *, kind, title, figsize,
                      xlabel=None, ylabel=None, rot=None):
    """Plot ``series`` on its own figure, save it to ``path`` and close it."""
    fig, ax = plt.subplots(figsize=figsize)
    try:
        series.plot(kind=kind, ax=ax, rot=rot)
        ax.set_title(title)
        # Axis labels are only overridden when given, so pandas' default
        # label is kept otherwise.
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        if ylabel is not None:
            ax.set_ylabel(ylabel)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


def create_visualizations(df, plots_dir):
    """Render the three revenue charts and save them as PNG files.

    The whole function lives in a single cell. When the body is spread over
    several cells, the continuation cells start with indented code and
    cannot run, and the definition cell alone would only print a message.

    Files written into ``plots_dir``:
      * ``daily_sales_trend.png`` -- line chart of ``TotalPrice`` summed per
        calendar day of ``InvoiceDate``.
      * ``top_countries.png``     -- bar chart of the ten countries with the
        highest summed ``TotalPrice``.
      * ``top_products.png``      -- horizontal bar chart of the ten product
        descriptions with the highest summed ``TotalPrice``.

    Args:
        df: DataFrame with ``InvoiceDate`` (datetime-like, required by
            ``resample``), ``TotalPrice``, ``Country`` and ``Description``.
        plots_dir: ``pathlib.Path`` of an existing directory for the images.

    Returns:
        None.
    """
    print("\nCreating visualizations...")

    # Daily sales
    daily_revenue = df.set_index("InvoiceDate").resample("D")["TotalPrice"].sum()
    _save_series_plot(
        daily_revenue,
        plots_dir / "daily_sales_trend.png",
        kind="line",
        title="Daily Sales Trend",
        figsize=(12, 6),
        xlabel="Date",
        ylabel="Revenue",
    )

    # Top 10 countries
    top_countries = (
        df.groupby("Country")["TotalPrice"].sum()
        .sort_values(ascending=False)
        .head(10)
    )
    _save_series_plot(
        top_countries,
        plots_dir / "top_countries.png",
        kind="bar",
        title="Top 10 Countries by Revenue",
        figsize=(10, 6),
        ylabel="Revenue",
        rot=45,
    )

    # Top 10 products
    top_products = (
        df.groupby("Description")["TotalPrice"].sum()
        .sort_values(ascending=False)
        .head(10)
    )
    _save_series_plot(
        top_products,
        plots_dir / "top_products.png",
        kind="barh",
        title="Top 10 Products by Revenue",
        figsize=(10, 6),
        xlabel="Revenue",
    )

In [None]:
def analyze_customer_behavior(df):
    """Summarise orders, units and revenue per customer and print the top five.

    The whole function lives in a single cell so the summary frame is in
    scope when it is printed. Aggregations are given by name (``"nunique"``,
    ``"sum"``) rather than as ``pd.Series.nunique`` / ``np.sum`` callables;
    recent pandas versions emit a ``FutureWarning`` when ``np.sum`` is passed
    to ``agg``.

    Args:
        df: DataFrame with ``CustomerID``, ``InvoiceNo``, ``Quantity`` and
            ``TotalPrice`` columns.

    Returns:
        pandas.DataFrame: One row per ``CustomerID`` (the index) with columns
        ``NumOrders`` (distinct invoice numbers), ``Quantity`` (sum) and
        ``TotalPrice`` (sum). Callers may ignore the return value.
    """
    print("\nCustomer Analysis:")
    customer_df = df.groupby("CustomerID").agg(
        NumOrders=("InvoiceNo", "nunique"),
        Quantity=("Quantity", "sum"),
        TotalPrice=("TotalPrice", "sum"),
    )

    print("\nTop 5 Customers by Revenue:")
    print(customer_df.sort_values("TotalPrice", ascending=False).head())
    return customer_df

In [None]:
def save_outputs(df, output_dir):
    """Write the cleaned frame to ``cleaned_online_retail.csv`` and report it.

    Args:
        df: DataFrame to persist; the index is not written.
        output_dir: ``pathlib.Path`` of the destination directory.

    Returns:
        None.
    """
    destination = output_dir / "cleaned_online_retail.csv"
    df.to_csv(destination, index=False)
    print(f"\nCleaned data saved to {destination}")

In [None]:
def main(csv_path=None):
    """Run the full pipeline: load, clean, analyse, plot and save.

    Args:
        csv_path: Optional path to the input CSV. When omitted, the original
            hard-coded location is used so a bare ``main()`` call behaves as
            before.

    Returns:
        None. Progress and errors are reported on standard output; errors
        raised by the pipeline are caught here rather than propagated.
    """
    import traceback

    if csv_path is None:
        # NOTE(review): machine-specific default kept only for backward
        # compatibility -- pass ``csv_path`` explicitly on other machines.
        csv_path = r"C:\Users\ayush\OneDrive\Desktop\Copy of Online Retail.csv"

    # Fail fast with an accurate message instead of creating output folders
    # and then reporting a missing file as a possible encoding problem.
    if not Path(csv_path).is_file():
        print(f"An error occurred: input CSV not found at {csv_path}")
        return

    output_dir, plots_dir = setup_directories()

    try:
        df = load_and_inspect_data(csv_path)
        df_clean = clean_and_preprocess(df)
        basic_analysis(df_clean)
        create_visualizations(df_clean, plots_dir)
        analyze_customer_behavior(df_clean)
        save_outputs(df_clean, output_dir)
        print("\n🎉 Analysis complete!")
    except Exception as e:
        print(f"An error occurred: {e}")
        if isinstance(e, UnicodeDecodeError):
            # Only suggest re-saving the file when decoding actually failed.
            print("If this is an encoding issue, try opening the CSV file and resaving it as a standard CSV.")
        else:
            # Keep the best-effort behaviour but show where the failure came from.
            traceback.print_exc()

In [None]:
# Entry point: run the pipeline only when this code executes as the top-level
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()