# Title: Analyzing Sales Data from Multiple File Formats

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the three raw sources from the local "datasets" folder.
# The CSV is decoded explicitly as cp1252 instead of pandas' default encoding.
csv_data = pd.read_csv(filepath_or_buffer="datasets/sales_data_sample.csv", encoding="cp1252")
excel_data = pd.read_excel(io="datasets/Sample-Sales-Data.xlsx")
json_data = pd.read_json(path_or_buf="datasets/customers.json")

In [None]:
def structure(data, name="Data"):
    """Show a quick structural overview of a DataFrame.

    Emits, in order: a header line containing ``name``, the first rows
    (``head``), the last rows (``tail``), the ``info`` summary and the
    ``describe`` statistics.

    Args:
        data: The pandas DataFrame to inspect.
        name: Label used in the header line.

    Returns:
        None. The function only produces output.
    """
    # `display` is injected as a builtin by IPython/Jupyter; fall back to
    # `print` so the helper also works when run as a plain Python script.
    try:
        show = display
    except NameError:
        show = print

    print(f"--- {name} ---")
    show(data.head())
    show(data.tail())
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so it must be called directly rather than passed to display().
    data.info()
    show(data.describe())

def clean_data(data):
    """Print per-column missing-value counts, then clean ``data`` in place.

    Rows containing any missing value are removed first, followed by
    duplicate rows. The passed DataFrame itself is modified; nothing is
    returned.
    """
    missing_per_column = data.isna().sum()
    print(missing_per_column)

    # Both steps mutate the caller's frame; order matters (NaN rows go first).
    for drop_step in (data.dropna, data.drop_duplicates):
        drop_step(inplace=True)

In [None]:
# Preview each loaded dataset (head, tail, info, describe) under its own label.
structure(csv_data, "CSV Data")
structure(excel_data, "Excel Data")
structure(json_data, "JSON Data")

In [None]:
# Print missing-value counts for the CSV data, then drop incomplete and duplicate rows in place.
clean_data(csv_data)

In [None]:
# Same in-place cleaning for the Excel data.
clean_data(excel_data)

In [None]:
# Same in-place cleaning for the JSON data.
# NOTE(review): json_data is not included in the pd.concat call in the visible cells — confirm it is used later.
clean_data(json_data)

In [None]:
# Stack the cleaned CSV and Excel rows into one frame with a fresh 0..n-1 index.
# With the default outer join, a column present in only one source is NaN for the other's rows.
unified_data = pd.concat(objs=[csv_data, excel_data], axis=0, ignore_index=True)
# Cell output: five randomly chosen rows as a sanity check.
unified_data.sample(n=5)

In [None]:
# Descriptive statistics for the two numeric columns of interest.
# NOTE(review): presumably "SALES" comes from the CSV and "Value" from the Excel file — confirm.
display(unified_data[["SALES","Value"]].describe())

# Sum of the SALES column (pandas skips NaN entries by default).
total_sales = unified_data['SALES'].sum()
print("Total Sales:", total_sales)

# Despite its name, this holds the mean SALES per ORDERNUMBER, not per category.
category_sales = unified_data.groupby('ORDERNUMBER')['SALES'].mean()
print("\nAverage Sales by Order Number:\n", category_sales)

In [None]:
# Box plot of the SALES distribution.
# The series is passed explicitly as x= so the box is horizontal and the sales
# values lie on the x-axis, matching the x-label set below. A bare positional
# argument is interpreted as `data` by seaborn 0.12+, which draws a vertical
# box and leaves the x-label on the wrong axis.
sns.boxplot(x=unified_data['SALES'])
plt.title('Sales Statistics (Box Plot)')
plt.xlabel('Sales Value')
plt.show()
