In [3]:
import pandas as pd
from pathlib import Path

# Folder that is scanned for input *.csv files. The path is relative, so it is
# resolved against the working directory of the running kernel.
DATA_DIR = Path("sales_data")
# Workbook the results are exported to (also relative to the working directory).
OUTPUT_XLSX = Path("final_sales_data.xlsx")


In [5]:
# Collect the input CSVs in sorted order. Path.glob yields entries in an
# arbitrary, filesystem-dependent order, and that order determines the row
# order of the combined frame; sorting makes every run reproducible.
files = sorted(DATA_DIR.glob("*.csv"))
if not files:
    # Fail early with the absolute path so a wrong working directory is obvious.
    raise FileNotFoundError(f"No CSV files found in: {DATA_DIR.resolve()}")

# Read every file and stack the rows into one frame with a fresh 0..n-1 index.
df = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)
df.head()


Unnamed: 0,store_id,product,price,quantity
0,1,A,100,2
1,2,B,200,1
2,3,C,150,3
3,1,D,120,4
4,2,E,300,1


In [6]:
# Basic cleaning, applied in this order:
df = (
    df
    .drop_duplicates()   # 1. remove rows that are exact copies of an earlier row
    .dropna(how="any")   # 2. discard rows with a missing value in any column
)


In [7]:
# Coerce price and quantity to numbers (entries that cannot be parsed become
# NaN), derive the per-row sale value, then drop the rows where that value
# could not be computed.
df = (
    df.assign(
        price=lambda frame: pd.to_numeric(frame["price"], errors="coerce"),
        quantity=lambda frame: pd.to_numeric(frame["quantity"], errors="coerce"),
        # Evaluated after the two coercions above, so it sees the numeric columns.
        total_sale_value=lambda frame: frame["price"] * frame["quantity"],
    )
    .dropna(subset=["total_sale_value"])
)
df.head()


Unnamed: 0,store_id,product,price,quantity,total_sale_value
0,1,A,100,2,200
1,2,B,200,1,200
2,3,C,150,3,450
3,1,D,120,4,480
4,2,E,300,1,300


In [8]:
# Rank stores by their summed sale value and keep the five best.
# store_id is a secondary sort key: stores with equal totals then come out in
# a fixed order, so the cut-off at five rows does not depend on how the sort
# implementation happens to order ties.
top_5 = (
    df.groupby("store_id", as_index=False)["total_sale_value"]
      .sum()
      .sort_values(["total_sale_value", "store_id"], ascending=[False, True])
      .head(5)
)
top_5


Unnamed: 0,store_id,total_sale_value
2,3,950
3,4,900
0,1,680
1,2,500
4,5,500


In [10]:
# %pip installs into the environment of the running kernel, whereas !pip runs
# whichever pip is first on PATH and may target a different Python.
# The version is pinned to the release this notebook was run with; -q keeps the log short.
%pip install -q openpyxl==3.1.5


Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl

   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openpyxl]
   -------------------- ------------------- 1/2 [openp


[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
# Export both tables into a single workbook, one worksheet per table.
# The context manager saves and closes the file when the block exits.
with pd.ExcelWriter(OUTPUT_XLSX, engine="openpyxl") as workbook:
    for sheet_name, frame in (("all_sales", df), ("top_5_stores", top_5)):
        frame.to_excel(workbook, sheet_name=sheet_name, index=False)

print("Excel saved at:", OUTPUT_XLSX.resolve())


Excel saved at: C:\Users\barka\final_sales_data.xlsx
