In [3]:
# Step 1: Import Required Libraries
import pandas as pd
import sqlite3

# Step 2: Read Sales Data
# Each file-based source is loaded into its own DataFrame and previewed with a
# labelled head() so the printed tables can be told apart in the cell output.

# 1. Read CSV File
df_csv = pd.read_csv("sales_data (1).csv")
print("CSV Data:\n", df_csv.head())

# 2. Read Excel File
# Reading .xlsx needs an Excel engine (openpyxl) installed in the kernel's environment.
df_excel = pd.read_excel("sales_data.xlsx", sheet_name="SalesSheet")
print("\nExcel Data:\n", df_excel.head())

# 3. Read JSON File
df_json = pd.read_json("sales_data.json")
print("\nJSON Data:\n", df_json.head())

# 4. Read SQL Database
# Open the SQLite file; sqlite3 creates it if it does not exist yet.
conn = sqlite3.connect("sales_data.db")

# Make sure the "sales" table exists so the SELECT below cannot fail on a
# fresh database. Only the schema is created here -- no rows are inserted.
with conn:  # commits on success; the connection itself stays open
    conn.execute("""
CREATE TABLE IF NOT EXISTS sales (
    OrderID INTEGER,
    Product TEXT,
    Category TEXT,
    Price REAL,
    Quantity INTEGER
)
""")

# Pull the whole table into a DataFrame and preview it
df_sql = pd.read_sql_query("SELECT * FROM sales", conn)
print("\nSQL Data:\n", df_sql.head())

# Step 3: Merge & Process Data

# Merge all data into one DataFrame.
# Sources without rows (e.g. a "sales" table that was only just created) are
# left out of the concat: pandas >= 2.1 emits a FutureWarning for empty
# entries, and older versions may let an empty object-dtype frame upcast the
# numeric columns. If every source is empty, fall back to the full list so the
# result is still an empty frame with the expected columns.
sources = [df_csv, df_excel, df_json, df_sql]
non_empty_sources = [frame for frame in sources if not frame.empty]

df = (
    pd.concat(non_empty_sources or sources, ignore_index=True)
    # The same order can appear in several sources; keep one copy of each row
    .drop_duplicates()
    # Most expensive items first
    .sort_values(by="Price", ascending=False)
    # Revenue per order line = unit price * units sold
    .assign(Total_Revenue=lambda frame: frame["Price"] * frame["Quantity"])
)

print("\nProcessed Data:\n", df.head())

# Step 4: Export Cleaned Data
# All exports drop the DataFrame index (index=False / orient="records").
# The try/finally guarantees the SQLite connection is closed even when one of
# the writers raises (for example, to_excel without an Excel engine installed).
try:
    # Save to CSV
    df.to_csv("cleaned_sales_data.csv", index=False)

    # Save to Excel
    df.to_excel("cleaned_sales_data.xlsx", index=False, sheet_name="ProcessedSales")

    # Save to JSON
    df.to_json("cleaned_sales_data.json", orient="records", indent=4)

    # Save to SQL (overwrites the table if it already exists)
    df.to_sql("cleaned_sales_data", conn, if_exists="replace", index=False)
finally:
    conn.close()


CSV Data:
    OrderID     Product     Category  Price  Quantity
0      101      Laptop  Electronics    900         1
1      102       Shoes      Fashion    120         2
2      103        Book        Books     30         3
3      104  Smartphone  Electronics    600         1
4      105       Jeans      Fashion     80         4
   OrderID     Product     Category  Price  Quantity
0      101      Laptop  Electronics    900         1
1      102       Shoes      Fashion    120         2
2      103        Book        Books     30         3
3      104  Smartphone  Electronics    600         1
4      105       Jeans      Fashion     80         4

JSON Data:
    OrderID     Product     Category  Price  Quantity
0      101      Laptop  Electronics    900         1
1      102       Shoes      Fashion    120         2
2      103        Book        Books     30         3
3      104  Smartphone  Electronics    600         1
4      105       Jeans      Fashion     80         4

SQL Data:
    OrderID

In [4]:
# openpyxl is the engine pandas uses to read and write .xlsx files, so this
# cell must be run before any cell that calls pd.read_excel / DataFrame.to_excel.
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install openpyxl





[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip
