In [2]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

In [3]:
# Load variables from the project's .env file (if any) into the process
# environment, then read the database password; db_pass is None when DB_PASS
# is not set.
load_dotenv()
db_pass = os.environ.get('DB_PASS')

In [4]:
# Build the connection URL from its components instead of an f-string so that
# a password containing characters such as '@', ':' or '/' is escaped
# correctly, and an unset password is omitted rather than sent as the literal
# text "None".
db_connection_url = URL.create(
    drivername='postgresql+psycopg2',
    username='postgres',
    password=db_pass,
    host='localhost',
    port=5432,
    database='retail_db',
)
# Kept for backward compatibility with any cell that reads the string form.
db_connection_str = db_connection_url.render_as_string(hide_password=False)
db_connection = create_engine(db_connection_url)

# create_engine() is lazy and does not contact the server. Open and close one
# connection so "Connected" is only printed once the database is reachable.
with db_connection.connect():
    pass
print("Connected")

Connected


In [5]:
from sqlalchemy import text

In [13]:
# Load the cleaned retail CSV and parse the invoice timestamps.
try:
    df = pd.read_csv('../data/cleaned_online_retail.csv')
except FileNotFoundError:
    # Stop here: falling through would either raise NameError on a fresh
    # kernel or silently re-upload a stale `df` left over from an earlier run.
    print("File not located")
    raise
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
print(f"{len(df)} rows ready.")

# Uploading to PostgreSQL
# if_exists='replace' drops and recreates the table, and PostgreSQL refuses to
# drop a table that views still depend on. Remove the summary views first so
# this cell can be re-run; they are recreated further down in the notebook.
with db_connection.begin() as conn:
    conn.execute(text('DROP VIEW IF EXISTS v_monthly_summary'))
    conn.execute(text('DROP VIEW IF EXISTS v_product_summary'))
df.to_sql('transactions', con=db_connection, if_exists='replace', index=False)
print("Upload successful")

779495 rows ready.
Upload successful


In [14]:
# Monthly Trends (Revenue & Order Count over time)
# DDL for the v_monthly_summary view: groups the transactions table by the
# 'YYYY-MM' text form of "InvoiceDate" and, per month, sums "TotalAmount" and
# counts distinct "Invoice" values. CREATE OR REPLACE redefines the view if it
# already exists.
# NOTE(review): the double-quoted identifiers are case-sensitive column names;
# presumably they match the CSV headers written by to_sql — verify.
query_monthly = """
CREATE OR REPLACE VIEW v_monthly_summary AS
SELECT
    TO_CHAR("InvoiceDate", 'YYYY-MM') as Month,
    SUM("TotalAmount") as Revenue,
    COUNT(DISTINCT "Invoice") as Order_Count
FROM transactions
GROUP BY 1
ORDER BY 1;
"""

# Top Products (Best sellers by revenue)
# DDL for the v_product_summary view: groups the transactions table by
# "Description" and, per product, sums "Quantity" and "TotalAmount"; rows are
# ordered by the third output column (Revenue), highest first.
query_products = """
CREATE OR REPLACE VIEW v_product_summary AS
SELECT 
    "Description" as Product,
    SUM("Quantity") as Units_Sold,
    SUM("TotalAmount") as Revenue
FROM transactions
GROUP BY 1
ORDER BY 3 DESC;
"""

In [15]:
# Create (or redefine) both summary views on a single connection and commit
# them together, monthly summary first.
view_definitions = (query_monthly, query_products)
with db_connection.connect() as connection:
    for ddl in view_definitions:
        connection.execute(text(ddl))
    connection.commit()
print("Database ready")

Database ready
