## Data Processing
In this notebook, I run the ETL (extract, transform, load) step: the data is read from Azure Blob Storage and then processed.

In [87]:

import os
import sys
from urllib.parse import quote_plus
from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import pandas as pd
import pyodbc

In [None]:
# Location of the .env file that holds the SQL connection settings.
# Set the DOTENV_PATH environment variable to point at a different file;
# when it is not set, the original local development path is used, so
# existing setups keep working unchanged.
DOTENV_PATH = os.getenv(
    "DOTENV_PATH",
    "/Users/haseebsagheer/Documents/Python Learning/Cloud-Retail-Insights/secrets/.env",
)

# Force .env to override anything already in the process
if load_dotenv(dotenv_path=DOTENV_PATH, override=True):
    print("The .env file is loaded successfully.")
else:
    print("Warning: .env not found or could not be loaded.")
    # Show the path that was tried so a wrong location is easy to spot.
    print("Tried path:", DOTENV_PATH)

The .env file loaded successfully.


In [89]:
# Read the SQL Server login settings from the environment and verify that
# every one of them is present before any connection is attempted.
_sql_settings = {
    "SQL_SERVER": os.getenv("SQL_SERVER"),
    "SQL_DATABASE": os.getenv("SQL_DATABASE"),
    "SQL_USERNAME": os.getenv("SQL_USERNAME"),
    "SQL_PASSWORD": os.getenv("SQL_PASSWORD"),
}
server   = _sql_settings["SQL_SERVER"]
database = _sql_settings["SQL_DATABASE"]
username = _sql_settings["SQL_USERNAME"]
password = _sql_settings["SQL_PASSWORD"]

# The password is deliberately not echoed.
print("Using SQL_SERVER   =", server)
print("Using SQL_DATABASE =", database)
print("Using SQL_USERNAME =", username)

# A setting counts as missing when it is unset or an empty string.
missing = [name for name, value in _sql_settings.items() if not value]
if missing:
    print("ERROR: Missing one or more of SQL_SERVER / SQL_DATABASE / SQL_USERNAME / SQL_PASSWORD")
    # Name the exact settings so the .env file can be fixed directly.
    print("Missing or empty:", ", ".join(missing))
    sys.exit(1)


Using SQL_SERVER   = sqlsrv-retail-dev.database.windows.net
Using SQL_DATABASE = sqldb-dretail-dev
Using SQL_USERNAME = sqladmin


In [90]:
# Path to the msodbcsql 18 driver library. The default is the original
# hard-coded location; set ODBC_DRIVER_PATH to use a driver installed elsewhere.
DRIVER_PATH = os.getenv("ODBC_DRIVER_PATH", "/opt/homebrew/lib/libmsodbcsql.18.dylib")


def _odbc_value(value):
    """Return ``value`` made safe for use inside an ODBC connection string.

    A value that contains ``;`` (the attribute separator) or starts with ``{``
    is wrapped in braces, with any ``}`` inside it doubled. Every other value
    is returned unchanged, so ordinary credentials produce the same string
    as before.
    """
    value = str(value)
    if ";" in value or value.startswith("{"):
        return "{" + value.replace("}", "}}") + "}"
    return value


#Preparing the credentials for logging in the account (azure SQL Server)
odbc = (
    f"DRIVER={DRIVER_PATH};"
    f"SERVER={_odbc_value(server)};"
    f"DATABASE={_odbc_value(database)};"
    f"UID={_odbc_value(username)};"
    f"PWD={_odbc_value(password)};"
    "Encrypt=yes;"
    "TrustServerCertificate=no;"
    "Connection Timeout=30;"
)

# The whole ODBC string is URL-encoded and handed to pyodbc via odbc_connect.
conn_url = f"mssql+pyodbc:///?odbc_connect={quote_plus(odbc)}"

try:
    # NOTE: create_engine() does not open a connection yet; a wrong password
    # or unreachable server only surfaces on the first engine.connect().
    engine = create_engine(conn_url, fast_executemany=True)
    print("SQLAlchemy engine created successfully.")
except Exception as e:
    print("Error creating engine:", e)
    sys.exit(2)

SQLAlchemy engine created successfully.


In [None]:

# Open a connection, load the staging sales table into `df`, and report
# how many rows came back. On failure the error is reported instead of being
# silently swallowed; `df` is then not (re)assigned by this cell.
try:
    print("Testing connection...")
    with engine.connect() as conn:
        # text() marks the string as an explicit SQL statement for SQLAlchemy.
        df = pd.read_sql(text("SELECT * FROM dbo.stg_sales;"), conn)

    # The connection is already released here; only the DataFrame is inspected.
    if df.empty:
        print("Query returned 0 rows.")
    else:
        print(f"Query returned {len(df)} rows")

except Exception as exc:
    # Catch Exception (not a bare except) so KeyboardInterrupt / SystemExit
    # still propagate, and show the real cause next to the friendly message.
    print("There was something wrong in getting data from Azure SQl Server")
    print(f"{type(exc).__name__}: {exc}")

Testing connection...
Query returned RowID           9800
OrderID         9800
OrderDate       9800
ShipDate        9800
ShipMode        9800
CustomerID      9800
CustomerName    9800
Segment         9800
Country         9800
City            9800
State           9800
PostalCode      9789
Region          9800
ProductID       9800
Category        9800
SubCategory     9800
ProductName     9800
Sales           9800
dtype: int64 rows
