# Question 3 Solutions

## Setup and Initialisation
Import the required libraries, set up logging, and create a connection to the 'master' database created in Question 2.

In [None]:
#import necessary libraries
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
import pandas_ta as ta
import logging, functools

# configure logging: INFO and above, formatted as "timestamp - level - message"
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


def log_insert(func):
    """Decorate ``func`` so that every call is bracketed by two INFO log lines.

    One message is logged before ``func`` runs and one after it returns.
    Positional and keyword arguments are forwarded untouched and the return
    value of ``func`` is handed back to the caller. If ``func`` raises, the
    exception propagates and the second message is not logged.
    """
    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def logged_call(*call_args, **call_kwargs):
        logging.info("Starting SQL insert operation.")
        outcome = func(*call_args, **call_kwargs)
        logging.info("Completed SQL insert operation.")
        return outcome

    return logged_call

# connection URL for the 'master' database on the SQL Server instance at localhost:1433
# NOTE(review): the login name and password are embedded in this URL
connection_url = (
    "mssql+pyodbc://sa:!Hartree123!@localhost:1433/master?"
    "driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes"
)
engine = create_engine(connection_url)

# session factory bound to the engine, and the session object used for executing SQL commands
Session = sessionmaker(bind=engine)
session = Session()

## Load and Clean CSV Data
This is the same code as in my solution to Question 1. It consists of data processing and conversion steps, such as converting the dates into datetime. I then added a final `with` statement that opens a temporary database connection and deletes all data in the table in case the code has already been run, so that we don't end up with duplicated data.

In [None]:
# Read the market data CSV and tidy it into `df`

# location of the raw file and the simplified column names applied to it
csv_path = '/Users/giacomofiorani/Desktop/Hartree/OilDesk-Intern-Assessment/data/MarketData.csv'
column_names = ['Dates', 'COPPER', 'ALUMINIUM', 'ZINC', 'LEAD', 'TIN', 'FUTURE']

# read without a header row, discard the first 7 rows and renumber the index from 0
df = pd.read_csv(csv_path, header=None).iloc[7:].reset_index(drop=True)
df.columns = column_names

# parse the 'Dates' column as day-first dates
df['Dates'] = pd.to_datetime(df['Dates'], dayfirst=True)

# remove every existing row from metal_prices so that re-running the code does not duplicate data
clear_stmt = text("DELETE FROM metal_prices")
with engine.connect() as connection:
    connection.execute(clear_stmt)
    connection.commit()


## Ensuring Required Columns Exist in Database Table
In the following code I ensure that the additional columns required for storing the MACD and RSI values exist in the metal_prices table, which is within the master database. This is because, when pandas_ta computes the MACD and RSI, it gives the columns a complex default naming format, which makes them harder to work with. Therefore, I predefine simpler names for the MACD line, the signal line and the histogram. If the columns already exist they are not added again, avoiding duplication and preserving the database's integrity.

In [None]:
# Make sure the indicator columns are present in the metal_prices table

# columns to look for and, where missing, add as FLOAT
indicator_columns = [
    'macd_fast', 'macds_fast', 'macdh_fast',
    'macd_med', 'macds_med', 'macdh_med',
    'macd_slow', 'macds_slow', 'macdh_slow',
    'rsi'
]

with engine.connect() as conn:
    for col in indicator_columns:
        # count the INFORMATION_SCHEMA entries for this table/column pair
        lookup = text(f"""
            SELECT COUNT(*) FROM INFORMATION_SCHEMA.COLUMNS
            WHERE TABLE_NAME = 'metal_prices' AND COLUMN_NAME = '{col}'
        """)
        matches = conn.execute(lookup).scalar()

        # a non-zero count means the column is already there: report it and move on
        if matches != 0:
            print(f"{col} already exists")
            continue

        # otherwise add the column to the table
        conn.execute(text(f"ALTER TABLE metal_prices ADD {col} FLOAT"))
        print(f"{col} sucessfully added")

    # a single commit covers every ALTER TABLE issued above
    conn.commit()

# confirmation message
print("Database and table setup complete.")

## Defining Insert Function to Populate metal_prices Table
In the following code I created a dedicated insert_record() function to handle the insertion of each row into the metal_prices SQL table. A decorator, @log_insert, is applied to log each insert operation for better traceability and debugging of the data pipeline. The function accepts each data point (date, metal, price and the calculated indicators) and inserts it into the database using a SQL INSERT statement. I also added a helper to ensure that all NaN values from the DataFrame are converted into None; otherwise SQL does not see them as missing data and raises errors. Lastly, each record is committed, so it is added correctly and permanently saved in the database.

In [None]:
# Insert function
@log_insert # decorator to log insert operations
def insert_record(date, metal, price, macd_fast, macds_fast, macdh_fast,
                  macd_med, macds_med, macdh_med,
                  macd_slow, macds_slow, macdh_slow, rsi):
    """Insert one row into the ``metal_prices`` table and commit it.

    Each argument is bound to the column of the same name. Any value for
    which ``pd.isna`` is true (NaN, NaT, None) is sent as ``None`` so the
    database receives NULL.

    Args:
        date: value for the ``date`` column.
        metal: value for the ``metal`` column.
        price: value for the ``price`` column.
        macd_fast, macds_fast, macdh_fast: values for the "fast" MACD columns.
        macd_med, macds_med, macdh_med: values for the "med" MACD columns.
        macd_slow, macds_slow, macdh_slow: values for the "slow" MACD columns.
        rsi: value for the ``rsi`` column.

    Raises:
        Exception: whatever ``session.execute`` or ``session.commit`` raises.
            The session is rolled back before the exception is re-raised.
    """
    # gather the bind parameters once, keyed by column name
    raw_values = {
        'date': date,
        'metal': metal,
        'price': price,
        'macd_fast': macd_fast,
        'macds_fast': macds_fast,
        'macdh_fast': macdh_fast,
        'macd_med': macd_med,
        'macds_med': macds_med,
        'macdh_med': macdh_med,
        'macd_slow': macd_slow,
        'macds_slow': macds_slow,
        'macdh_slow': macdh_slow,
        'rsi': rsi,
    }

    # convert NaN to None for SQL compatibility
    params = {
        column: (None if pd.isna(value) else value)
        for column, value in raw_values.items()
    }

    # SQL insert statement for the metal_prices table
    insert_stmt = text("""
        INSERT INTO metal_prices (
            date, metal, price,
            macd_fast, macds_fast, macdh_fast,
            macd_med, macds_med, macdh_med,
            macd_slow, macds_slow, macdh_slow,
            rsi)
        VALUES (
            :date, :metal, :price,
            :macd_fast, :macds_fast, :macdh_fast,
            :macd_med, :macds_med, :macdh_med,
            :macd_slow, :macds_slow, :macdh_slow,
            :rsi)
    """)

    try:
        session.execute(insert_stmt, params)
        # commit it to the database
        session.commit()
    except Exception:
        # the session is shared by every call: roll back so a failed insert
        # does not leave it in a broken transaction for the next record
        session.rollback()
        logging.exception("SQL insert failed; transaction rolled back.")
        raise

## Processing and Inserting Copper and Zinc 2020 and 2021 Data into SQL Table
In the following code I process and insert only the Copper and Zinc data from the cleaned dataframe. Firstly, I filter the data to include only 2020 and 2021, then I compute the technical indicators RSI and MACD. To get a broader analysis I computed the MACD with three different configurations: fast, medium and slow. The fast configuration uses the industry-standard values (12, 26, 9), while the medium and slow configurations are strategic extensions of it.

In [None]:
# Compute the indicators for COPPER and ZINC (2020 and 2021) and insert every row

# MACD column names in the order insert_record expects them
macd_columns = [
    f'{kind}_{label}'
    for label in ('fast', 'med', 'slow')
    for kind in ('macd', 'macds', 'macdh')
]

for metal in ['COPPER', 'ZINC']:

    # temporary dataframe: the dates plus this metal's values under the name 'price'
    df_temp = df[['Dates', metal]].copy().rename(columns={metal: 'price'})

    # keep only the rows dated 2020 or 2021
    df_temp = df_temp[df_temp['Dates'].dt.year.isin([2020, 2021])]

    # make price numeric; values that cannot be parsed become NaN
    df_temp['price'] = pd.to_numeric(df_temp['price'], errors='coerce')

    # RSI with length 14
    df_temp['rsi'] = ta.rsi(df_temp['price'], length=14)

    # MACD for the three configurations (signal length 9 in each)
    macd_fast = ta.macd(df_temp['price'], fast=12, slow=26, signal=9)
    macd_med = ta.macd(df_temp['price'], fast=19, slow=39, signal=9)
    macd_slow = ta.macd(df_temp['price'], fast=26, slow=52, signal=9)

    # copy each configuration's MACD line, signal line and histogram
    # into the simply-named columns
    for label, frame, suffix in (
        ('fast', macd_fast, '12_26_9'),
        ('med', macd_med, '19_39_9'),
        ('slow', macd_slow, '26_52_9'),
    ):
        df_temp[f'macd_{label}'] = frame[f'MACD_{suffix}']
        df_temp[f'macds_{label}'] = frame[f'MACDs_{suffix}']
        df_temp[f'macdh_{label}'] = frame[f'MACDh_{suffix}']

    # insert each row into the SQL table using the insert_record function
    for _, row in df_temp.iterrows():
        insert_record(
            row['Dates'], metal, row['price'],
            *(row[name] for name in macd_columns),
            row['rsi'],
        )

# let the user know the inserts have finished
print("All 2020 and 2021 Copper and Zinc records successfully inserted.")