
### Question 5: Async Data Pipeline
- Modify Question 3 to write data to the database **asynchronously** .
- Read from the database 5 times *concurrantly* using **async** (hint: `asyncio.gather()`)

In [57]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
import asyncio
import aiosqlite
from functools import wraps
from datetime import datetime

In [58]:
connection = sqlite3.connect('metal_prices.db')
cursor = connection.cursor()

In [59]:
cursor.execute("SELECT name FROM sqlite_master WHERE type = 'table';").fetchall()

[('LME_Copper_3MO',),
 ('LME_Aluminum_3MO',),
 ('LME_Zinc_3MO',),
 ('LME_Lead_3MO',),
 ('LME_Tin_3MO',),
 ('Generic_CL_Future',)]

In [60]:
## defining the decorator to log sql insert executions
def log_execution(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(f"Executing {func.__name__} at {datetime.now()}")
        result = func(*args, **kwargs)
        print(f"Execution completed at {datetime.now()}")
        return result
    return wrapper

In [61]:
## function that adds the desired columns to the desired tables

def alter_metal(connection, tables, cols):
    cursor = connection.cursor()

    for table in tables:
        cursor.execute(f"PRAGMA table_info({table})")
        existing_columns = [row[1] for row in cursor.fetchall()]  
        for col in cols:
            if col not in existing_columns:
                cursor.execute(f"ALTER TABLE {table} ADD COLUMN {col} REAL DEFAULT NULL")
                print(f"Added column {col} to table {table}.")
            else:
                print(f"Column {col} already exists in table {table}.")

    connection.commit()
    cursor.close()


def main():
    connection = sqlite3.connect('metal_prices.db')
    
    tables = ['LME_Zinc_3MO', 'LME_Copper_3MO']
    cols = ['MACD', 'Signal_Line', 'RSI']

    alter_metal(connection, tables, cols)

    connection.close()

main()


Column MACD already exists in table LME_Zinc_3MO.
Column Signal_Line already exists in table LME_Zinc_3MO.
Column RSI already exists in table LME_Zinc_3MO.
Column MACD already exists in table LME_Copper_3MO.
Column Signal_Line already exists in table LME_Copper_3MO.
Column RSI already exists in table LME_Copper_3MO.


In [62]:
## chekcing that the columns have been added correctly
print(cursor.execute("PRAGMA table_info('LME_Copper_3MO')").fetchall())
print(cursor.execute("PRAGMA table_info('LME_Zinc_3MO')").fetchall())

[(0, 'date', 'DATE', 0, None, 1), (1, 'price', 'REAL', 0, None, 0), (2, 'MACD', 'REAL', 0, 'NULL', 0), (3, 'Signal_Line', 'REAL', 0, 'NULL', 0), (4, 'RSI', 'REAL', 0, 'NULL', 0)]
[(0, 'date', 'DATE', 0, None, 1), (1, 'price', 'REAL', 0, None, 0), (2, 'MACD', 'REAL', 0, 'NULL', 0), (3, 'Signal_Line', 'REAL', 0, 'NULL', 0), (4, 'RSI', 'REAL', 0, 'NULL', 0)]


In [63]:
## executes the SQL update statement and commits the changes asynchronously
@log_execution
async def update_metal_data(connection, metal, Date, MACD=None, Signal_Line=None, RSI=None):
    Date = Date.strftime('%Y-%m-%d')
    async with connection.cursor() as cursor:
        await cursor.execute(f"""
            UPDATE {metal}
            SET MACD = ?, Signal_Line = ?, RSI = ?
            WHERE Date = ?
        """, (MACD, Signal_Line, RSI, Date))
        await connection.commit()
        
## asynchronously adds to the update_tasks list, once all the rows are processed, gathers and runs concurrently
@log_execution
async def update_data_from_dataframe(connection, dataframe, metals):
    update_tasks = []

    for index, row in dataframe.iterrows():
        Date = row.name
        MACD = row['MACD']
        Signal_Line = row['Signal Line']
        RSI = row['RSI']
        for metal in metals:
            update_tasks.append(
                update_metal_data(connection, metal, Date, MACD, Signal_Line, RSI)
            )

    await asyncio.gather(*update_tasks)

## defines the list of metals and df for each metal
## establishes an asynchronous connection to the db and creates tasks to update
## data for each metal - waits for all uodate tasks to be complete

async def main():
    metals = ['LME_Copper_3MO', 'LME_Zinc_3MO']
    dataframes = {'LME_Copper_3MO': copper_df, 'LME_Zinc_3MO': zinc_df}

    
    async with aiosqlite.connect('metal_prices.db') as connection:
        update_tasks = [
            update_data_from_dataframe(connection, df, [metal])
            for metal, df in dataframes.items()
        ]

        
        await asyncio.gather(*update_tasks)

## runs the main() function in an asychronous context
await main()


Executing update_data_from_dataframe at 2024-11-17 18:17:21.900314
Execution completed at 2024-11-17 18:17:21.900314
Executing update_data_from_dataframe at 2024-11-17 18:17:21.900314
Execution completed at 2024-11-17 18:17:21.900314
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing update_metal_data at 2024-11-17 18:17:21.900968
Execution completed at 2024-11-17 18:17:21.900968
Executing 

In [64]:
## reads and returns the data from the database for a specified metal between a start and end date.
@log_execution
async def read_data_from_db(connection, metal, start_date, end_date):
    async with connection.cursor() as cursor:
        result = await cursor.execute(f"""
            SELECT Date, MACD, Signal_Line, RSI
            FROM {metal}
            WHERE Date BETWEEN ? AND ?
        """, (start_date, end_date))
        rows = await result.fetchall()
        return rows
## main function the reads data for multiple metals from the database asynchronously
async def main():
    metals = ['LME_Copper_3MO', 'LME_Zinc_3MO']
    start_date = '2020-01-01'
    end_date = '2021-12-31'

    async with aiosqlite.connect('metal_prices.db') as connection:
        read_tasks = [
            read_data_from_db(connection, metals[0], start_date, end_date) for _ in range(5)
        ]
        results = await asyncio.gather(*read_tasks)

        for i, result in enumerate(results, 1):
            print(f"Read {i}: {result}")

await main()

Executing read_data_from_db at 2024-11-17 18:17:22.484587
Execution completed at 2024-11-17 18:17:22.484587
Executing read_data_from_db at 2024-11-17 18:17:22.484587
Execution completed at 2024-11-17 18:17:22.484587
Executing read_data_from_db at 2024-11-17 18:17:22.484587
Execution completed at 2024-11-17 18:17:22.484587
Executing read_data_from_db at 2024-11-17 18:17:22.484587
Execution completed at 2024-11-17 18:17:22.484587
Executing read_data_from_db at 2024-11-17 18:17:22.484587
Execution completed at 2024-11-17 18:17:22.484587
Read 1: [('2020-01-01', 0.0, 0.0, None), ('2020-01-02', 1.1168091168083265, 0.22336182336166532, None), ('2020-01-03', -2.687591821496426, -0.35882890560995295, None), ('2020-01-06', -4.919671282315903, -1.270997380951143, None), ('2020-01-07', -5.774780019056379, -2.17175390857219, None), ('2020-01-08', -4.065537234234398, -2.550510573704632, None), ('2020-01-09', -2.5205118360490815, -2.544510826173522, None), ('2020-01-10', 0.15459936578827183, -2.00468