# Stock fetching, Cleaning, and Storage
#### This notebook is going to:

###### Fetch three non-consecutive two-month slices of 2024 stock data for two companies; this example uses Apple (AAPL) and Amazon (AMZN)
###### Each of these slices will be processed differently
###### The first slice will be written directly to a SQL table
###### The second slice will be written to a CSV file
###### The third slice will be written to a JSON file

###### We will then load the Apple (AAPL) files and randomly scramble their data before appending it to the SQL table

###### Finally, we will build a dataframe from the SQL table, determine which months are missing, make the API calls needed to retrieve them, complete the dataframe, and update the SQL table

### Imports & Environment Variables

In [None]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv
import pandas as pd

sys.path.append(os.path.abspath('..'))
# Import handlers
from scripts.handlers.stockHandler import AVStockDataHandler
from scripts.handlers.storageHandler import storageHandler
from scripts.handlers.helperHandler import helperHandler
from scripts.handlers.scrambleHandler import scrambleHandler
from scripts.handlers.SQLHandler import SQLHandler
from scripts.handlers.cleaningHandler import cleaningHandler

# Populate the process environment from the project's config file
load_dotenv('../config.env')

# Read the two settings the rest of the notebook depends on
API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY")
DB_PATH = os.environ.get("DB_PATH")

# Fail fast: a missing or empty value for either setting stops the notebook here
if not API_KEY:
    raise ValueError("API key not found. Please set the ALPHA_VANTAGE_API_KEY in config.env.")

if not DB_PATH:
    raise ValueError("Database path not found. Please set the DB_PATH in config.env.")

### Handlers, Tickers, And Date Ranges

In [None]:
# Initialize the handlers
# One instance of each project handler; later cells refer to these names directly.
stock_handler = AVStockDataHandler(API_KEY)  # built with the key read from ALPHA_VANTAGE_API_KEY
storage_handler = storageHandler()
SQL_handler = SQLHandler(DB_PATH)  # built with the database path read from DB_PATH
scramble_handler = scrambleHandler()
helper_handler = helperHandler()
cleaning_handler = cleaningHandler()

# Tickers to fetch data for (Apple and Amazon)
tickers = ['AAPL', 'AMZN']

# Date ranges for fetching data: (start, end) pairs as 'YYYY-MM-DD' strings.
# Three non-consecutive two-month windows in 2024: Jan-Feb, May-Jun, Sep-Oct.
# NOTE(review): whether the end date is inclusive depends on the handlers — confirm.
date_ranges = [
    ('2024-01-01', '2024-02-29'),
    ('2024-05-01', '2024-06-30'),
    ('2024-09-01', '2024-10-31')
]

### Fetching and Storing ticker data

In [None]:

# Fetch the data for every ticker over the configured date ranges
# NOTE(review): the structure of the returned object is defined by AVStockDataHandler — confirm there.
sliced_ticker_data = stock_handler.fetch_multiple_tickers(tickers, date_ranges)

# Use helperHandler to slice the data and create sets.
# The result is indexed as sets[0], sets[1], sets[2] below — presumably one set
# per entry in date_ranges; TODO confirm against helperHandler.create_sets.
sets = helper_handler.create_sets(sliced_ticker_data, date_ranges)

# Persist each set through a different storage path
SQL_handler.save_dfs_to_table(sets[0])  # first set -> SQL table
storage_handler.multiple_dfs_to_csv_and_json(sets[1], file_type='csv')  # second set -> CSV
storage_handler.multiple_dfs_to_csv_and_json(sets[2], file_type='json')  # third set -> JSON

### File Retrieval

In [None]:
# Regex used to locate the AAPL data files written earlier.
# ".*AAPL.*" accepts any string containing "AAPL", so it is not limited to one
# file type or to names that start with "AAPL".
# NOTE(review): how find_files applies the pattern (file name vs. full path,
# match vs. search) is defined in helperHandler — confirm there.
AAPL_pattern = r".*AAPL.*"

# Use the helper handler to locate files under "raw_data" matching the pattern
aapl_files = helper_handler.find_files("raw_data", AAPL_pattern)

# Build one dataframe per located file, previewing each as it is loaded
aapl_dfs = []
for file in aapl_files:
    df = storage_handler.df_builder(file)
    print(df.head())  # preview the frame that was just built
    aapl_dfs.append(df)

# pd.concat raises a terse "No objects to concatenate" ValueError on an empty
# list; raise the same exception type with a message that names the real cause.
if not aapl_dfs:
    raise ValueError(
        f"No files matching pattern {AAPL_pattern!r} were loaded from 'raw_data'."
    )

# Concatenate the dataframes into a single dataframe with a fresh 0..n-1 index
aapl_df = pd.concat(aapl_dfs, ignore_index=True)

### Scrambling data

###### NOTE: The scramble method can return a new object, but its operations are performed in place on the dataframe passed to it

In [None]:
# Print the concatenated dataframe (state before scrambling, for comparison)
print(aapl_df)

# Scramble the data.
# The return value is discarded: this cell relies on scramble_df modifying
# aapl_df in place — TODO confirm against scrambleHandler.scramble_df.
scramble_handler.scramble_df(aapl_df)

# Print the scrambled dataframe (same object, printed again after the call)
print(aapl_df)
