# Imports & Setup

In [14]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
import os
import streamlit as st
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as sk
import requests
from io import StringIO
from dotenv import load_dotenv
from urllib.parse import urlencode

# Uncomment when ready for machine learning:
# from sklearn.model_selection import train_test_split
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense, Dropout
# from tensorflow.keras.callbacks import EarlyStopping

In [16]:
# Pulling up our .env file so that environment variables defined there
# (e.g. ALPHAVANTAGE_API_KEY, read below via os.getenv) are available:
load_dotenv()

True

# Data Preprocessing

## Declarations and Functions

In [17]:
# Sets and Parameters

# Alpha Vantage function names requested by default, in the order their
# DataFrames are returned by alpha_multicall.
function_set = ['TIME_SERIES_DAILY', 'RSI', 'NEWS_SENTIMENT', 'REAL_GDP']
# NOTE(review): not referenced in the visible cells -- presumably topic filters
# for sentiment queries; confirm against later sections.
sa_function_parameters = ['technology', 'retail_wholesale']
# NOTE(review): not referenced in the visible cells -- presumably functions
# that need a premium API key; confirm.
premium_function_set = ['MACD']

# Read the key once; every template below shares it.
_api_key = os.getenv('ALPHAVANTAGE_API_KEY')

# Query-string templates keyed by function name. Entries left as None are
# placeholders filled in at call time. Key order is kept because it becomes
# the order of the URL-encoded query parameters.
parameters = {
    'TIME_SERIES_DAILY': dict(
        function='TIME_SERIES_DAILY',
        symbol=None,
        outputsize='compact',
        datatype=None,
        apikey=_api_key,
    ),
    'NEWS_SENTIMENT': dict(
        function='NEWS_SENTIMENT',
        tickers=None,
        limit=1000,
        apikey=_api_key,
    ),
    'RSI': dict(
        function='RSI',
        symbol=None,
        interval='daily',
        time_period=14,
        series_type='close',
        datatype=None,
        apikey=_api_key,
    ),
    'REAL_GDP': dict(
        function='REAL_GDP',
        interval='quarterly',
        datatype=None,
        apikey=_api_key,
    ),
}

In [18]:
# Data Import Function

def alpha_multicall(function_set, symbol: str, datatype: str, base_query='https://www.alphavantage.co/query?'):
    """Request every function in ``function_set`` and return one DataFrame per call.

    For each function name, the matching template in the module-level
    ``parameters`` dict is copied, filled in with ``symbol`` and ``datatype``
    (only where the template declares those keys), URL-encoded onto
    ``base_query`` and fetched with ``requests.get``.

    Parameters
    ----------
    function_set : iterable of str
        Keys of ``parameters`` to call, in order.
    symbol : str
        Ticker sent as "symbol" (and as "tickers" for NEWS_SENTIMENT).
    datatype : str
        Sent as "datatype" where the template has that key. Every response
        other than NEWS_SENTIMENT is parsed with ``pd.read_csv``, so 'csv' is
        presumably the only value that parses cleanly -- TODO confirm.
    base_query : str
        URL prefix the encoded query string is appended to.

    Returns
    -------
    tuple of pandas.DataFrame
        One frame per entry of ``function_set``, in the same order, so the
        result can be unpacked into separate names.

    Raises
    ------
    TypeError
        If ``symbol`` or ``datatype`` is not a string.
    KeyError
        If a name in ``function_set`` has no template in ``parameters``.
    """

    # Conditional to verify that "symbol" and "datatype" are strings:
    if not isinstance(symbol, str) or not isinstance(datatype, str):
        raise TypeError(f'Both the "symbol" and "datatype" parameters must be strings.\nSYMBOL: {str(type(symbol)).upper()}\nDATATYPE: {str(type(datatype)).upper()}')

    # Build every query first so an unknown function name fails before any
    # network request is made.
    queries = []
    for function in function_set:
        # Copy the template: the shared ``parameters`` dict must not keep this
        # call's symbol/datatype as hidden state for later calls.
        query = dict(parameters[function])

        # Test each key on its own. The previous `'symbol' and 'datatype' in ...`
        # only checked 'datatype', which added a stray "symbol" parameter to
        # templates that do not declare one (REAL_GDP, NEWS_SENTIMENT).
        if 'symbol' in query:
            query['symbol'] = symbol
        if 'datatype' in query:
            query['datatype'] = datatype
        if function == 'NEWS_SENTIMENT':
            query['tickers'] = symbol

        # Never send an unfilled placeholder as the literal string "None".
        query = {key: value for key, value in query.items() if value is not None}
        queries.append((function, query))

    # DataFrame list to be converted to a tuple before being returned to the user:
    dataframes = []

    # The loop that makes the call for each function defined in the function set:
    for function, query in queries:
        response = requests.get(base_query + urlencode(query))
        if function == 'NEWS_SENTIMENT':
            # News sentiment is consumed as JSON and flattened to one row per
            # matching ticker entry.
            df = extract_sentiment(pd.DataFrame(response.json()), symbol)
        else:
            df = pd.read_csv(StringIO(response.text))
        dataframes.append(df)

    # Returns a tuple so we can unpack all the returned DataFrames into separate objects:
    return tuple(dataframes)

In [19]:
# Sentiment Extraction Function:

def extract_sentiment(df, symbol: str):
    """Flatten a NEWS_SENTIMENT frame into one row per sentiment entry for ``symbol``.

    Each value in ``df['feed']`` is read as a mapping with a "time_published"
    entry and a "ticker_sentiment" list. Every element of that list whose
    "ticker" equals ``symbol`` yields one output row. Articles with no entry
    for ``symbol`` contribute no rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a "feed" column holding one article mapping per row.
    symbol : str
        Ticker to keep; compared with ``==`` (case-sensitive).

    Returns
    -------
    pandas.DataFrame
        Columns "time_published", "relevance_score", "ticker_sentiment_score"
        and "ticker_sentiment_label", in article order, with a fresh 0..n-1
        index. Empty (same columns) when nothing matches.

    Raises
    ------
    TypeError
        If ``symbol`` is not a string.
    KeyError
        If a non-empty ``df`` has no "feed" column, or an article lacks one of
        the expected keys.
    """

    # Conditional to verify that "symbol" is a string:
    if not isinstance(symbol, str):
        raise TypeError(f'The "symbol" parameter must be a strings; it\'s currently {str(type(symbol)).upper()}.')

    columns = ['time_published', 'relevance_score', 'ticker_sentiment_score', 'ticker_sentiment_label']

    # Nothing to scan: return the empty shape without touching "feed".
    if df.shape[0] == 0:
        return pd.DataFrame(columns=columns)

    # Collect plain records and build the frame once. This replaces the
    # row-by-row pd.concat (quadratic, and concatenating onto empty frames) and
    # the intermediate id-keyed merge. Iterating the column itself also works
    # for frames whose index is not 0..n-1.
    records = []
    for article in df['feed']:
        published = article['time_published']
        for sentiment in article['ticker_sentiment']:
            if sentiment['ticker'] == symbol:
                records.append({
                    'time_published': published,
                    'relevance_score': sentiment['relevance_score'],
                    'ticker_sentiment_score': sentiment['ticker_sentiment_score'],
                    'ticker_sentiment_label': sentiment['ticker_sentiment_label'],
                })

    return pd.DataFrame(records, columns=columns)

## First Set

In [24]:
# Fetch all AAPL datasets in one call. The returned tuple follows the order of
# function_set (TIME_SERIES_DAILY, RSI, NEWS_SENTIMENT, REAL_GDP), so the names
# on the left must stay in that same order.
aapl_tsd, aapl_rsi, aapl_ns, gdp = alpha_multicall(function_set, 'AAPL', 'csv')

In [25]:
# Preview up to the first 20 rows of the daily time-series frame.
aapl_tsd.head(20)

Unnamed: 0,timestamp,open,high,low,close,volume
0,2024-03-15,171.17,172.62,170.285,172.62,121752699
1,2024-03-14,172.91,174.3078,172.05,173.0,72571635
2,2024-03-13,172.77,173.185,170.76,171.13,51948951
3,2024-03-12,173.15,174.03,171.01,173.23,59544927
4,2024-03-11,172.94,174.38,172.05,172.75,58929918
5,2024-03-08,169.0,173.7,168.94,170.73,76267041
6,2024-03-07,169.15,170.73,168.49,169.0,71765061
7,2024-03-06,171.06,171.24,168.68,169.12,68587707
8,2024-03-05,170.76,172.04,169.62,170.12,95132355
9,2024-03-04,176.15,176.9,173.79,175.1,81510101


In [26]:
# Preview the first rows of the RSI frame.
aapl_rsi.head()

Unnamed: 0,time,RSI
0,2024-03-15,37.0717
1,2024-03-14,37.7247
2,2024-03-13,32.273
3,2024-03-12,35.5151
4,2024-03-11,34.1102


In [27]:
# Preview the first rows of the sentiment frame produced by extract_sentiment.
aapl_ns.head()

Unnamed: 0,time_published,relevance_score,ticker_sentiment_score,ticker_sentiment_label
0,20240316T190500,0.752195,0.434492,Bullish
1,20240316T141500,0.75914,0.229998,Somewhat-Bullish
2,20240316T130300,0.47672,0.434252,Bullish
3,20240316T125411,0.183198,0.064715,Neutral
4,20240316T121500,0.155201,0.203733,Somewhat-Bullish


## Second Set