In [203]:
import pandas as pd
import config
import os
from fmp_python.fmp import FMP
import pprint
import ssl
import time

from functions import trim_and_lower

from urllib.request import urlopen
from urllib.parse import urlencode
import certifi
import json
from IPython.display import Image, display

import plotly.express as px

In [321]:
# TLS context that validates server certificates against certifi's CA bundle.
context = ssl.create_default_context(cafile=certifi.where())

# The FMP API key comes from the environment; the value is None when
# FMP_SECRET_KEY is not set.
apikey = os.environ.get('FMP_SECRET_KEY')
# Alternative fmp_python client, left disabled:
#fmp = FMP(api_key=apikey, output_format='pandas')
#fmp = FMP(api_key=apikey, output_format='json')


def company_profile_url(ticker, apikey=apikey):
    """Return the FMP v3 company-profile URL for ``ticker``.

    The key is appended as the ``apikey`` query parameter. Neither value is
    URL-escaped.
    """
    return f'https://financialmodelingprep.com/api/v3/profile/{ticker}?apikey={apikey}'

def stock_screener(apikey=apikey, **kwargs):
    """Build the FMP v3 stock-screener URL.

    Parameters
    ----------
    apikey : str
        API key sent as the ``apikey`` query parameter.
    **kwargs
        Screener filters, passed through ``urlencode`` as additional query
        parameters (values are stringified, so ``False`` becomes ``'False'``).

    Returns
    -------
    str
        The full request URL, including the API key.

    Notes
    -----
    The URL is printed for reference with the key replaced by ``REDACTED`` so
    the secret is not stored in the notebook's saved output.
    """
    endpoint = 'https://financialmodelingprep.com/api/v3/stock-screener'
    params = {'apikey': apikey}
    params.update(kwargs)
    url = f'{endpoint}?{urlencode(params)}'

    # Same query, but with the credential masked, for display only.
    loggable = dict(params, apikey='REDACTED')
    print(f'{endpoint}?{urlencode(loggable)}')
    return url

def full_quote_url(ticker, apikey=apikey):
    """Return the FMP v3 quote URL for ``ticker`` with the API key attached."""
    template = 'https://financialmodelingprep.com/api/v3/quote/{}?apikey={}'
    return template.format(ticker, apikey)

def historical_url(ticker, apikey=apikey):
    """Return the FMP v3 historical-price-full URL for ``ticker``."""
    base = 'https://financialmodelingprep.com/api/v3/historical-price-full/'
    query = f'?apikey={apikey}'
    return base + f'{ticker}' + query

def technical_indicator_url(timeframe, ticker, ind_type, period, apikey=apikey):
    """Build the FMP v3 technical-indicator URL.

    Parameters
    ----------
    timeframe : str
        Path segment placed before the ticker (e.g. ``'1day'``).
    ticker : str
        Symbol placed in the URL path.
    ind_type : str
        Sent as the ``type`` query parameter.
    period : int
        Sent as the ``period`` query parameter.
    apikey : str
        Sent as the ``apikey`` query parameter.

    Returns
    -------
    str
        The request URL. Query values are escaped with ``urlencode``, the
        same way ``stock_screener`` builds its query string.
    """
    endpoint = 'https://financialmodelingprep.com/api/v3/technical_indicator/'
    query = urlencode({'type': ind_type, 'period': period, 'apikey': apikey})
    return f'{endpoint}{timeframe}/{ticker}?{query}'

def historical_rating_url(ticker, apikey=apikey):
    """Return the FMP v3 historical-rating URL for ``ticker``."""
    path = f'historical-rating/{ticker}'
    return f'https://financialmodelingprep.com/api/v3/{path}?apikey={apikey}'

def get_jsonparsed_data(url, retries=3, timeout=10):
    """
    Download ``url`` and return its body decoded from UTF-8 JSON.

    Up to ``retries`` attempts are made. Every failure is printed; after a
    failed attempt the function sleeps ``2 ** attempt`` seconds (1, 2, 4, ...)
    before trying again, and the exception from the last attempt is re-raised.

    Parameters
    ----------
    url : str
        The URL to fetch data from.
    retries : int
        Maximum number of attempts.
    timeout : int
        Per-request timeout in seconds.

    Returns
    -------
    object
        Whatever ``json.loads`` produces for the response body.
    """
    tls_context = ssl.create_default_context(cafile=certifi.where())
    final_attempt = retries - 1
    for attempt in range(retries):
        try:
            with urlopen(url, context=tls_context, timeout=timeout) as response:
                body = response.read().decode("utf-8")
                return json.loads(body)
        except Exception as error:
            print(f"Attempt {attempt + 1} failed: {error}")
            if attempt == final_attempt:
                # Out of attempts: surface the last error to the caller.
                raise
            time.sleep(2 ** attempt)  # Exponential backoff

def display_company_logo(ticker, apikey=apikey, retries=3, timeout=10):
    """Fetch a company's logo from FMP and render it in the notebook.

    Parameters
    ----------
    ticker : str
        Symbol used to build the ``image-stock/<ticker>.png`` URL.
    apikey : str
        Sent as the ``apikey`` query parameter.
    retries : int
        Maximum number of download attempts.
    timeout : int
        Per-request timeout in seconds.

    Returns
    -------
    None
        The image is displayed once, on the first successful attempt. This is
        best-effort: if every attempt fails, the errors are printed and the
        function returns without raising.
    """
    endpoint = 'https://financialmodelingprep.com/image-stock/'
    url = f'{endpoint}{ticker}.png?apikey={apikey}'
    # Show which image is being requested without writing the API key into
    # the notebook's saved output.
    print(f'{endpoint}{ticker}.png')

    ssl_context = ssl.create_default_context(cafile=certifi.where())
    for attempt in range(retries):
        try:
            with urlopen(url, context=ssl_context, timeout=timeout) as response:
                image_data = response.read()
            display(Image(image_data))
            # Stop after the first success so the logo is not re-downloaded
            # and re-displayed on every remaining iteration.
            return
        except Exception as e:
            print(f"Error fetching image: {e}")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff


def get_indicators(ticker, indicators, timeframe='1day', period=14, apikey=apikey):
    """Fetch several technical indicators for one ticker into a single frame.

    Parameters
    ----------
    ticker : str
        Symbol to request.
    indicators : iterable of str
        Indicator types; each is requested separately and becomes one column.
    timeframe : str
        Passed to ``technical_indicator_url``.
    period : int
        Passed to ``technical_indicator_url``.
    apikey : str
        Passed to ``technical_indicator_url``.

    Returns
    -------
    pandas.DataFrame
        One column per indicator, outer-joined on date, with the index
        converted by ``pd.to_datetime``.
    """
    urls = {
        indicator: technical_indicator_url(timeframe, ticker, indicator, period, apikey)
        for indicator in indicators
    }

    ind_data = {}
    for key, value in urls.items():
        data = get_jsonparsed_data(value)
        # Keep only the date and this indicator's value; entries that lack
        # the indicator key are skipped.
        ind_data[key] = [
            {'date': entry['date'], key: entry[key]} for entry in data if key in entry
        ]

    df_list = [pd.DataFrame(rows).set_index('date') for rows in ind_data.values()]

    final_df = pd.concat(df_list, axis=1, join='outer')
    # Convert the frame's own index. Reading a notebook-level variable here
    # would fail on a fresh kernel or reuse another ticker's dates.
    final_df.index = pd.to_datetime(final_df.index)

    return final_df

def stock_summary_table(stock_data):
    """Reshape stock records into a long field/value summary table.

    Parameters
    ----------
    stock_data : list of dict
        Records that each contain at least ``symbol``, ``name``, ``price``,
        ``marketCap`` and ``exchange``; any other keys are ignored.

    Returns
    -------
    pandas.DataFrame
        A frame indexed by ``variable`` (the field name) with a single
        ``value`` column, one row per field per record.
    """
    summary_fields = ['symbol', 'name', 'price', 'marketCap', 'exchange']
    table = pd.DataFrame(stock_data)[summary_fields]
    return table.melt().set_index('variable', drop=True)


def get_all_data(tickers):
    """Download price history for each ticker and trim every daily record.

    Each response is expected to carry a ``'symbol'`` and a ``'historical'``
    list. Every record in that list is tagged with the symbol and stripped,
    in place, of all keys other than the retained price fields.

    Returns the list of responses, in the same order as ``tickers``.
    """
    data = [get_jsonparsed_data(historical_url(ticker)) for ticker in tickers]

    # Fields that survive the trim; 'symbol' is the tag added below.
    retained = {'date', 'open', 'high', 'low', 'close', 'volume', 'vwap', 'symbol'}

    for item in data:
        historical = item['historical']
        symbol = item['symbol']

        for entry in historical:
            entry['symbol'] = symbol
            # Collect the unwanted keys first so the dict is not resized
            # while it is being iterated.
            for unwanted in [key for key in entry if key not in retained]:
                entry.pop(unwanted)

    return data


def lagging_features_target(df):
    """Add lagged-close feature columns and a forward-looking target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``close`` column. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with ``minus_<n>_price`` columns holding ``close``
        shifted back by 10, 5, 4, 3 and 2 rows, plus ``target`` holding
        ``close`` three rows ahead. Rows containing any NaN (including those
        created by the shifts) are dropped.
    """
    df = df.copy()
    # Each lag is listed once, in the column order the output should have.
    for lag in (10, 5, 4, 3, 2):
        df[f'minus_{lag}_price'] = df.close.shift(lag)
    df['target'] = df.close.shift(-3)
    return df.dropna()

In [332]:
# Tickers downloaded by the per-company cells further down.
watchlist = [
    'AMZN',
    'AAPL',
    'GOOG',
    'META',
    'MSFT',
    'NVDA',
    'TSLA',
]
# Shorter list for quick runs:
#watchlist = ['AMZN', 'AAPL']

# Timeframe strings, from 1 minute up to 1 day.
timeframes = [
    '1min', '5min', '15min', '30min',
    '1hour', '4hour',
    '1day',
]

# Indicator types requested for every ticker.
indicators = [
    'dema',
    'tema',
    'williams',
    'rsi',
    'adx',
]
# Wider set, currently disabled:
#indicators = ['sma', 'ema', 'wma', 'dema', 'tema', 'williams', 'rsi', 'adx']

In [None]:
# Fetch the configured indicators for TTD, using get_indicators' default
# timeframe and period.
indicator_df = get_indicators('TTD', indicators)

In [85]:
# Build the historical-rating request URL for TTD, then fetch and parse the
# JSON response.
hist_rating_url = historical_rating_url('TTD')
hist_rating = get_jsonparsed_data(hist_rating_url)

In [None]:
# Keep only the rating score and index it by date (parsed to datetime).
hist_rating = (
    pd.DataFrame(hist_rating)[['date', 'ratingScore']]
    .set_index('date', drop=True)
)
hist_rating.index = pd.to_datetime(hist_rating.index)

In [87]:
# Fetch TTD's price history; only the 'historical' entry of the response is
# kept.
hist_url = historical_url('TTD')
hist = get_jsonparsed_data(hist_url)['historical']

In [None]:
# Index the price records by date and drop the columns that are not used as
# features; the index is converted to datetime afterwards.
hist = (
    pd.DataFrame(hist)
    .set_index('date', drop=True)
    .drop(columns=['unadjustedVolume', 'adjClose', 'label', 'changeOverTime'])
)
hist.index = pd.to_datetime(hist.index)

In [None]:
# Align prices, indicators and ratings on their date index, order
# chronologically, and keep only dates where every column has a value.
df = (
    pd.concat([hist, indicator_df, hist_rating], axis=1)
    .sort_index()
    .dropna()
)
df.head()

In [110]:
# (rows, columns) of the merged frame.
df.shape

(1217, 17)

In [169]:
# Work on a copy so the feature-engineering steps leave df untouched.
train_df = df.copy()

In [170]:
# Reindex to a daily frequency; the rows that asfreq introduces are
# forward-filled from the previous available row.
train_df = train_df.asfreq('D', method='ffill')
train_df.shape

(1823, 17)

In [None]:
# Use the shared helper instead of repeating its shift logic inline, so this
# frame gets exactly the same lag and target columns as the per-company
# training sets (close shifted by 10/5/4/3/2 rows, target 3 rows ahead,
# incomplete rows dropped).
train_df = lagging_features_target(train_df)
train_df.tail(11)

In [172]:
# Persist the engineered frame; index=True writes the index as the first
# CSV column.
train_df.to_csv('../data/clean/train.csv', index=True)

In [322]:
# Download and trim the price history for every ticker in the watchlist
# (one request per ticker).
all_historical_data = get_all_data(watchlist)

In [323]:
def _history_frame(records):
    """Turn a list of daily price records into a frame indexed by datetime."""
    frame = pd.DataFrame(records).set_index('date', drop=True)
    frame.index = pd.to_datetime(frame.index)
    return frame


# Look each history up by its ticker symbol rather than by list position, so
# reordering or shortening `watchlist` cannot silently attach another
# company's prices to a variable.
history_by_symbol = {
    item['symbol']: item['historical'] for item in all_historical_data
}

amazon = _history_frame(history_by_symbol['AMZN'])
apple = _history_frame(history_by_symbol['AAPL'])
alphabet = _history_frame(history_by_symbol['GOOG'])
meta = _history_frame(history_by_symbol['META'])
microsoft = _history_frame(history_by_symbol['MSFT'])
nvidia = _history_frame(history_by_symbol['NVDA'])
tesla = _history_frame(history_by_symbol['TSLA'])

In [324]:
# Preview the first rows of the Amazon price frame.
amazon.head()

Unnamed: 0_level_0,open,high,low,close,volume,vwap,symbol
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2024-06-28,197.73,198.85,192.5,193.25,76897093,195.5825,AMZN
2024-06-27,195.01,199.84,194.2,197.85,74397491,196.725,AMZN
2024-06-26,186.92,194.8,186.26,193.61,65103893,190.3975,AMZN
2024-06-25,186.81,188.84,185.42,186.34,45898475,186.8525,AMZN
2024-06-24,189.33,191.0,185.33,185.57,50610379,187.8075,AMZN


In [325]:
def build_training_data(history, name, indicator_types, out_dir='../data/clean'):
    """Assemble and save the training set for one company.

    Parameters
    ----------
    history : pandas.DataFrame
        Date-indexed price frame with a ``symbol`` column; the symbol is read
        from its first row.
    name : str
        Prefix for the output file, ``<out_dir>/<name>_training_data.csv``.
    indicator_types : list of str
        Indicator types passed to ``get_indicators``.
    out_dir : str
        Directory the CSV is written to.

    Returns
    -------
    tuple
        ``(indicator_frame, rating_frame, training_frame)``.
    """
    symbol = history.symbol.iloc[0]

    indicator_frame = get_indicators(symbol, indicator_types)

    rating_frame = pd.DataFrame(
        get_jsonparsed_data(historical_rating_url(symbol))
    )[['date', 'ratingScore']]
    rating_frame.date = pd.to_datetime(rating_frame.date)
    rating_frame = rating_frame.set_index('date', drop=True)

    # Align the three sources on date and keep only complete rows, then
    # reindex to a daily frequency (forward-filling the added rows) before
    # the lag and target columns are derived.
    training_frame = (
        pd.concat([history, indicator_frame, rating_frame], axis=1)
        .sort_index()
        .dropna()
        .asfreq('D', method='ffill')
    )
    training_frame = lagging_features_target(training_frame)
    training_frame.to_csv(f'{out_dir}/{name}_training_data.csv', index=True)

    return indicator_frame, rating_frame, training_frame


# One call per company, in the original order; every variable name the
# separate cells used to define is still defined here.
amazon_indicators, amazon_rating, amazon_training_data = build_training_data(amazon, 'amazon', indicators)
apple_indicators, apple_rating, apple_training_data = build_training_data(apple, 'apple', indicators)
alphabet_indicators, alphabet_rating, alphabet_training_data = build_training_data(alphabet, 'alphabet', indicators)
meta_indicators, meta_rating, meta_training_data = build_training_data(meta, 'meta', indicators)
microsoft_indicators, microsoft_rating, microsoft_training_data = build_training_data(microsoft, 'microsoft', indicators)
nvidia_indicators, nvidia_rating, nvidia_training_data = build_training_data(nvidia, 'nvidia', indicators)
tesla_indicators, tesla_rating, tesla_training_data = build_training_data(tesla, 'tesla', indicators)

In [None]:
# Draw one box plot per column of train_df, with the column on the x axis.
for col in train_df.columns:
    fig = px.box(train_df, x=col)
    fig.show()

In [None]:
# Screener filters sent as query parameters.
# NOTE(review): urlencode serialises the booleans as 'False'/'True' -- confirm
# the API accepts that capitalisation.
additional_params = dict(
    limit=10,
    exchange='NASDAQ',
    isEtf=False,
    isFund=False,
    isActivelyTrading=True,
)

screener_url = stock_screener(**additional_params)
screener = get_jsonparsed_data(screener_url)
# Show the first result to inspect which fields are returned.
screener[0]

In [None]:
# Load the screener results into a frame and display them ordered by market
# capitalisation, largest first (the sorted frame is shown, not stored).
screener = pd.DataFrame(screener)
screener.sort_values(by='marketCap', ascending=False)