In [66]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from datetime import date
import yfinance as yf
import datetime
from fredapi import Fred

def load_api_key(filepath):
    """Load and return the API key from a file."""
    with open(filepath, 'r') as file:
        api_key = file.read().strip()  # .strip() removes any leading/trailing whitespace
    return api_key


def build_model(window_size, feature_count, lstm_units, d, dense_units):
    model = Sequential()
    model.add(LSTM(lstm_units, input_shape=(window_size, feature_count), return_sequences=True))
    model.add(LSTM(lstm_units, return_sequences=True))
    model.add(LSTM(lstm_units))
    model.add(Dropout(d))
    model.add(Dense(dense_units, activation="relu", kernel_initializer="uniform"))
    model.add(Dense(1, activation="relu", kernel_initializer="uniform"))
    model.compile(loss='mse',optimizer='adam',metrics=['mae'])
    return model

def predict_aapl():
    # window_size: 60
    # lstm_unit: 64
    # lstm_dropout: 0.2
    # dense_units: 16
    # batch_size: 16
    # epochs: 200
    # training_error_rate: 0.00023464551486540586
    # training_accuracy: 0.006090724840760231
    # testing_error_rate: 2.2286689272732474e-05
    # testing_accuracy: 0.003547884291037917
    # MASE: 1.321271300315857
    model = build_model(60, 9, 64, 0.2, 16)
    model.load_weights('../weights/AAPL_best_model.weights.h5')
    return model

def predict_amzn():
    # window_size: 15
    # lstm_unit: 64
    # lstm_dropout: 0.1
    # dense_units: 16
    # batch_size: 16
    # epochs: 100
    # training_error_rate: 4.6920404201955535e-06
    # training_accuracy: 0.0013190218014642596
    # testing_error_rate: 0.0006488984799943864
    # testing_accuracy: 0.019547905772924423
    # MASE: 1.7842113971710205
    model = build_model(15, 9, 64, 0.1, 16)
    model.load_weights('../weights/AMZN_best_model.weights.h5')
    return model

def predict_googl():
    # window_size: 15
    # lstm_unit: 32
    # lstm_dropout: 0.2
    # dense_units: 16
    # batch_size: 16
    # epochs: 50
    # training_error_rate: 0.00014775595627725124
    # training_accuracy: 0.00714624160900712
    # testing_error_rate: 0.00036536858533509076
    # testing_accuracy: 0.013242176733911037
    # MASE: 1.8139164447784424
    model = build_model(15, 9, 32, 0.2, 16)
    model.load_weights('../weights/GOOGL_best_model.weights.h5')
    return model

def predict_msft():
    # window_size: 15
    # lstm_unit: 64
    # lstm_dropout: 0.1
    # dense_units: 32
    # batch_size: 32
    # epochs: 100
    # training_error_rate: 0.00011800717038568109
    # training_accuracy: 0.006529844831675291
    # testing_error_rate: 0.00017038073565345258
    # testing_accuracy: 0.008951040916144848
    # MASE: 1.142071008682251
    model = build_model(15, 9, 64, 0.1, 32)
    model.load_weights('../weights/MSFT_best_model.weights.h5')
    return np.random.uniform(300, 310)  # Replace with your model's prediction

def predict_nvda():
    # window_size: 15
    # lstm_unit: 32
    # lstm_dropout: 0.3
    # dense_units: 32
    # batch_size: 16
    # epochs: 50
    # training_error_rate: 0.00010817285510711372
    # training_accuracy: 0.004988082218915224
    # testing_error_rate: 0.0010140140075236559
    # testing_accuracy: 0.020503859966993332
    # MASE: 1.3169596195220947
    model = build_model(15, 9, 32, 0.3, 32)
    model.load_weights('../weights/NVDA_best_model.weights.h5')
    return model  # Replace with your model's prediction
    
# 'Close_sp500', 'Close_nasdaq', 
# 'Close_dow_jones','Close_tech_sector',
# 'open', 'high', 'low', 
# 'close', 'volume'

def get_data(api_key_path, stock_symbol):
    api_key = load_api_key(api_key_path)
    
    url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={stock_symbol}&outputsize=full&datatype=csv&apikey={api_key}'
    try:
        data = pd.read_csv(url)
        data = data.iloc[::-1]  # Reverse the data for chronological order
        print(f"Data retrieved successfully for {stock_symbol}.")
        return data
    except Exception as e:
        print(f"Failed to retrieve data: {e}")
        return None

# def get_data(api_key_path, stock_symbol, days):
#     api_key = load_api_key(api_key_path)
    
#     url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol={stock_symbol}&outputsize=full&datatype=csv&apikey={api_key}'
#     try:
#         data = pd.read_csv(url)
#         data['timestamp'] = pd.to_datetime(data['timestamp'])
#         data = data.sort_values('timestamp').reset_index(drop=True)  # Ensure chronological order
        
#         # Filter data for the past `days` days
#         end_date = data['timestamp'].iloc[-1]
#         start_date = end_date - datetime.timedelta(days=days)
#         filtered_data = data[data['timestamp'] >= start_date]
        
#         print(f"Data retrieved successfully for {stock_symbol}.")
#         return filtered_data
#     except Exception as e:
#         print(f"Failed to retrieve data: {e}")
#         return None


def load_data_to_csv(api_key_path, stock_symbol):
    df = get_data(api_key_path, stock_symbol)
    if df is not None and not df.empty:
        today = date.today()
        csv_filename = f"data/{stock_symbol}/{stock_symbol}_{today}.csv"
        df.to_csv(csv_filename, index=False)
        print(f"Data saved to {csv_filename}")
    else:
        print("No data to save.")

In [67]:
today = date.today()
api_key = load_api_key("../keys/fred_api.txt")
# You need an API key from FRED
fred = Fred(api_key=api_key)

# Download data for S&P 500
sp500 = yf.download('^GSPC', start='1999-01-01', end=today)
nasdaq = yf.download('^IXIC', start='1999-01-01', end=today)
dow_jones = yf.download('^DJI', start='1999-01-01', end=today)
tech_sector = yf.download('XLK', start='1999-01-01', end=today)

sp500.reset_index(inplace=True)
nasdaq.reset_index(inplace=True)
dow_jones.reset_index(inplace=True)
tech_sector.reset_index(inplace=True)

AAPL_data = get_data('../keys/alphavantage_api_key.txt', 'AAPL')
MSFT_data = get_data('../keys/alphavantage_api_key.txt', 'MSFT')
GOOGL_data = get_data('../keys/alphavantage_api_key.txt', 'GOOGL')
AMZN_data = get_data('../keys/alphavantage_api_key.txt', 'AMZN')
NVDA_data = get_data('../keys/alphavantage_api_key.txt', 'NVDA')
# AAPL_data = get_data('../keys/alphavantage_api_key.txt', 'AAPL', 60)
# MSFT_data = get_data('../keys/alphavantage_api_key.txt', 'MSFT', 15)
# GOOGL_data = get_data('../keys/alphavantage_api_key.txt', 'GOOGL', 15)
# AMZN_data = get_data('../keys/alphavantage_api_key.txt', 'AMZN', 15)
# NVDA_data = get_data('../keys/alphavantage_api_key.txt', 'NVDA', 15)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Data retrieved successfully for AAPL.
Data retrieved successfully for MSFT.
Data retrieved successfully for GOOGL.
Data retrieved successfully for AMZN.
Data retrieved successfully for NVDA.


In [83]:
AAPL_data

Unnamed: 0,timestamp,open,high,low,close,volume
6237,1999-11-01,80.00,80.6900,77.3700,77.62,2487300
6236,1999-11-02,78.00,81.6900,77.3100,80.25,3564600
6235,1999-11-03,81.62,83.2500,81.0000,81.50,2932700
6234,1999-11-04,82.06,85.3700,80.6200,83.62,3384700
6233,1999-11-05,84.62,88.3700,84.0000,88.31,3721500
...,...,...,...,...,...,...
4,2024-08-12,216.07,219.5099,215.6000,217.53,38028092
3,2024-08-13,219.01,221.8900,219.0100,221.27,44155331
2,2024-08-14,220.57,223.0300,219.7000,221.72,41960574
1,2024-08-15,224.60,225.3500,222.7600,224.72,46414013


In [81]:
AAPL_data2 = get_data('../keys/alphavantage_api_key.txt', 'AAPL')

Data retrieved successfully for AAPL.


In [58]:
sp500 = sp500[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
sp500['Date'] = pd.to_datetime(sp500['Date'])
sp500 = sp500[sp500['Date'] >= '1999-01-01']
nasdaq = nasdaq[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
nasdaq['Date'] = pd.to_datetime(nasdaq['Date'])
nasdaq = nasdaq[nasdaq['Date'] >= '1999-01-01']
dow_jones = dow_jones[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
dow_jones['Date'] = pd.to_datetime(dow_jones['Date'])
dow_jones = dow_jones[dow_jones['Date'] >= '1999-01-01']
tech_sector = tech_sector[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
tech_sector['Date'] = pd.to_datetime(tech_sector['Date'])
tech_sector = tech_sector[tech_sector['Date'] >= '1999-01-01']

sp500 = sp500.set_axis(['Date', 'Open_sp500', 'High_sp500', 'Low_sp500', 'Close_sp500', 'Volume_sp500'], axis=1)
nasdaq = nasdaq.set_axis(['Date', 'Open_nasdaq', 'High_nasdaq', 'Low_nasdaq', 'Close_nasdaq', 'Volume_nasdaq'], axis=1)
dow_jones = dow_jones.set_axis(['Date', 'Open_dow_jones', 'High_dow_jones', 'Low_dow_jones', 'Close_dow_jones', 'Volume_dow_jones'], axis=1)
tech_sector = tech_sector.set_axis(['Date', 'Open_tech_sector', 'High_tech_sector', 'Low_tech_sector', 'Close_tech_sector', 'Volume_tech_sector'], axis=1)

all_dates = pd.concat([
        sp500['Date'],
        nasdaq['Date'],
        dow_jones['Date'],
        tech_sector['Date']
    ]).drop_duplicates().sort_values().reset_index(drop=True)

# Create a DataFrame to start merging
df_merged = pd.DataFrame({'Date': all_dates})

# Merge with market data
df_merged = pd.merge(df_merged, sp500, on='Date', how='left')
df_merged = pd.merge(df_merged, nasdaq, on='Date', how='left')
df_merged = pd.merge(df_merged, dow_jones, on='Date', how='left')
df_merged = pd.merge(df_merged, tech_sector, on='Date', how='left')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tech_sector['Date'] = pd.to_datetime(tech_sector['Date'])


In [61]:
df_merged

Unnamed: 0,Date,Open_sp500,High_sp500,Low_sp500,Close_sp500,Volume_sp500,Open_nasdaq,High_nasdaq,Low_nasdaq,Close_nasdaq,...,Open_dow_jones,High_dow_jones,Low_dow_jones,Close_dow_jones,Volume_dow_jones,Open_tech_sector,High_tech_sector,Low_tech_sector,Close_tech_sector,Volume_tech_sector
0,1999-01-04,1229.229980,1248.810059,1219.099976,1228.099976,877000000,2207.540039,2233.570068,2192.679932,2208.050049,...,9184.009766,9350.330078,9122.469727,9184.269531,89410000,32.656250,33.562500,32.593750,33.000000,650600
1,1999-01-05,1228.099976,1246.109985,1228.099976,1244.780029,775000000,2207.750000,2251.770020,2206.489990,2251.270020,...,9184.780273,9338.740234,9182.980469,9311.190430,79860000,33.062500,34.031250,33.062500,33.843750,295200
2,1999-01-06,1244.780029,1272.500000,1244.780029,1272.339966,986900000,2286.129883,2320.949951,2286.129883,2320.860107,...,9315.419922,9562.219727,9315.419922,9544.969727,103340000,34.687500,34.937500,34.406250,34.843750,624700
3,1999-01-07,1272.339966,1272.339966,1257.680054,1269.729980,863000000,2293.270020,2333.699951,2284.239990,2326.090088,...,9542.139648,9542.139648,9426.019531,9537.759766,88290000,34.500000,35.031250,34.281250,34.734375,534600
4,1999-01-08,1269.729980,1278.239990,1261.819946,1275.089966,937800000,2363.800049,2369.550049,2314.949951,2344.409912,...,9538.280273,9647.959961,9525.410156,9643.320312,103250000,35.500000,35.562500,34.484375,34.875000,532800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6442,2024-08-12,5351.879883,5371.200195,5324.370117,5344.390137,3360160000,16793.640625,16895.789062,16699.390625,16780.609375,...,39556.011719,39587.058594,39251.718750,39357.011719,312380000,206.270004,208.910004,205.309998,207.309998,4290100
6443,2024-08-13,5376.979980,5436.500000,5376.979980,5434.430176,3648980000,16944.740234,17192.789062,16943.949219,17187.609375,...,39445.269531,39792.800781,39392.410156,39765.640625,365560000,209.880005,213.750000,209.619995,213.679993,4999800
6444,2024-08-14,5442.359863,5463.220215,5415.910156,5455.209961,3380050000,17227.640625,17260.730469,17032.169922,17192.599609,...,39800.589844,40068.750000,39737.199219,40008.390625,317800000,215.000000,215.940002,211.960007,214.850006,4461200
6445,2024-08-15,5501.129883,5546.229980,5501.129883,5543.220215,3723310000,17394.539062,17602.720703,17375.410156,17594.500000,...,40295.738281,40590.511719,40295.738281,40563.058594,446850000,217.630005,221.250000,217.160004,221.029999,4759400


In [63]:
market_df = df_merged[['Date', 'Close_sp500', 'Close_nasdaq', 'Close_dow_jones','Close_tech_sector']]

print(market_df)
print(market_df.isna().sum())

           Date  Close_sp500  Close_nasdaq  Close_dow_jones  Close_tech_sector
0    1999-01-04  1228.099976   2208.050049      9184.269531          33.000000
1    1999-01-05  1244.780029   2251.270020      9311.190430          33.843750
2    1999-01-06  1272.339966   2320.860107      9544.969727          34.843750
3    1999-01-07  1269.729980   2326.090088      9537.759766          34.734375
4    1999-01-08  1275.089966   2344.409912      9643.320312          34.875000
...         ...          ...           ...              ...                ...
6442 2024-08-12  5344.390137  16780.609375     39357.011719         207.309998
6443 2024-08-13  5434.430176  17187.609375     39765.640625         213.679993
6444 2024-08-14  5455.209961  17192.599609     40008.390625         214.850006
6445 2024-08-15  5543.220215  17594.500000     40563.058594         221.029999
6446 2024-08-16  5554.250000  17631.720703     40659.761719         221.399994

[6447 rows x 5 columns]
Date                 0
Clos

In [None]:
AAPL_merged = pd.merge(market_df, AAPL_data, left_on='Date', right_on='timestamp', how='inner')
MSFT_merged = pd.merge(market_df, MSFT_data, left_on='Date', right_on='timestamp', how='inner')
GOOGL_merged = pd.merge(market_df, GOOGL_data, left_on='Date', right_on='timestamp', how='inner')
AMZN_merged = pd.merge(market_df, AMZN_data, left_on='Date', right_on='timestamp', how='inner')
NVDA_merged = pd.merge(market_df, NVDA_data, left_on='Date', right_on='timestamp', how='inner')