In [1]:
import requests
import pandas as pd

api_key = "4FHTO2GAT3NL1EZ8"
symbol = "IKF"

# Fetch stock data
stock_url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=IYF&apikey=4FHTO2GAT3NL1EZ8&outputsize=full'
stock_r = requests.get(stock_url)
stock_data = stock_r.json()

# Extract the time series data
time_series_data = stock_data['Time Series (Daily)']

# Convert the data into a DataFrame
df = pd.DataFrame(time_series_data).T
df.index = pd.to_datetime(df.index)

# Fetch Federal Funds Rate data
fed_url = 'https://www.alphavantage.co/query?function=FEDERAL_FUNDS_RATE&interval=daily&apikey=4FHTO2GAT3NL1EZ8'
fed_r = requests.get(fed_url)
fed_data = fed_r.json()

# Extract Federal Funds Rate data
fed_time_series_data = fed_data['data']
fed_dates = [entry['date'] for entry in fed_time_series_data]
fed_rates = [float(entry['value']) for entry in fed_time_series_data]

# Create a DataFrame for Federal Funds Rate
fed_df = pd.DataFrame({'Federal Funds Rate': fed_rates}, index=fed_dates)
fed_df.index = pd.to_datetime(fed_df.index)

# Merge DataFrames based on date index
plus_fed_df = df.join(fed_df, how='left')

# Fetch unemployment data
job_url = 'https://www.alphavantage.co/query?function=UNEMPLOYMENT&apikey=4FHTO2GAT3NL1EZ8'
job_r = requests.get(job_url)
job_data = job_r.json()

# Extract unemployment data
job_time_series_data = job_data['data']
job_dates = [entry['date'] for entry in job_time_series_data]
job_rates = [float(entry['value']) for entry in job_time_series_data]

# Create a DataFrame for unemployment
job_df = pd.DataFrame({'Unemployment Rate': job_rates}, index=job_dates)
job_df.index = pd.to_datetime(job_df.index)

# Resample unemployment data to match stock data frequency (daily)
job_df_resampled = job_df.resample('D').ffill()

# Merge resampled unemployment data into plus_fed_df
final_df = plus_fed_df.join(job_df_resampled, how='left')

# Drop rows with NaN values
final_df_cleaned = final_df.dropna()

# Calculate 20-day EMA of closing column
final_df_cleaned['20EMA'] = final_df_cleaned['4. close'].rolling(window=20).mean()

final_feature_df = final_df_cleaned.dropna().drop(columns=['2. high', '3. low'])


final_feature_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df_cleaned['20EMA'] = final_df_cleaned['4. close'].rolling(window=20).mean()


Unnamed: 0,1. open,4. close,5. volume,Federal Funds Rate,Unemployment Rate,20EMA
2023-06-02,71.5900,72.7000,275646,5.08,3.6,73.4760
2023-06-01,70.5400,70.9200,105384,5.08,3.6,73.2915
2023-05-31,70.8300,70.2000,129842,5.08,3.7,73.0960
2023-05-30,71.1700,71.2100,94654,5.08,3.7,73.0075
2023-05-26,70.6400,70.9900,95063,5.08,3.7,72.9030
...,...,...,...,...,...,...
2000-06-02,76.9100,78.3100,2200,6.44,4.0,73.8740
2000-06-01,73.8800,75.4200,1000,6.65,4.0,74.0270
2000-05-31,73.6300,73.6300,200,6.83,4.0,74.0990
2000-05-30,71.5900,71.5900,0,6.71,4.0,74.0690


In [2]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math

In [None]:
x = final_feature_df[["1. open", "5. volume","20EMA","Federal Funds Rate","Unemployment Rate","20EMA"]].to_numpy()
y = final_feature_df["4. close"].to_numpy()

# Normalize the input data
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

# Prepare sequence data
sequence_length = 100  # Length of input sequences
sequences = []
targets = []

for i in range(len(y_scaled) - sequence_length - 60):
    sequences.append(x_scaled[i:i+sequence_length])
    targets.append(y_scaled[i+sequence_length:i+sequence_length+60])

# Convert lists to NumPy arrays
sequences = np.array(sequences)
targets = np.array(targets)

# Split into training and testing sets
split_ratio = 0.8
split_index = int(split_ratio * len(sequences))

x_train = sequences[:split_index]
y_train = targets[:split_index]

x_test = sequences[split_index:]
y_test = targets[split_index:]

# Build LSTM model
model_lstm = Sequential()
model_lstm.add(LSTM(128, return_sequences=True, input_shape=(sequence_length, x.shape[1])))
model_lstm.add(Dropout(0.2))  # Add Dropout layer after each LSTM layer

model_lstm.add(LSTM(64, return_sequences=False))
model_lstm.add(Dropout(0.2))  # Add Dropout layer after each LSTM layer

model_lstm.add(Dense(25))
model_lstm.add(Dense(60))  # Predict 60 days of prices

# Compile the model
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model_lstm.fit(x_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Make predictions using the model
predictions_scaled = model_lstm.predict(x_test)

# Inverse normalize the predicted results
predictions = scaler.inverse_transform(predictions_scaled)


In [None]:
y_test_2d = y_test.reshape(-1, 1)
predictions_2d = predictions.reshape(-1, 1)

rmse_test = np.sqrt(mean_squared_error(y_test_2d, predictions_2d))
rmse_test

rmse = np.sqrt(mean_squared_error(y_test_2d, predictions_2d))


In [None]:
model_lstm.summary()

In [None]:
# Visualize the predicted results
plt.figure(figsize=(10, 6))
plt.plot(y[-60:], label='Actual Prices')
plt.plot(predictions[0], label='Predicted Prices')
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Actual vs. Predicted Prices for the Next 60 Days')
plt.legend()
plt.show()