In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Set the rolling window size: the number of consecutive rows averaged by the
# rolling mean of 'Close' that is stored in the 'Rolling_Avg' column.
window_size = 5

# Function to predict the next 7 days
def predict_next_7_days(model, scaler, last_known_data):
    """Recursively forecast the next 7 values of the target series.

    Each prediction is fed back into the input window so that the following
    step can be predicted from it.

    Parameters
    ----------
    model : object
        Anything with ``predict(x, verbose=0)`` that accepts an array of shape
        ``(1, 1, n_lags)`` and returns an array whose ``[0, 0]`` element is the
        scaled prediction.
    scaler : object
        Fitted scaler exposing ``inverse_transform``. When it was fitted on
        several columns, the target must be column 0 (as in
        ``['Close', 'Lag_1', ...]``).
    last_known_data : array-like
        Scaled lag window ordered newest first (``Lag_1, Lag_2, ..., Lag_n``),
        i.e. the same column order the model is trained on.

    Returns
    -------
    numpy.ndarray
        1-D array with the 7 predictions mapped back to the original scale.
    """
    horizon = 7

    # A row taken with ``.values`` from a mixed-dtype frame is an object array;
    # force a float window so the model receives numeric input.
    window = np.asarray(last_known_data, dtype=float).reshape(1, 1, -1)

    scaled_predictions = []
    for _ in range(horizon):
        # Pull the scalar out explicitly instead of assigning a (1, 1) array
        # into a single element.
        next_pred = float(np.asarray(model.predict(window, verbose=0))[0, 0])
        scaled_predictions.append(next_pred)

        # The window is newest-first: shift every lag one slot towards the
        # "older" end (the oldest value wraps to index 0) and overwrite index 0
        # with the new prediction, which becomes Lag_1 for the next step.
        window = np.roll(window, 1, axis=2)
        window[0, 0, 0] = next_pred

    # The scaler may have been fitted on more columns than the single target.
    # Place the predictions in column 0 of a zero-padded array of the fitted
    # width, invert, and keep only that column.
    n_features = getattr(scaler, "n_features_in_", 1)
    padded = np.zeros((horizon, n_features))
    padded[:, 0] = scaled_predictions
    return scaler.inverse_transform(padded)[:, 0]

# Helper: load one ticker's CSV and derive the rolling-average and lag columns
def _load_ticker_frame(ticker_value):
    """Read ``TRAIN/<ticker_value>.csv`` and add the derived columns.

    Adds ``Rolling_Avg`` (rolling mean of ``Close`` over ``window_size`` rows)
    and ``Lag_1`` .. ``Lag_10`` (``Close`` shifted by 1..10 rows), dropping the
    rows that each step leaves incomplete.
    """
    frame = pd.read_csv(f'TRAIN/{ticker_value}.csv')
    frame['Date'] = pd.to_datetime(frame['Date'])
    frame['Rolling_Avg'] = frame['Close'].rolling(window=window_size).mean()
    frame = frame.dropna()

    # Lags are computed on the already-filtered frame, then incomplete rows
    # are dropped again.
    lag_columns = {f'Lag_{i}': frame['Close'].shift(i) for i in range(1, 11)}
    return frame.assign(**lag_columns).dropna().reset_index(drop=True)


# Function to predict based on the given ticker and check the presence of date
def process_and_predict_for_ticker(ticker_value, date_input, model=None):
    """Print a 7-step Close forecast for one ticker, anchored on a date.

    If ``date_input`` is present in the ticker's data, the actual Close on that
    date is printed and the forecast starts from the row after it. Otherwise
    the forecast starts after the last available row and is labelled with the
    following calendar dates.

    Parameters
    ----------
    ticker_value : str
        Ticker symbol; the data is read from ``TRAIN/<ticker_value>.csv``.
    date_input : str or pandas.Timestamp
        Date to look up in the ``Date`` column.
    model : optional
        Trained model passed on to ``predict_next_7_days``. When omitted, the
        module-level ``model`` variable is used.

    Raises
    ------
    NameError
        If no model is passed and no global ``model`` exists.
    ValueError
        If the CSV has too few rows to build the lag features.
    """
    if model is None:
        # Fall back to the globally trained model.
        model = globals().get('model')
    if model is None:
        raise NameError(
            "No trained model available: pass `model=` or define a global `model` first."
        )

    data = _load_ticker_frame(ticker_value)
    if data.empty:
        raise ValueError(
            f"TRAIN/{ticker_value}.csv has too few rows to build the lag features."
        )

    lag_columns = [f'Lag_{i}' for i in range(1, 11)]
    feature_columns = ['Close'] + lag_columns

    # Per-column scaling over the same 11 columns the model's training data
    # is scaled with.
    feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(data[feature_columns])
    # A scaler fitted on Close alone maps scaled predictions back to prices;
    # fitted on the same rows, it has the same min/max as the Close column of
    # feature_scaler.
    close_scaler = MinMaxScaler(feature_range=(0, 1)).fit(data[['Close']])

    # Compare as timestamps so a 'YYYY-MM-DD' string works as well.
    target_date = pd.to_datetime(date_input)
    matching_rows = data[data['Date'] == target_date]
    date_found = not matching_rows.empty

    if date_found:
        print(f"Date {date_input} exists in the dataset.")

        # Extract actual close value on that date
        anchor = matching_rows.iloc[0]
        actual_close_value = anchor['Close']
        print(f"Actual Close Value on {date_input}: {actual_close_value}")
    else:
        print(f"Date {date_input} is not present in the dataset. Predicting future values...")
        anchor = data.iloc[-1]

    # The anchor row's own Lag_1..Lag_10 are the inputs that predict the
    # anchor's Close, which is already known. The row *after* the anchor has
    # lags [Close, Lag_1, ..., Lag_9] of the anchor, so build that window.
    next_lags = anchor[['Close'] + lag_columns[:-1]].to_numpy(dtype=float)
    # Column 0 ('Close' of the not-yet-known row) is only a placeholder so the
    # row has the width the scaler was fitted on; it is discarded below.
    next_row = pd.DataFrame(
        [np.concatenate(([next_lags[0]], next_lags))], columns=feature_columns
    )
    scaled_window = feature_scaler.transform(next_row)[0, 1:]

    predicted_values = predict_next_7_days(model, close_scaler, scaled_window)

    print("\nNext 7 Days Predictions:")
    if date_found:
        for i, predicted in enumerate(predicted_values, start=1):
            print(f"Day {i}: Predicted Close = {predicted:.2f}")
    else:
        # NOTE(review): these are consecutive calendar days; if the data holds
        # trading days only, the labels will include weekends - confirm.
        last_date = data['Date'].iloc[-1]
        future_dates = [last_date + pd.Timedelta(days=i) for i in range(1, 8)]
        for date, predicted in zip(future_dates, predicted_values):
            print(f"{date.date()}: Predicted Close = {predicted:.2f}")

# List of tickers
DATA_DIR = 'TRAIN'
if not os.path.isdir(DATA_DIR):
    # The relative path is resolved against the kernel's working directory, so
    # report the absolute location that was actually checked.
    raise FileNotFoundError(
        f"Data directory '{os.path.abspath(DATA_DIR)}' not found; "
        "it must contain one <TICKER>.csv file per ticker."
    )
# splitext keeps dotted tickers intact ('BRK.B.csv' -> 'BRK.B'), whereas
# split('.')[0] would truncate them to 'BRK'.
tickers = [os.path.splitext(f)[0] for f in os.listdir(DATA_DIR) if f.endswith('.csv')]

# Select a ticker (example: 'AAPL') and the date (example: '2023-12-01') to check
ticker_input = 'AAPL'  # Replace with any ticker you want to check
date_input = '2023-12-01'  # Replace with any date in YYYY-MM-DD format

# Check if the ticker exists in the dataset
if ticker_input in tickers:
    print(f"Processing {ticker_input}...")

    # Seed Python, NumPy and TensorFlow so weight initialisation and batch
    # shuffling are repeatable across runs.
    set_random_seed(42)

    # Load the data and preprocess
    data = pd.read_csv(os.path.join(DATA_DIR, f'{ticker_input}.csv'))
    data['Date'] = pd.to_datetime(data['Date'])
    data['Rolling_Avg'] = data['Close'].rolling(window=window_size).mean()
    data = data.dropna()

    # Create lag features: Lag_i is the Close value i rows earlier
    lag_features = {f'Lag_{i}': data['Close'].shift(i) for i in range(1, 11)}
    data = data.assign(**lag_features).dropna()
    if len(data) < 2:
        raise ValueError(
            f"{ticker_input}: not enough rows left after building lag features to train a model."
        )

    # Scale data: column 0 is the target, columns 1..10 are Lag_1..Lag_10
    feature_columns = ['Close'] + [f'Lag_{i}' for i in range(1, 11)]
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data[feature_columns])
    X = scaled_data[:, 1:]  # Lag features
    y = scaled_data[:, 0]   # Target (Close)

    # Split data chronologically: first 80% for training, the rest held out
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # Reshape for LSTM: (samples, 1 timestep, 10 lag features)
    X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
    X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

    # Build and train LSTM model
    model = Sequential([
        LSTM(50, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=10, batch_size=15, validation_data=(X_test, y_test))

    # Call the function to predict for the selected ticker and date
    process_and_predict_for_ticker(ticker_input, pd.to_datetime(date_input))
else:
    print(f"Ticker {ticker_input} not found in the dataset.")


FileNotFoundError: [Errno 2] No such file or directory: 'TRAIN'

In [2]:
pip install tensorflow


