In [1]:
#Import Necessary Libraries
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import tensorflow as tf 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.layers import LSTM, Dense
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
#from fbprophet import Prophet

In [2]:
# Loading cleaned stock data
nike_data = pd.read_csv('C:/Users/johnt/Stock-Market-Data-Analysis-Visualization-and-Best-Predictive-Models-Development/datasets/cleaned_nike_data.csv')
adidas_data = pd.read_csv('C:/Users/johnt/Stock-Market-Data-Analysis-Visualization-and-Best-Predictive-Models-Development/datasets/cleaned_adidas_data.csv')
pepsico_data = pd.read_csv('C:/Users/johnt/Stock-Market-Data-Analysis-Visualization-and-Best-Predictive-Models-Development/datasets/cleaned_pepsi_data.csv')
sony_data = pd.read_csv('C:/Users/johnt/Stock-Market-Data-Analysis-Visualization-and-Best-Predictive-Models-Development/datasets/cleaned_sony_data.csv')


In [3]:
# Ensure the 'Date' column is correctly formatted and set as the index
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

In [4]:
# Verify the index of the DataFrame
print(nike_data.index)
print(adidas_data.index)
print(pepsico_data.index)
print(sony_data.index)


DatetimeIndex(['1980-12-02', '1980-12-03', '1980-12-04', '1980-12-05',
               '1980-12-08', '1980-12-09', '1980-12-10', '1980-12-11',
               '1980-12-12', '1980-12-15',
               ...
               '2024-12-11', '2024-12-12', '2024-12-13', '2024-12-16',
               '2024-12-17', '2024-12-18', '2024-12-19', '2024-12-20',
               '2024-12-23', '2024-12-24'],
              dtype='datetime64[ns]', name='Date', length=11107, freq=None)
DatetimeIndex(['1995-11-20', '1995-11-21', '1995-11-22', '1995-11-23',
               '1995-11-24', '1995-11-27', '1995-11-28', '1995-11-29',
               '1995-11-30', '1995-12-01',
               ...
               '2024-12-10', '2024-12-11', '2024-12-12', '2024-12-13',
               '2024-12-16', '2024-12-17', '2024-12-18', '2024-12-19',
               '2024-12-20', '2024-12-23'],
              dtype='datetime64[ns]', name='Date', length=7373, freq=None)
DatetimeIndex(['1980-03-18', '1980-03-19', '1980-03-20', '1980-03-21'

In [5]:
# Function to create moving averages
def create_moving_averages(df, window):
    df[f'MA_{window}'] = df['Price'].rolling(window=window).mean()

# Apply moving averages to each dataset
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    create_moving_averages(df, 7)
    create_moving_averages(df, 30)

# Forward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.ffill(inplace=True)

# Backward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.bfill(inplace=True)

# Drop any remaining 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.dropna(inplace=True)

# Verify the changes
print(nike_data.head())
print(adidas_data.head())
print(pepsico_data.head())
print(sony_data.head())



            Price  Open  High   Low     Vol. Change %  Unnamed: 7  Unnamed: 8  \
Date                                                                            
1980-12-02   0.18  0.18  0.18  0.18  100.71M  -99.79%        4.83        4.84   
1980-12-03   0.18  0.18  0.18  0.18   25.18M    0.00%        4.83        4.84   
1980-12-04   0.18  0.18  0.18  0.18   22.69M    0.00%        4.83        4.84   
1980-12-05   0.17  0.17  0.18  0.17    7.59M   -5.56%        4.83        4.84   
1980-12-08   0.16  0.16  0.16  0.16    5.41M   -5.88%        4.83        4.84   

            Unnamed: 9  Unnamed: 10 Unnamed: 11 Unnamed: 12      MA_7  \
Date                                                                    
1980-12-02        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-03        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-04        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-05        4.84         4.66      10.43M      -2.82%  0.1685

In [6]:
# Function to create price momentum
def create_price_momentum(df, period):
    df[f'Momentum_{period}'] = df['Price'].pct_change(periods=period)

# Apply price momentum to each dataset
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    create_price_momentum(df, 7)
    create_price_momentum(df, 30)

# Forward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.ffill(inplace=True)

# Backward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.bfill(inplace=True)

# Drop any remaining 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.dropna(inplace=True)

# Verify the changes
print(nike_data.head())
print(adidas_data.head())
print(pepsico_data.head())
print(sony_data.head())


            Price  Open  High   Low     Vol. Change %  Unnamed: 7  Unnamed: 8  \
Date                                                                            
1980-12-02   0.18  0.18  0.18  0.18  100.71M  -99.79%        4.83        4.84   
1980-12-03   0.18  0.18  0.18  0.18   25.18M    0.00%        4.83        4.84   
1980-12-04   0.18  0.18  0.18  0.18   22.69M    0.00%        4.83        4.84   
1980-12-05   0.17  0.17  0.18  0.17    7.59M   -5.56%        4.83        4.84   
1980-12-08   0.16  0.16  0.16  0.16    5.41M   -5.88%        4.83        4.84   

            Unnamed: 9  Unnamed: 10 Unnamed: 11 Unnamed: 12      MA_7  \
Date                                                                    
1980-12-02        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-03        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-04        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-05        4.84         4.66      10.43M      -2.82%  0.1685

In [7]:
# Function to create volatility indicators
def create_volatility(df, window):
    df[f'Volatility_{window}'] = df['Price'].rolling(window=window).std()

# Apply volatility indicators to each dataset
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    create_volatility(df, 7)
    create_volatility(df, 30)

# Forward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.ffill(inplace=True)

# Backward fill 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.bfill(inplace=True)

# Drop any remaining 'NaN' values
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.dropna(inplace=True)

# Verify the changes
print(nike_data.head())
print(adidas_data.head())
print(pepsico_data.head())
print(sony_data.head())


            Price  Open  High   Low     Vol. Change %  Unnamed: 7  Unnamed: 8  \
Date                                                                            
1980-12-02   0.18  0.18  0.18  0.18  100.71M  -99.79%        4.83        4.84   
1980-12-03   0.18  0.18  0.18  0.18   25.18M    0.00%        4.83        4.84   
1980-12-04   0.18  0.18  0.18  0.18   22.69M    0.00%        4.83        4.84   
1980-12-05   0.17  0.17  0.18  0.17    7.59M   -5.56%        4.83        4.84   
1980-12-08   0.16  0.16  0.16  0.16    5.41M   -5.88%        4.83        4.84   

            Unnamed: 9  Unnamed: 10 Unnamed: 11 Unnamed: 12      MA_7  \
Date                                                                    
1980-12-02        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-03        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-04        4.84         4.66      10.43M      -2.82%  0.168571   
1980-12-05        4.84         4.66      10.43M      -2.82%  0.1685

In [8]:
# Sort the data by date to ensure the date index is monotonic
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df.sort_index(inplace=True)

# Set the frequency for the date index
for df in [nike_data, adidas_data, pepsico_data, sony_data]:
    df = df.asfreq('B')


In [9]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import numpy as np

# Function to fit ARIMA model and evaluate performance
def fit_arima(df, order):
    model = ARIMA(df['Price'], order=order)
    model_fit = model.fit()
    predictions = model_fit.predict(start=len(df) - 30, end=len(df) - 1)
    mse = mean_squared_error(df['Price'][-30:], predictions)
    rmse = np.sqrt(mse)
    return rmse

# Fit ARIMA model to each dataset
nike_arima_rmse = fit_arima(nike_data, (2, 1, 2))
adidas_arima_rmse = fit_arima(adidas_data, (2, 1, 2))
pepsico_arima_rmse = fit_arima(pepsico_data, (2, 1, 2))
sony_arima_rmse = fit_arima(sony_data, (2, 1, 2))

print("Nike ARIMA RMSE:", nike_arima_rmse)
print("Adidas ARIMA RMSE:", adidas_arima_rmse)
print("PepsiCo ARIMA RMSE:", pepsico_arima_rmse)
print("Sony ARIMA RMSE:", sony_arima_rmse)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Nike ARIMA RMSE: 1.027369774111964
Adidas ARIMA RMSE: 3.017248096210527
PepsiCo ARIMA RMSE: 4.350190188745633
Sony ARIMA RMSE: 0.34950993688732385


In [10]:
# Function to fit LSTM model and evaluate performance
def fit_lstm(df):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df['Price'].values.reshape(-1, 1))

    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, 0])
        y.append(scaled_data[i, 0])
    X, y = np.array(X), np.array(y)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    model = Sequential()
    model.add(Input(shape=(X.shape[1], 1)))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(LSTM(units=50))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, y, epochs=1, batch_size=1, verbose=2)

    predictions = model.predict(X)
    predictions = scaler.inverse_transform(predictions)
    rmse = np.sqrt(np.mean((predictions - df['Price'].values[60:])**2))
    return rmse

# Fit LSTM model to each dataset
nike_lstm_rmse = fit_lstm(nike_data)
adidas_lstm_rmse = fit_lstm(adidas_data)
pepsico_lstm_rmse = fit_lstm(pepsico_data)
sony_lstm_rmse = fit_lstm(sony_data)

print("Nike LSTM RMSE:", nike_lstm_rmse)
print("Adidas LSTM RMSE:", adidas_lstm_rmse)
print("PepsiCo LSTM RMSE:", pepsico_lstm_rmse)
print("Sony LSTM RMSE:", sony_lstm_rmse)


11047/11047 - 353s - 32ms/step - loss: 3.7948e-04
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step
7313/7313 - 260s - 35ms/step - loss: 8.0399e-04
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 28ms/step
11226/11226 - 370s - 33ms/step - loss: 8.0363e-04
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step
11227/11227 - 371s - 33ms/step - loss: 5.4018e-04
[1m351/351[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step
Nike LSTM RMSE: 54.17864162515245
Adidas LSTM RMSE: 121.3897548176392
PepsiCo LSTM RMSE: 70.26795559596314
Sony LSTM RMSE: 7.627800760863745


In [12]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV



# Function to create LSTM model
def create_lstm_model(units=50, optimizer='adam'):
    model = Sequential()
    model.add(Input(shape=(60, 1)))
    model.add(LSTM(units=units, return_sequences=True))
    model.add(LSTM(units=units))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Prepare data
def prepare_data(df):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df['Price'].values.reshape(-1, 1))

    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, 0])
        y.append(scaled_data[i, 0])
    X, y = np.array(X), np.array(y)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    return X, y, scaler

# Fit LSTM model with Grid Search
def fit_lstm_with_grid_search(df):
    X, y, scaler = prepare_data(df)
    model = KerasRegressor(build_fn=create_lstm_model, verbose=0)

    # Define hyperparameter grid
    param_grid = {
        'units': [50, 100],
        'optimizer': ['adam', 'rmsprop'],
        'batch_size': [16, 32],
        'epochs': [10, 20]
    }

    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3)
    grid_result = grid.fit(X, y)

    # Best hyperparameters
    best_params = grid_result.best_params_
    print("Best Hyperparameters:", best_params)

    # Train model with best hyperparameters
    best_model = create_lstm_model(units=best_params['units'], optimizer=best_params['optimizer'])
    best_model.fit(X, y, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=2)

    predictions = best_model.predict(X)
    predictions = scaler.inverse_transform(predictions)
    rmse = np.sqrt(np.mean((predictions - df['Price'].values[60:])**2))
    return rmse

# Fit LSTM model to each dataset with Grid Search
nike_lstm_rmse = fit_lstm_with_grid_search(nike_data)
adidas_lstm_rmse = fit_lstm_with_grid_search(adidas_data)
pepsico_lstm_rmse = fit_lstm_with_grid_search(pepsico_data)
sony_lstm_rmse = fit_lstm_with_grid_search(sony_data)

print("Nike LSTM RMSE:", nike_lstm_rmse)
print("Adidas LSTM RMSE:", adidas_lstm_rmse)
print("PepsiCo LSTM RMSE:", pepsico_lstm_rmse)
print("Sony LSTM RMSE:", sony_lstm_rmse)


ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [None]:

# Function to create LSTM model
def create_lstm_model(units=50, optimizer='adam'):
    model = Sequential()
    model.add(Input(shape=(60, 1)))
    model.add(LSTM(units=units, return_sequences=True))
    model.add(LSTM(units=units))
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Prepare data
def prepare_data(df):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df['Price'].values.reshape(-1, 1))

    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, 0])
        y.append(scaled_data[i, 0])
    X, y = np.array(X), np.array(y)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    return X, y, scaler

# Function to evaluate model performance
def evaluate_model(df, units, optimizer, batch_size, epochs):
    X, y, scaler = prepare_data(df)
    model = create_lstm_model(units=units, optimizer=optimizer)
    model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=2)
    predictions = model.predict(X)
    predictions = scaler.inverse_transform(predictions)
    rmse = np.sqrt(np.mean((predictions - df['Price'].values[60:])**2))
    return rmse

# Define hyperparameter grid
units_list = [50, 100]
optimizer_list = ['adam', 'rmsprop']
batch_size_list = [16, 32]
epochs_list = [10, 20]

# Perform manual hyperparameter tuning
best_rmse = float('inf')
best_params = {}

for units in units_list:
    for optimizer in optimizer_list:
        for batch_size in batch_size_list:
            for epochs in epochs_list:
                rmse = evaluate_model(nike_data, units, optimizer, batch_size, epochs)
                if rmse < best_rmse:
                    best_rmse = rmse
                    best_params = {
                        'units': units,
                        'optimizer': optimizer,
                        'batch_size': batch_size,
                        'epochs': epochs
                    }

print("Best Hyperparameters:", best_params)
print("Best RMSE:", best_rmse)


Epoch 1/10
691/691 - 41s - 60ms/step - loss: 8.3505e-04
Epoch 2/10
691/691 - 34s - 49ms/step - loss: 1.6988e-04
Epoch 3/10
691/691 - 34s - 49ms/step - loss: 1.3147e-04
Epoch 4/10
691/691 - 34s - 49ms/step - loss: 1.0880e-04
Epoch 5/10
691/691 - 34s - 49ms/step - loss: 9.0923e-05
Epoch 6/10
691/691 - 34s - 49ms/step - loss: 8.1341e-05
Epoch 7/10
691/691 - 35s - 50ms/step - loss: 7.6259e-05
Epoch 8/10
691/691 - 33s - 48ms/step - loss: 6.7511e-05
Epoch 9/10
691/691 - 40s - 58ms/step - loss: 6.0214e-05
Epoch 10/10
691/691 - 34s - 50ms/step - loss: 5.3691e-05
[1m346/346[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step
Epoch 1/20
691/691 - 41s - 60ms/step - loss: 6.3391e-04
Epoch 2/20
691/691 - 34s - 49ms/step - loss: 1.5342e-04
Epoch 3/20
691/691 - 34s - 49ms/step - loss: 1.4054e-04
Epoch 4/20
691/691 - 34s - 49ms/step - loss: 1.0236e-04
Epoch 5/20
691/691 - 35s - 50ms/step - loss: 9.0017e-05
Epoch 6/20
691/691 - 34s - 49ms/step - loss: 7.1562e-05
Epoch 7/20
691/691 - 36s -

In [None]:

# Prepare data function
def prepare_data(df):
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(df[['Close']].values.reshape(-1, 1))
    X, y = [], []
    for i in range(60, len(scaled_data)):
        X.append(scaled_data[i-60:i, 0])
        y.append(scaled_data[i, 0])
    X, y = np.array(X), np.array(y)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    return X, y, scaler

# Prepare data for each stock
X_nike, y_nike, scaler_nike = prepare_data(nike_data)
X_adidas, y_adidas, scaler_adidas = prepare_data(adidas_data)
X_pepsico, y_pepsico, scaler_pepsico = prepare_data(pepsico_data)
X_sony, y_sony, scaler_sony = prepare_data(sony_data)


In [None]:

# Split data for Nike
X_train_nike, X_test_nike, y_train_nike, y_test_nike = train_test_split(X_nike, y_nike, test_size=0.2, random_state=42)

# Split data for Adidas
X_train_adidas, X_test_adidas, y_train_adidas, y_test_adidas = train_test_split(X_adidas, y_adidas, test_size=0.2, random_state=42)

# Split data for PepsiCo
X_train_pepsico, X_test_pepsico, y_train_pepsico, y_test_pepsico = train_test_split(X_pepsico, y_pepsico, test_size=0.2, random_state=42)

# Split data for Sony
X_train_sony, X_test_sony, y_train_sony, y_test_sony = train_test_split(X_sony, y_sony, test_size=0.2, random_state=42)
