<a href="https://colab.research.google.com/github/freshpex/AI-Final-Project/blob/main/AI_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from sklearn.metrics import mean_squared_error


In [None]:
# Map each company name to the CSV file that holds its price history
datasets = {
    'Tesla': 'datasets/tesla.csv',
    'Apple': 'datasets/apple.csv',
    'LG': 'datasets/lgtelevision.csv',
    'Netflix': 'datasets/netflix.csv',
    'Google': 'datasets/google.csv'
}

# Read every file into its own frame, indexed by a timezone-naive 'Date'
combined_dfs = [
    pd.read_csv(csv_path)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']).dt.tz_localize(None))
    .set_index('Date')
    for csv_path in datasets.values()
]

# Stack all frames row-wise and put the rows in chronological order.
# NOTE(review): the company name is not stored on the rows, so after sorting
# rows from different companies are interleaved by date.
combined_df = pd.concat(combined_dfs).sort_index()

combined_df.head(5)

In [None]:
# Print column dtypes, non-null counts and memory usage of the combined frame
combined_df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
combined_df.describe()

In [None]:
# Plot the closing price over time to get a feel for the overall trend
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(combined_df['Close'])
ax.set_title('Stock Prices Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price')
plt.show()

**LSTM**

In [None]:
# Normalize every column to the [0, 1] range so training converges faster.
# The scaler learns its per-column min/max from the training portion only
# (the first 80% of rows, the same fraction used for the train/test split
# later in the notebook) so no information from held-out rows leaks into
# the scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(combined_df.iloc[:int(len(combined_df) * 0.8)])

# All rows are transformed with the training statistics; values in the
# held-out portion may therefore fall slightly outside [0, 1].
scaled_data = scaler.transform(combined_df)

**Training and Testing**

In [None]:
# Chronological split of the scaled data: first 80% for training, last 20% for testing
train_size = int(len(scaled_data) * 0.8)
train_data, test_data = np.split(scaled_data, [train_size])

# Turn a 2-D series into (window of past rows -> next-row target) samples
def create_dataset(data, time_step=1, target_col=3):
    """Build sliding-window samples for sequence training.

    Parameters
    ----------
    data : array-like, 2-D (rows, columns)
        Row-ordered observations; converted with ``np.asarray``.
    time_step : int, default 1
        Number of consecutive rows in each input window.
    target_col : int, default 3
        Column whose value in the row right after the window is the target.

    Returns
    -------
    X : np.ndarray, shape (n_samples, time_step, columns)
        ``X[i]`` is ``data[i:i + time_step]``.
    y : np.ndarray, shape (n_samples,)
        ``y[i]`` is ``data[i + time_step, target_col]``.

    Notes
    -----
    ``n_samples`` is ``len(data) - time_step - 1``, i.e. the last possible
    window is not emitted. The plotting offsets later in the notebook are
    computed from this count, so it is kept as is.
    When ``data`` is too short to yield any sample, correctly shaped empty
    arrays are returned so that ``X.shape[2]`` remains valid downstream.
    """
    data = np.asarray(data)
    n_samples = len(data) - time_step - 1

    if n_samples <= 0:
        # Keep the rank of X stable even when there is nothing to emit
        return np.empty((0, time_step) + data.shape[1:]), np.empty((0,))

    X = np.stack([data[start:start + time_step] for start in range(n_samples)])
    # Targets are the rows immediately following each window
    y = data[time_step:time_step + n_samples, target_col].copy()
    return X, y

# Each sample looks back over the previous 60 rows
time_step = 60
(X_train, y_train), (X_test, y_test) = (
    create_dataset(split, time_step) for split in (train_data, test_data)
)

# Spell out the 3-D layout (samples, time steps, features) the LSTM consumes;
# the arrays are reshaped to their own dimensions
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))



**Build the Model**

In [None]:
# Three stacked 100-unit LSTM layers, each followed by 20% dropout, then a
# small dense head that emits a single value per sample
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(time_step, X_train.shape[2])),
    Dropout(0.2),
    LSTM(100, return_sequences=True),
    Dropout(0.2),
    # Last recurrent layer returns only its final step for the dense head
    LSTM(100, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(1),
])

# Adam optimizer, mean squared error as the training loss
model.compile(optimizer='adam', loss='mean_squared_error')

**Train Model**

In [None]:
# Fit the network on the training windows; `hist` keeps the per-epoch loss
fit_options = dict(batch_size=32, epochs=15, verbose=2)
hist = model.fit(X_train, y_train, **fit_options)

**Plot the Loss**

In [None]:
# Training loss recorded after each epoch
fig, ax = plt.subplots()
ax.plot(hist.history['loss'])
ax.set_title('Training loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend(['train'], loc='upper left')
plt.show()

**Predictions**

In [None]:
# Run the trained model on the training and test windows
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# The targets were taken from column 3 of the scaled data, so the predictions
# must be un-scaled with that column's min/max. inverse_transform needs a
# full-width matrix, so the predictions are written into column 3 of a zero
# matrix, inverted, and column 3 is read back.
target_col = 3
n_features = scaled_data.shape[1]

train_padded = np.zeros((train_predict.shape[0], n_features))
train_padded[:, target_col] = train_predict[:, 0]
train_predict = scaler.inverse_transform(train_padded)[:, target_col]

test_padded = np.zeros((test_predict.shape[0], n_features))
test_padded[:, target_col] = test_predict[:, 0]
test_predict = scaler.inverse_transform(test_padded)[:, target_col]



**Plot Predictions**

In [None]:
# Row-aligned overlay for the training predictions: NaN everywhere except the
# rows that have a prediction (the first `time_step` rows have none)
train_predict_plot = np.full_like(scaled_data[:, 3], np.nan)
train_start = time_step
train_predict_plot[train_start:train_start + len(train_predict)] = train_predict

# Same for the test predictions, placed after the training rows plus the
# look-back window of the test split
test_predict_plot = np.full_like(scaled_data[:, 3], np.nan)
test_start = len(train_predict) + (time_step * 2) + 1
test_stop = len(scaled_data) - 1
test_predict_plot[test_start:test_stop] = test_predict

# Actual values of column 3 (back in original units) with both overlays
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(scaler.inverse_transform(scaled_data)[:, 3], label='Actual Stock Price')
ax.plot(train_predict_plot, label='Train Prediction')
ax.plot(test_predict_plot, label='Test Prediction')
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()

**Test Model With a Different Dataset**

In [None]:
# Load one dataset on its own for a per-company evaluation.
# NOTE(review): apple.csv is also one of the files combined for training
# earlier in the notebook, so this is not data the model has never seen.
file = 'datasets/apple.csv'
testData = (
    pd.read_csv(file)
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]).dt.tz_localize(None))
    .set_index('Date')
    .dropna()
)

# Scale with the scaler that was already fitted (no re-fitting here)
input_scaled = scaler.transform(testData.values)



In [None]:
# Number of rows available in the evaluation dataset
length = len(testData)

# One input window of `time_step` consecutive scaled rows per predictable row
x_test = np.array(
    [input_scaled[end - time_step:end] for end in range(time_step, length)]
)

# Spell out the 3-D layout (samples, time steps, features) expected by the model
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2])


In [None]:
# Make predictions for every window of the evaluation dataset
y_pred = model.predict(x_test)

# Un-scale the predictions with the min/max of the target column (column 3,
# the same column the actual prices below are read from). inverse_transform
# needs a full-width matrix, so the predictions are written into column 3 of
# a zero matrix, inverted, and column 3 is read back.
target_col = 3
padded_pred = np.zeros((y_pred.shape[0], scaled_data.shape[1]))
padded_pred[:, target_col] = y_pred[:, 0]
predicted_price = scaler.inverse_transform(padded_pred)[:, target_col]

# Actual closing prices for the same rows (the first `time_step` rows have no prediction)
y_test = testData.iloc[time_step:, target_col].values


**Visualization**

In [None]:
# Actual vs. predicted price on a shared date axis
eval_dates = testData.index[time_step:]
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(eval_dates, y_test, color='blue', label='Actual Stock Price')
ax.plot(eval_dates, predicted_price, color='red', label='Predicted Stock Price')
ax.set_title('Stock Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Stock Price')
ax.legend()
plt.show()

**Performance Metrics**

In [None]:
# Error between actual and predicted prices: MSE and its square root (RMSE)
mse = mean_squared_error(y_true=y_test, y_pred=predicted_price)
rmse = np.sqrt(mse)

# Report both metrics, one per line
print(
    f"Mean Squared Error (MSE): {mse}",
    f"Root Mean Squared Error (RMSE): {rmse}",
    sep="\n",
)