In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from scipy import stats

# Preprocessing cell: download one year of daily prices, label each day by
# whether it closed above the previous trading day, drop 3-sigma outliers,
# min-max scale the price/volume columns, save the result and plot two trends.

# Ticker to download; also used in the plot titles below.
stock_symbol = 'AAPL'
features = ['Open', 'High', 'Low', 'Close', 'Volume']

data = yf.download(stock_symbol, start='2024-01-01', end='2025-01-01')
if data.empty:
    # Fail here with a clear message instead of deep inside the scaler.
    raise ValueError(f"No price data was returned for {stock_symbol}.")
data.to_csv('Apple.csv')

# NOTE(review): some yfinance releases appear to return two-level
# (field, ticker) columns even for a single ticker -- confirm against the
# installed version. Keeping only the first level makes data['Close'] a plain
# Series in either case; single-level columns are left untouched.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

print("Data Head:")
print(data.head())

# Build the label BEFORE removing any rows so every day is compared with the
# trading day that actually preceded it, not with whatever row happens to be
# above it after outlier filtering.
previous_close = data['Close'].shift(1)
data['Price_Up'] = (data['Close'] > previous_close).astype(int)

# Keep rows whose every feature lies within 3 standard deviations of its mean.
z_scores = np.abs(stats.zscore(data[features]))
within_three_sigma = np.asarray((z_scores < 3).all(axis=1))

# The first row has no previous close: the comparison above silently yields 0
# for it, and dropna() cannot catch that, so exclude it explicitly.
has_previous_close = previous_close.notna().to_numpy()

# .copy() gives an independent frame rather than a view-like slice.
data = data[within_three_sigma & has_previous_close].copy()
data = data.dropna()

# NOTE(review): the scaler is fit on every remaining row, so any train/test
# split performed on the saved file sees min/max statistics from test rows.
# NOTE(review): Price_Up describes the same day as the features (including
# that day's Close); confirm this is the intended prediction target.
scaler = MinMaxScaler()
data_scaled = pd.DataFrame(scaler.fit_transform(data[features]), columns=features, index=data.index)
data_scaled['Price_Up'] = data['Price_Up']

# Save preprocessed data
data_scaled.to_csv('AAPL_preprocessed.csv')

# Visualizations: unscaled closing price and volume over time.
plt.figure(figsize=(10, 4))
plt.plot(data['Close'], label='Closing Price')
plt.title(f'{stock_symbol} Closing Price Trend (2024)')
plt.xlabel('Date')
plt.ylabel('Price')
plt.legend()
plt.tight_layout()
plt.savefig('closing_price_trend.png')
plt.show()

plt.figure(figsize=(10, 4))
plt.plot(data['Volume'], color='orange', label='Volume')
plt.title(f'{stock_symbol} Volume Trend (2024)')
plt.xlabel('Date')
plt.ylabel('Volume')
plt.legend()
plt.tight_layout()
plt.savefig('volume_trend.png')
plt.show()

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM
from keras.utils import to_categorical


# Training cell: load the preprocessed table, split it 80/20, and fit an MLP,
# a SimpleRNN and an LSTM on the same split.

# Imported here because this cell is the only user of it.
from keras.utils import set_random_seed

# One seed for the split and for Keras (weight init, batch shuffling) so a
# Restart & Run All reproduces the same numbers.
SEED = 42
set_random_seed(SEED)

# Relative path: matches where the preprocessing cell writes the file, instead
# of assuming the working directory is /content.
data = pd.read_csv('AAPL_preprocessed.csv', index_col=0)

X = data.drop('Price_Up', axis=1).values
y = data['Price_Up'].values
# num_classes=2 keeps the target two columns wide even if only one class is
# present, so it always matches the 2-unit softmax output layer.
y_cat = to_categorical(y, num_classes=2)

# NOTE(review): train_test_split shuffles rows by default; for date-ordered
# data confirm whether a chronological split (shuffle=False) is wanted.
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=SEED)

# Recurrent layers take (samples, timesteps, features); each row becomes a
# sequence of length 1.
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


def _compile_and_fit(model, train_inputs, test_inputs):
    """Compile `model` with the shared settings and fit it for 20 epochs.

    Uses the test split as validation data and returns the Keras History.
    """
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model.fit(
        train_inputs,
        y_train,
        epochs=20,
        batch_size=32,
        validation_data=(test_inputs, y_test),
    )


mlp_model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(2, activation='softmax')
])
mlp_history = _compile_and_fit(mlp_model, X_train, X_test)

rnn_model = Sequential([
    SimpleRNN(50, activation='relu', input_shape=(1, X_train.shape[1])),
    Dense(2, activation='softmax')
])
rnn_history = _compile_and_fit(rnn_model, X_train_rnn, X_test_rnn)

lstm_model = Sequential([
    LSTM(50, activation='relu', input_shape=(1, X_train.shape[1])),
    Dense(2, activation='softmax')
])
lstm_history = _compile_and_fit(lstm_model, X_train_rnn, X_test_rnn)


def evaluate_model(model, X, y_true, model_name):
    """Print classification metrics for `model` and add its ROC curve to the current plot.

    Args:
        model: Object with a ``predict(X)`` method returning a 2-D array of
            per-class scores (column 1 is used as the positive-class score).
        X: Inputs passed straight to ``model.predict``.
        y_true: 2-D array of true labels, one column per class; column 1 is
            used as the positive-class indicator for the ROC curve.
        model_name: Name shown in the printed header and the ROC legend label.

    Returns:
        None. Output is printed, and one line is drawn with ``plt.plot``; the
        caller is responsible for creating and showing the figure.
    """
    scores = model.predict(X)
    predicted_labels = np.argmax(scores, axis=1)
    actual_labels = np.argmax(y_true, axis=1)

    print(f"\n--- {model_name} ---")
    # Each report is printed as a heading followed by the metric's own output.
    for heading, metric in (
        ("Confusion Matrix:", confusion_matrix),
        ("\nClassification Report:", classification_report),
    ):
        print(heading)
        print(metric(actual_labels, predicted_labels))

    # ROC is computed from the positive-class column of labels and scores.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true[:, 1], scores[:, 1])
    area = auc(false_pos_rate, true_pos_rate)
    plt.plot(false_pos_rate, true_pos_rate, label=f'{model_name} (AUC = {area:.2f})')


# ROC comparison: every evaluate_model call draws one curve on this figure.
plt.figure()
for fitted_model, test_inputs, display_name in (
    (mlp_model, X_test, 'MLP'),
    (rnn_model, X_test_rnn, 'SimpleRNN'),
    (lstm_model, X_test_rnn, 'LSTM'),
):
    evaluate_model(fitted_model, test_inputs, y_test, display_name)

# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves')
plt.legend()
plt.tight_layout()
plt.savefig('roc_curves.png')
plt.show()


# Validation accuracy recorded after the last training epoch of each model.
model_names = ['MLP', 'SimpleRNN', 'LSTM']
mlp_acc, rnn_acc, lstm_acc = [
    run.history['val_accuracy'][-1]
    for run in (mlp_history, rnn_history, lstm_history)
]
accuracies = [mlp_acc, rnn_acc, lstm_acc]

plt.figure(figsize=(6, 4))
bars = plt.bar(model_names, accuracies, color=['skyblue', 'orange', 'lightgreen'])
plt.title('Validation Accuracy vs. Model Type')
plt.ylabel('Accuracy')
plt.ylim(0, 1)


# Write each bar's value just above it, centred horizontally on the bar.
for bar in bars:
    yval = bar.get_height()
    label_x = bar.get_x() + bar.get_width() / 2.0
    plt.text(label_x, yval + 0.01, f'{yval:.2f}', ha='center')

plt.tight_layout()
plt.savefig('accuracy_vs_model.png')
plt.show()
