# In [2]:  (Jupyter notebook cell marker left over from the export)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import mplfinance as mpf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler


# Load the Nvidia price history from the CSV export
df = pd.read_csv('Nvidia Stocks June 22.csv')

# Quick look at the leading rows to confirm the file parsed as expected
print(df.head(n=5))


# Profiling report object for the raw frame; it is only constructed here and
# is not rendered or saved anywhere in the visible script
from ydata_profiling import ProfileReport
profile = ProfileReport(df, title="Profiling Report")

# Feature engineering
# Daily price change: close minus open for each row
df['Price Change'] = df['Close'].sub(df['Open'])

# Data cleaning
# Parse the 'Date' column as day-first dates (no other cleaning is done here)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
def show_exploratory_plots(data):
    """Draw the exploratory charts for the price history in *data*.

    This code was previously disabled by wrapping it in a bare triple-quoted
    string. It is kept as a function that nothing calls, so the script's
    behaviour is unchanged while the code stays syntax-checked. To view the
    charts, call ``show_exploratory_plots(df)`` after the cleaning and
    feature-engineering steps and before the feature columns are scaled.

    Args:
        data: DataFrame with 'Date', 'Open', 'High', 'Low', 'Close', 'Volume'
            and 'Price Change' columns.

    Returns:
        None. Every chart is shown as a side effect.
    """
    # Line plot of closing price over time
    plt.figure(figsize=(10, 6))
    plt.plot(data['Date'], data['Close'])
    plt.title('Nvidia Stock: Closing Price Over Time')
    plt.xlabel('Date')
    plt.ylabel('Closing Price')
    plt.xticks(rotation=45)
    plt.show()

    # Histogram of daily price change
    plt.figure(figsize=(8, 6))
    plt.hist(data['Price Change'], bins=20)
    plt.title('Nvidia Stock: Daily Price Change')
    plt.xlabel('Price Change')
    plt.ylabel('Frequency')
    plt.show()

    # Scatter plot of high and low prices
    plt.figure(figsize=(8, 6))
    sns.scatterplot(data=data, x='High', y='Low')
    plt.title('Nvidia Stock: High vs Low Prices')
    plt.xlabel('High Price')
    plt.ylabel('Low Price')
    plt.show()

    # Moving averages are kept as local Series so that calling this function
    # does not add columns to the caller's frame.
    ma10 = data['Close'].rolling(window=10).mean()
    ma50 = data['Close'].rolling(window=50).mean()

    # Line plot of closing price with moving averages
    plt.figure(figsize=(10, 6))
    plt.plot(data['Date'], data['Close'], label='Closing Price')
    plt.plot(data['Date'], ma10, label='MA10')
    plt.plot(data['Date'], ma50, label='MA50')
    plt.title('Nvidia Stock: Closing Price with Moving Averages')
    plt.xlabel('Date')
    plt.ylabel('Closing Price')
    plt.xticks(rotation=45)
    plt.legend()
    plt.show()

    # Correlation analysis
    correlation = data[['Open', 'High', 'Low', 'Close', 'Volume']].corr()
    sns.heatmap(correlation, annot=True, cmap='coolwarm')
    plt.title('Nvidia Stock: Correlation Analysis')
    plt.show()

    # Candlestick chart
    # Work on an explicit copy: assigning into a column slice of the original
    # frame is chained assignment and triggers SettingWithCopyWarning.
    ohlc = data[['Date', 'Open', 'High', 'Low', 'Close']].copy()
    ohlc['Date'] = pd.to_datetime(ohlc['Date'])
    ohlc = ohlc.set_index('Date')
    mpf.plot(ohlc, type='candle', title='Nvidia Stock: Candlestick Chart',
             ylabel='Price', mav=(10, 50), show_nontrading=True)

# Select features for prediction
features = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']

# NOTE(review): 'Close' is both an input feature and the target of the same
# row, so the network can learn to copy its input. A genuine forecast would
# target a later row (e.g. df['Close'].shift(-1)). Left unchanged here because
# the plotting code further down aligns predictions with the same-row dates.

# Fit the scaler on the chronological training rows only. Fitting it on the
# whole series would leak the test period's min/max into training.
train_rows, _ = train_test_split(df[features], test_size=0.2, shuffle=False)
scaler = MinMaxScaler()
scaler.fit(train_rows)

# Overwrite the feature columns with their scaled values. Training rows land
# in [0, 1]; later rows may fall outside that range because they were not
# seen when fitting.
df[features] = scaler.transform(df[features])

# Create input features and target variable (both in scaled units)
X = df[features].values
y = df['Close'].values

# Split the data into training and testing sets; shuffle=False keeps the
# original row order, so the test set is the final 20% of rows
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape the input data to fit the LSTM model (samples, time steps, features)
# with a single time step per sample
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Targets as column vectors of shape (samples, 1)
y_train = np.reshape(y_train, (y_train.shape[0], 1))
y_test = np.reshape(y_test, (y_test.shape[0], 1))


# Build the LSTM model: one 64-unit recurrent layer over a single time step,
# followed by a one-unit dense layer producing the scaled close value
model = Sequential()
model.add(LSTM(64, input_shape=(1, len(features))))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Training the model
# Early stopping monitors a validation slice taken from the END of the
# training data (validation_split uses the last samples, before shuffling).
# The test set is deliberately not used for validation, so the later test
# predictions are not biased by the stopping decision.
# restore_best_weights=True rolls back to the epoch with the lowest val_loss
# instead of keeping the weights from `patience` epochs after it.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_split=0.1,
    callbacks=[early_stop],
)

# Predict the stock prices (still in the scaler's units at this point)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Inverse transform the scaled data
# `scaler` was fitted on every column in `features`, so calling
# scaler.inverse_transform on a single-column array fails with a shape
# mismatch. Undo the scaling for the 'Close' column alone instead, using that
# column's fitted parameters: MinMaxScaler computes
# X_scaled = X * scale_ + min_, hence X = (X_scaled - min_) / scale_.
close_idx = features.index('Close')
close_min = scaler.min_[close_idx]
close_scale = scaler.scale_[close_idx]


def _unscale_close(scaled):
    """Return scaled 'Close' values converted back to price units (flat 1-D array)."""
    flat = np.asarray(scaled, dtype=float).reshape(-1)
    return (flat - close_min) / close_scale


# Each array is converted exactly once (the test predictions used to be
# inverse-transformed twice). y_train is left in scaled units, as before.
y_train_pred = _unscale_close(y_train_pred)
y_test_pred = _unscale_close(y_test_pred)
y_test = _unscale_close(y_test)




# Data visualization
# The feature columns of `df` were overwritten with scaled values when the
# scaler was applied, so df['Close'] is no longer in price units. Recover the
# actual closing prices through the fitted scaler so the "actual" line shares
# a scale with the predictions.
actual_close = scaler.inverse_transform(df[features].to_numpy())[:, features.index('Close')]

plt.figure(figsize=(12, 6))
plt.plot(df['Date'], actual_close, label='Actual Close Price')
# Train predictions cover the leading rows and test predictions the trailing
# rows, matching the unshuffled train/test split.
plt.plot(df['Date'].iloc[:len(y_train_pred)], y_train_pred, label='Train Predictions')
plt.plot(df['Date'].iloc[-len(y_test_pred):], y_test_pred, label='Test Predictions')
plt.title('Nvidia Stock: Actual vs Predicted Close Price')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.xticks(rotation=45)
plt.legend()
plt.show()



