In [3]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Input

# --- 1. Data Collection and Preparation ---
# Fetch daily price history for GOOG (Google) to train the model on.
start_date = '2012-01-01'
end_date = '2022-12-31'
ticker = 'GOOG'
# Pin auto_adjust explicitly: its default differs between yfinance releases
# (recent ones warn when it is left implicit), so stating it keeps the
# 'Close' column reproducible across installs.
data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
data.reset_index(inplace=True)

# Drop any rows containing null values
data.dropna(inplace=True)

print(f"Total rows downloaded: {len(data)}")
if data.empty:
    # Abort instead of only printing: every later step (split, scaling,
    # windowing, training) would otherwise fail with a far less clear error.
    raise ValueError(
        "FATAL ERROR: The downloaded data is empty. "
        "Check the ticker symbol and dates."
    )

# --- 2. Data Splitting and Scaling ---
# Chronological 80/20 split of the closing prices: the first 80% of the rows
# form the training set, the remaining rows are held out as the test set.
data_train_len = int(0.80 * len(data))
data_train = pd.DataFrame(data['Close'].iloc[:data_train_len])
data_test = pd.DataFrame(data['Close'].iloc[data_train_len:])

# Learn the min/max from the training prices only, then map them into [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data_train)
data_train_scale = scaler.transform(data_train)

# --- 3. Create Time-Series Input ---
# Each sample is a window of LOOKBACK consecutive scaled closing prices; its
# target is the scaled price of the row immediately after that window.
LOOKBACK = 100
n_train_rows = data_train_scale.shape[0]
if n_train_rows <= LOOKBACK:
    # Without this check an empty X_train would only surface later as an
    # IndexError on X_train.shape[1] during the reshape.
    raise ValueError(
        f"Need more than {LOOKBACK} training rows to build input windows, "
        f"got {n_train_rows}."
    )

X_train = np.array([
    data_train_scale[end - LOOKBACK:end, 0]
    for end in range(LOOKBACK, n_train_rows)
])
Y_train = np.array(data_train_scale[LOOKBACK:n_train_rows, 0])

# Reshape for LSTM input (samples, timesteps, features); one feature per step
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# --- 4. Build and Train LSTM Model ---
# Four stacked LSTM layers (50 -> 60 -> 80 -> 120 units), each followed by a
# progressively stronger Dropout, feeding a single-unit Dense output that
# predicts the next scaled closing price.
#
# The input shape is declared with an explicit Input layer rather than an
# `input_shape` argument on the first LSTM, which Keras warns against for
# Sequential models.
#
# NOTE(review): activation='relu' is kept so the trained model is unchanged,
# but it is not the LSTM default ('tanh'), and the Keras docs list tanh as a
# requirement for the cuDNN fast path -- confirm relu is intentional.
model = Sequential([
    Input(shape=(X_train.shape[1], 1)),

    # Layer 1
    LSTM(units=50, activation='relu', return_sequences=True),
    Dropout(0.2),

    # Layer 2
    LSTM(units=60, activation='relu', return_sequences=True),
    Dropout(0.3),

    # Layer 3
    LSTM(units=80, activation='relu', return_sequences=True),
    Dropout(0.4),

    # Layer 4 (last LSTM layer: return_sequences is left at its default of
    # False, so only the final timestep's output is passed on)
    LSTM(units=120, activation='relu'),
    Dropout(0.5),

    # Final Dense layer (output)
    Dense(units=1),
])

# Compile and fit the model
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, Y_train, epochs=50, batch_size=32, verbose=1)

# --- 5. Save the Model ---
# Save the model so the Streamlit app can use it. The path is defined once so
# the file written and the path reported in the message cannot drift apart.
MODEL_PATH = 'stock_predictions_model.keras'
model.save(MODEL_PATH)

print(f"Model trained and saved as '{MODEL_PATH}'")

  data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
  super().__init__(**kwargs)


Total rows downloaded: 2768
Epoch 1/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 107ms/step - loss: 0.0345
Epoch 2/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 99ms/step - loss: 0.0069
Epoch 3/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 104ms/step - loss: 0.0064
Epoch 4/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 121ms/step - loss: 0.0059
Epoch 5/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 139ms/step - loss: 0.0061
Epoch 6/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 127ms/step - loss: 0.0046
Epoch 7/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 145ms/step - loss: 0.0046
Epoch 8/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 147ms/step - loss: 0.0046
Epoch 9/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 137ms/step - loss: 0.0039
Epoch 10/50
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m