In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout, Input
from tensorflow.keras.models import Sequential

In [7]:
# Load data in chronological order.
# The index is reset after sorting because later cells take positional row
# numbers from `.values` and then read dates back with `df.loc[idx, 'Date']`;
# without a fresh 0..n-1 index those label lookups would hit the wrong rows
# whenever the CSV is not already sorted by date.
df = (
    pd.read_csv('stock_data.csv', parse_dates=['Date'])
    .sort_values('Date')
    .reset_index(drop=True)
)


In [8]:
# Model input columns. 'Close' doubles as the prediction target further down,
# where its position is looked up with `features.index('Close')`.
features = ['Close', 'High', 'Low', 'Open', 'Volume']

# Raw 2-D array of the selected columns (rows in DataFrame order).
feature_values = df.loc[:, features].to_numpy()

In [9]:
# Column vector of closing prices and the row positions where it is missing.
close_prices = df['Close'].values.reshape(-1, 1)
missing_idx = np.where(np.isnan(close_prices))[0]

# Fill gaps in each feature column with that column's mean (computed while
# ignoring NaNs). Work on a copy so `feature_values` keeps the raw data.
feature_values_masked = feature_values.copy()
for col_pos in range(feature_values_masked.shape[1]):
    column = feature_values_masked[:, col_pos]  # view: writes land in the copy
    column[np.isnan(column)] = np.nanmean(column)

# Scale every feature to [0, 1]; the same scaler is reused to invert predictions.
scaler = MinMaxScaler()
feature_scaled = scaler.fit_transform(feature_values_masked)

# Build supervised samples: the previous `look_back` rows of all features
# predict the scaled Close of the current row. Rows whose original Close is
# missing are never used as targets.
look_back = 30
close_col = features.index('Close')
target_rows = [
    row for row in range(look_back, len(feature_scaled))
    if not np.isnan(close_prices[row])
]
X = np.array([feature_scaled[row - look_back:row, :] for row in target_rows])
y = np.array([feature_scaled[row, close_col] for row in target_rows])


In [10]:
# Build Bidirectional LSTM model.
# The input shape is declared with an explicit `Input` layer rather than an
# `input_shape=` argument on the first wrapped layer; Keras warns about the
# latter form for Sequential models.
model = Sequential([
    Input(shape=(look_back, len(features))),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.2),
    Bidirectional(LSTM(64)),
    Dropout(0.2),
    Dense(1)  # single regression output: the scaled Close value
])
model.compile(optimizer='adam', loss='mse')

# Stop once the monitored training loss has not improved for 5 epochs.
early_stop = EarlyStopping(monitor='loss', patience=5, verbose=1, mode='min')

  super().__init__(**kwargs)


In [11]:
# Train on all windows; `early_stop` may end the run before 50 epochs.
# The call is left as the cell's last expression so the History object is shown.
model.fit(x=X, y=y, batch_size=32, epochs=50, callbacks=[early_stop])

Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - loss: 0.1274
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - loss: 0.0492
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0357
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0218
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0208
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0178
Epoch 7/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0134
Epoch 8/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0142
Epoch 9/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0121
Epoch 10/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0120
Epoch 11/50
[1m5/5

<keras.src.callbacks.history.History at 0x7d0933843cd0>

In [None]:
# Impute every missing Close value, in the order given by `missing_idx`.
#
# * Rows with at least `look_back` rows of history are predicted by the model
#   from the preceding window of `feature_scaled`. All such windows are sent
#   through `model.predict` in one batch instead of one call per row.
# * Earlier rows fall back to simple imputation: the last observed Close before
#   the row, or a mean when nothing has been observed yet.
#
# Produces `predicted_dates` (Timestamps), `predictions` and
# `predictions_clean` (plain Python floats, same order as `missing_idx`).
close_col = features.index('Close')
close_flat = close_prices.ravel()
half_window = 10  # row 0 is filled from the mean of the first `half_window` rows

# `missing_idx` holds positional row numbers (it is derived from `.values`), so
# dates are fetched positionally too; a label lookup via `df.loc` can return a
# different row whenever the index is not 0..n-1 in row order.
predicted_dates = [df['Date'].iloc[idx] for idx in missing_idx]
predictions = [float('nan')] * len(missing_idx)

model_slots = []    # positions in `predictions` to be filled by the model
model_windows = []  # matching (look_back, n_features) input windows
for slot, idx in enumerate(missing_idx):
    if idx >= look_back:
        model_slots.append(slot)
        model_windows.append(feature_scaled[idx - look_back:idx, :])
        continue

    # Not enough history for a full window: use simple imputation.
    if idx == 0:
        leading = close_flat[:half_window]
        if np.isnan(leading).all():
            # Nothing observed near the start; use the global mean, as the
            # branch below does when it finds no earlier observation.
            imputed = np.nanmean(close_flat)
        else:
            imputed = np.nanmean(leading)
    else:
        earlier = close_flat[:idx]
        observed = earlier[~np.isnan(earlier)]
        # Forward fill from the most recent observed value, else global mean.
        imputed = observed[-1] if observed.size else np.nanmean(close_flat)
    predictions[slot] = float(imputed)

if model_windows:
    scaled_preds = model.predict(np.stack(model_windows), verbose=0)[:, 0]
    # The scaler was fitted on all feature columns, so place the predictions in
    # the Close column of a zero-padded array before inverting the scaling.
    padded = np.zeros((len(scaled_preds), len(features)))
    padded[:, close_col] = scaled_preds
    restored = scaler.inverse_transform(padded)[:, close_col]
    for slot, value in zip(model_slots, restored):
        predictions[slot] = float(value)

# Every entry is already a plain float; kept under this name for the next cell.
predictions_clean = [float(p) for p in predictions]

In [None]:
# Assemble the submission table: one row per imputed date, formatted as
# YYYY-MM-DD, paired with its imputed Close. Rows containing NaN are dropped
# before the file is written.
submission_columns = {
    'Date': [stamp.strftime('%Y-%m-%d') for stamp in predicted_dates],
    'Close': predictions_clean,
}
output = pd.DataFrame(submission_columns).dropna()
output.to_csv('submission.csv', index=False)