# In [None]:
import boto3
import pandas as pd
import numpy as np
import json
import time
from io import StringIO
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# --- S3 and Data Configuration ---
# Bucket used both for reading the input files and writing the predictions.
S3_BUCKET = 'alpha-stock'        
# Key prefix passed to the S3 listing when loading input JSON files.
DATA_PREFIX = 'alphavantage_data/'     
# Object key under which the prediction JSON is stored.
PREDICTIONS_OUTPUT_KEY = 'predictions/predicted_stock_prices.json'

# Initialize the S3 client 
# Module-level client shared by load_data_from_s3 and upload_predictions_to_s3.
s3 = boto3.client('s3')

def load_data_from_s3(bucket, prefix=''):
    """Load every 1-minute time-series JSON file found under an S3 prefix.

    Lists all objects in ``bucket`` whose key starts with ``prefix``, reads
    each key ending in ``.json`` and keeps the files that contain a
    ``"Time Series (1min)"`` section. Each section becomes a float-valued
    DataFrame indexed by timestamp.

    Args:
        bucket: Name of the S3 bucket to read from.
        prefix: Key prefix used to filter the listing.

    Returns:
        One DataFrame with all usable files concatenated and sorted by
        timestamp, or ``None`` (after printing a notice) when no usable
        file was found.
    """
    data_frames = []

    # A single list_objects_v2 call returns at most 1000 keys; the paginator
    # follows continuation tokens so larger prefixes are not truncated.
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not key.endswith('.json'):
                continue
            obj_body = s3.get_object(Bucket=bucket, Key=key)['Body'].read().decode('utf-8')
            data_json = json.loads(obj_body)
            # Skip payloads without the expected section (e.g. a different
            # JSON document stored under the same prefix).
            if "Time Series (1min)" not in data_json:
                continue
            ts_data = data_json["Time Series (1min)"]
            df = pd.DataFrame.from_dict(ts_data, orient='index')
            # Convert all columns to float and use the timestamps as index
            df = df.astype(float)
            df.index = pd.to_datetime(df.index)
            df.sort_index(inplace=True)
            data_frames.append(df)

    if not data_frames:
        print("No JSON files found in S3 matching the criteria.")
        return None

    combined_df = pd.concat(data_frames)
    combined_df.sort_index(inplace=True)
    return combined_df

def preprocess_data(df):
    """Scale the '4. close' column into the [0, 1] range.

    Returns the scaled values as a single-column 2-D array together with the
    fitted scaler, so that predictions can later be mapped back to prices.
    """
    close_column = df['4. close'].values.reshape(-1, 1)
    price_scaler = MinMaxScaler(feature_range=(0, 1))
    return price_scaler.fit_transform(close_column), price_scaler

def create_dataset(dataset, time_step=60):
    """Slice a series into supervised (window, next value) pairs.

    Args:
        dataset: 2-D array-like of shape (n, k); only column 0 is used.
            A 1-D sequence is treated as the series itself.
        time_step: Number of consecutive values in each input window.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n - time_step, time_step)`` and
        ``y[i]`` is the value immediately following window ``X[i]``. When
        there are not enough rows for a single window, ``X`` is an empty
        ``(0, time_step)`` array and ``y`` is an empty 1-D array.
    """
    values = np.asarray(dataset)
    series = values[:, 0] if values.ndim > 1 else values

    # Window i covers [i, i + time_step) and its target sits at index
    # i + time_step, so there are exactly len - time_step valid windows
    # (the previous "- 1" silently dropped the most recent one).
    n_samples = len(series) - time_step
    if n_samples <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1].
        return (np.empty((0, time_step), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    X = np.array([series[i:i + time_step] for i in range(n_samples)])
    # Copy so the targets do not alias the caller's input array.
    y = series[time_step:].copy()
    return X, y

def build_model(time_step):
    """
    Assemble and compile a stacked two-layer LSTM regressor.

    The network consumes sequences of shape (time_step, 1), passes them
    through two 50-unit LSTM layers and a 25-unit dense layer, and emits a
    single value. It is compiled with the Adam optimizer and MSE loss.
    """
    layers = [
        LSTM(50, return_sequences=True, input_shape=(time_step, 1)),
        LSTM(50, return_sequences=False),
        Dense(25),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

def _json_default(value):
    """Convert NumPy scalars/arrays to native Python types for json.dumps."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

def upload_predictions_to_s3(bucket, key, predictions):
    """Serialize ``predictions`` to JSON and store it as an S3 object.

    Args:
        bucket: Destination S3 bucket name.
        key: Destination object key.
        predictions: JSON-compatible object; NumPy scalars and arrays are
            converted to native Python values before serialization.

    Raises:
        TypeError: If ``predictions`` contains a value that is neither
            JSON-serializable nor a NumPy scalar/array.
    """
    # Model outputs are NumPy scalars (e.g. float32), which json.dumps
    # rejects unless they are converted explicitly.
    predictions_json = json.dumps(predictions, default=_json_default)
    s3.put_object(
        Bucket=bucket,
        Key=key,
        Body=predictions_json.encode('utf-8'),
        # Declare the payload type so consumers do not see a generic binary object.
        ContentType='application/json',
    )
    print(f"Uploaded predictions to s3://{bucket}/{key}")

def main():
    """Run the pipeline: load data, train the LSTM, predict and upload.

    Loads the time-series data from S3, scales the closing prices, builds
    sliding windows, trains the model on the first 80% of the windows
    (validating on the rest), predicts the value following the most recent
    window and uploads the result as JSON. Returns early, after printing a
    notice, when no data is available or there is too little of it to train.
    """
    # --- Step 1: Load Data ---
    df = load_data_from_s3(S3_BUCKET, DATA_PREFIX)
    if df is None:
        return
    print("Data loaded from S3. Total records:", len(df))

    time_step = 60  # Use 60 timesteps (adjust based on your data frequency)
    # The scaler cannot be fit on an empty frame, and the final prediction
    # needs a full window of `time_step` records.
    if len(df) <= time_step:
        print(f"Not enough records ({len(df)}) for a window of {time_step} timesteps.")
        return

    # --- Step 2: Preprocess Data ---
    scaled_data, scaler = preprocess_data(df)
    X, y = create_dataset(scaled_data, time_step)
    # With fewer than two windows the 80/20 split leaves no training data
    # (and an empty X may not even be two-dimensional).
    if len(X) < 2:
        print(f"Not enough records ({len(df)}) to build training windows of {time_step} timesteps.")
        return
    X = X.reshape(X.shape[0], X.shape[1], 1)

    # --- Step 3: Split Data for Training and Testing ---
    # Chronological split: no shuffling, the newest windows are held out.
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    # --- Step 4: Build and Train the LSTM Model ---
    model = build_model(time_step)
    model.fit(X_train, y_train, batch_size=64, epochs=10, validation_data=(X_test, y_test))

    # --- Step 5: Predict Future Price ---
    # Use the last 'time_step' records to predict the next closing price
    last_sequence = scaled_data[-time_step:].reshape(1, time_step, 1)
    predicted_price_scaled = model.predict(last_sequence)
    predicted_price = scaler.inverse_transform(predicted_price_scaled)

    predictions = {
        # Cast the NumPy scalar to a native float so it is JSON-serializable.
        "predicted_price": float(predicted_price[0][0]),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
    }
    print("Predicted Stock Price:", predictions["predicted_price"])

    # --- Step 6: Upload Predictions to S3 ---
    upload_predictions_to_s3(S3_BUCKET, PREDICTIONS_OUTPUT_KEY, predictions)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
