In [10]:
# Function to fetch cryptocurrency data
def fetch_crypto_data(fsym='BTC', tsym='USD', limit=1000, aggregate=1, timeout=10):
    """Fetch historical hourly records from the CryptoCompare ``histohour`` endpoint.

    Args:
        fsym: Symbol sent as the ``fsym`` query parameter (default ``'BTC'``).
        tsym: Symbol sent as the ``tsym`` query parameter (default ``'USD'``).
        limit: Value of the ``limit`` query parameter (number of data points).
        aggregate: Value of the ``aggregate`` query parameter (1 = hourly data).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The object found at ``response_json['Data']['Data']`` (the historical
        data), or ``None`` when the request fails, the body is not valid JSON,
        or the payload does not have that structure.
    """
    url = 'https://min-api.cryptocompare.com/data/v2/histohour'
    params = {
        'fsym': fsym,
        'tsym': tsym,
        'limit': limit,          # Number of data points
        'aggregate': aggregate,  # Hourly data
    }

    try:
        # A timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(url, params=params, timeout=timeout)
        # Turn HTTP 4xx/5xx into an HTTPError (a RequestException subclass).
        response.raise_for_status()
        payload = response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    except ValueError as e:
        # Depending on the requests version, a non-JSON body surfaces as a
        # plain ValueError rather than a RequestException.
        print(f"Error fetching data: response was not valid JSON ({e})")
        return None

    try:
        return payload['Data']['Data']  # Extracting the historical data
    except (KeyError, TypeError):
        # The payload did not contain Data -> Data (e.g. an API-level error
        # response); report it the same way as a transport failure.
        detail = payload.get('Message', payload) if isinstance(payload, dict) else payload
        print(f"Error fetching data: unexpected response payload: {detail}")
        return None

# Function to preprocess data
def preprocess_data(data, window_size):
    """Turn raw price records into scaled sliding-window samples.

    The ``'close'`` value of every record is min-max scaled to ``[0, 1]`` and
    cut into overlapping windows: each sample holds ``window_size``
    consecutive scaled prices and its target is the scaled price that
    immediately follows the window.

    Args:
        data: Non-empty list of dictionaries, each with a ``'close'`` entry.
        window_size: Number of consecutive prices per input sample (>= 1).

    Returns:
        Tuple ``(X, y, scaler)`` where ``X`` has shape
        ``(len(data) - window_size, window_size, 1)``, ``y`` has shape
        ``(len(data) - window_size,)`` and ``scaler`` is the fitted
        ``MinMaxScaler``.

    Raises:
        ValueError: If ``data`` is not a non-empty list of dictionaries, if
            ``window_size`` is not positive, or if there are not more records
            than ``window_size`` (no complete sample could be built).
    """
    if not isinstance(data, list):
        raise ValueError("Input data must be a list of dictionaries")
    if not data:
        # Checked separately so an empty list fails with ValueError, not an
        # IndexError from probing data[0].
        raise ValueError("Input data must be a non-empty list of dictionaries")
    if not isinstance(data[0], dict):
        raise ValueError("Input data must be a list of dictionaries")
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    if len(data) <= window_size:
        raise ValueError(
            f"Need more than window_size={window_size} data points, got {len(data)}"
        )

    prices = np.array([entry['close'] for entry in data], dtype=float).reshape(-1, 1)

    scaler = MinMaxScaler(feature_range=(0, 1))
    prices_normalized = scaler.fit_transform(prices)

    n_samples = len(prices_normalized) - window_size
    X = np.array([prices_normalized[i:i + window_size, 0] for i in range(n_samples)])
    y = np.array([prices_normalized[i + window_size, 0] for i in range(n_samples)])
    # Add the trailing feature axis: (samples, timesteps, 1).
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    return X, y, scaler

# Function to build LSTM model
def build_lstm_model(input_shape, lstm_units, dropout_rate):
    """Assemble and compile the LSTM -> Dropout -> Dense(1) network.

    Args:
        input_shape: Passed as ``input_shape`` to the LSTM layer.
        lstm_units: Number of units of the LSTM layer.
        dropout_rate: Rate passed to the Dropout layer.

    Returns:
        The ``Sequential`` model, compiled with the ``'adam'`` optimizer and
        ``'mean_squared_error'`` loss.
    """
    layer_stack = [
        LSTM(units=lstm_units, input_shape=input_shape),
        Dropout(dropout_rate),
        Dense(units=1),  # single output value
    ]
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

# Function to make predictions
def make_predictions(model, data, scaler, window_size, num_predictions):
    """Forecast future prices by feeding each prediction back into the model.

    The last ``window_size`` ``'close'`` values of ``data`` are scaled with
    ``scaler`` and used as the first input window. After every prediction the
    window drops its oldest value and gains the new prediction at the end.

    Args:
        model: Object whose ``predict`` accepts an array of shape
            ``(1, window_size, 1)`` and returns an array indexable as ``[0, 0]``.
        data: Sequence of dictionaries with a ``'close'`` entry (raw records,
            not the windowed ``X``).
        scaler: Fitted scaler providing ``transform`` and ``inverse_transform``.
        window_size: Length of the input window (>= 1).
        num_predictions: Number of steps to forecast.

    Returns:
        1-D array of ``num_predictions`` prices on the original scale; an
        empty array when ``num_predictions`` is not positive.

    Raises:
        ValueError: If ``window_size`` is not positive or ``data`` holds fewer
            than ``window_size`` records.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")
    if len(data) < window_size:
        raise ValueError(
            f"Need at least window_size={window_size} data points, got {len(data)}"
        )
    if num_predictions <= 0:
        # Nothing to forecast; avoid handing an empty array to the scaler.
        return np.array([])

    # Extract the last window_size data points from the original data (not from X)
    prices = [entry['close'] for entry in data[-window_size:]]
    prices_normalized = scaler.transform(np.array(prices).reshape(-1, 1))
    last_sequence = np.reshape(prices_normalized, (1, window_size, 1))

    predictions = []
    for _ in range(num_predictions):
        prediction = model.predict(last_sequence)[0, 0]
        predictions.append(prediction)
        # The new step must be 3-D like the window, (1, 1, 1); a 2-D value
        # cannot be concatenated onto the (1, window_size - 1, 1) remainder.
        next_step = np.reshape(prediction, (1, 1, 1))
        last_sequence = np.concatenate((last_sequence[:, 1:, :], next_step), axis=1)

    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
    return predictions.flatten()

# Function to evaluate model
def evaluate_model(model, X, y, scaler):
    """Score the model on ``X``/``y`` in original (unscaled) units.

    Predictions and targets are both passed through
    ``scaler.inverse_transform`` before the errors are computed, so the
    metrics are expressed on the scale the scaler was fitted on.

    Args:
        model: Object whose ``predict(X)`` returns one value per sample.
        X: Model inputs.
        y: Scaled target values, one per sample in ``X``.
        scaler: Fitted scaler providing ``inverse_transform``.

    Returns:
        Tuple ``(mse, mae)``: mean squared error and mean absolute error.
        Both values are also printed.
    """
    # Predict on X
    y_pred = model.predict(X)

    # Inverse transform to get actual values
    y_pred_inv = scaler.inverse_transform(np.asarray(y_pred).reshape(-1, 1)).flatten()
    y_inv = scaler.inverse_transform(np.asarray(y).reshape(-1, 1)).flatten()

    # Calculate metrics
    mse = mean_squared_error(y_inv, y_pred_inv)
    mae = mean_absolute_error(y_inv, y_pred_inv)

    print(f'Mean Squared Error (MSE): {mse}')
    print(f'Mean Absolute Error (MAE): {mae}')

    # Hand the metrics back so callers can log or compare them.
    return mse, mae

# Ensure you pass X and y directly for evaluation
#evaluate_model(model, X, y, scaler)

# Main function for training and predicting (SageMaker entry point)
if __name__ == '__main__':
    # Script entry point: parse hyperparameters, train the LSTM on freshly
    # fetched data, evaluate it and record everything in an MLFlow run.
    parser = argparse.ArgumentParser()

    # Add arguments for hyperparameters
    parser.add_argument('--window_size', type=int, default=10)
    parser.add_argument('--lstm_units', type=int, default=50)
    parser.add_argument('--dropout_rate', type=float, default=0.2)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=32)

    # parse_known_args ignores arguments this script does not declare
    # (e.g. ones injected by the launcher or the notebook kernel).
    args, _ = parser.parse_known_args()

    # Set up MLFlow
    mlflow.set_tracking_uri('your_mlflow_tracking_uri')  # Replace with your MLFlow tracking URI
    mlflow.set_experiment('your_experiment_name')  # Replace with your MLFlow experiment name

    # Start MLFlow run
    with mlflow.start_run():
        # Log parameters
        mlflow.log_params({
            'window_size': args.window_size,
            'lstm_units': args.lstm_units,
            'dropout_rate': args.dropout_rate,
            'epochs': args.epochs,
            'batch_size': args.batch_size,
        })

        # Fetch data
        crypto_data = fetch_crypto_data()
        if not crypto_data:
            # fetch_crypto_data returns None on request errors; stop here with
            # a clear message instead of failing inside preprocessing.
            raise RuntimeError("No data returned by fetch_crypto_data(); aborting run")

        # Preprocess data
        X, y, scaler = preprocess_data(crypto_data, args.window_size)

        # Build LSTM model
        input_shape = (X.shape[1], 1)
        model = build_lstm_model(input_shape, args.lstm_units, args.dropout_rate)

        # Train the model
        history = model.fit(X, y, epochs=args.epochs, batch_size=args.batch_size, verbose=1)

        # Record the per-epoch training loss so the run shows a learning curve.
        for epoch, loss in enumerate(history.history.get('loss', [])):
            mlflow.log_metric('train_loss', float(loss), step=epoch)

        # Evaluate the model
        # NOTE: this scores the model on the same X, y it was fitted on, so
        # the reported errors are in-sample, not a held-out estimate.
        metrics = evaluate_model(model, X, y, scaler)
        if metrics is not None:
            # Only log when evaluate_model hands back an (mse, mae) pair;
            # an implementation that just prints returns None.
            mse, mae = metrics
            mlflow.log_metric('mse', float(mse))
            mlflow.log_metric('mae', float(mae))

        # Log model with MLFlow
        mlflow.keras.log_model(model, 'model')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Mean Squared Error (MSE): 189264.22715840722
Mean Absolute Error (MAE): 317.49970421291624
INFO:tensorflow:Assets written to: /tmp/tmp97yjw_pe/model/data/model/assets


INFO:tensorflow:Assets written to: /tmp/tmp97yjw_pe/model/data/model/assets


In [2]:
!pip install mlflow

Collecting mlflow
  Downloading mlflow-2.14.1-py3-none-any.whl.metadata (29 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Using cached alembic-1.13.1-py3-none-any.whl.metadata (7.4 kB)
Collecting graphene<4 (from mlflow)
  Using cached graphene-3.3-py2.py3-none-any.whl.metadata (7.7 kB)
Collecting opentelemetry-api<3,>=1.9.0 (from mlflow)
  Using cached opentelemetry_api-1.25.0-py3-none-any.whl.metadata (1.4 kB)
Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow)
  Using cached opentelemetry_sdk-1.25.0-py3-none-any.whl.metadata (1.4 kB)
Collecting querystring-parser<2 (from mlflow)
  Using cached querystring_parser-1.2.4-py2.py3-none-any.whl.metadata (559 bytes)
Collecting gunicorn<23 (from mlflow)
  Using cached gunicorn-22.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting Mako (from alembic!=1.10.0,<2->mlflow)
  Using cached Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Using cached graphql_core-3.2.3-py3-none-any.whl