# Project Details

```
Type: Time Series
Dataset: jena_climate_2009_2016.csv
Model architecture: LSTM and Ensemble of models
```

### Install required Libraries

In [1]:
!pip install kfp==2.8.0




[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


### Check the README file to install the additional components the Kubeflow pipeline needs, such as KServe, the Prometheus Pushgateway, MinIO (S3-compatible storage), and cluster roles

# Project Walkthrough as follows

```
python version 3.8.9
kfp version 2.8.0
kubernetes version 1.29.1
kubeflow pipeline version 2.2.0
```

**Pipeline components:**
- Data Preparation component     (Data downloading and preprocessing step)
- Training Model component     (Model training using both LSTM and ensemble of models Step)
  - Training Model using LSTM architecture Component
  - Training Model using ensemble of models architecture Component
- Evaluating Trained Model component      (Evaluation Step)
- Backtesting on Trained Model component    (Backtesting Step for time series data)
- Performance Check for best model to deploy Component    (Step that picks the best of the trained models)
- Deploy Model Component    (Deploying the Best model)
- Monitor Model Component    (Monitor The deployed model)

**Arguments for the Kubeflow time-series experiment pipeline (each name describes what it configures):**
```
arguments={
        "epochs": 1, 
        "service_account_name" : "sa-minio-kserve", 
        "namespace" : "kubeflow", 
        "service_name" : "weather-forcast-service",
        "kserve_version" :"v1beta1"
        }
```

# Importing Required Libraries

In [None]:
from kfp.v2.dsl import component, Input, Output, Dataset, Model, Metrics
from kfp import dsl
import kfp

# Pipeline Components

### Data Preparation Component

In [None]:
@component(packages_to_install=['pandas', 'numpy', 'scikit-learn', 'requests'])
def preprocess_data(output_data: Output[Dataset]):
    """Download the Jena climate dataset, scale it and save chronological splits.

    The rows are split in time order (no shuffling): the last 20% become the
    test set, and the last 20% of the remainder become the validation set.
    The imputer and scaler are fitted on the training split only and then
    applied to the validation and test splits, so no statistics from held-out
    data leak into training.

    Args:
        output_data: Output artifact. Its path is used as a directory that
            receives ``train.npy``, ``val.npy`` and ``test.npy``.

    Raises:
        requests.HTTPError: If the dataset download returns an error status.
    """
    import io
    import os
    import zipfile

    import numpy as np
    import pandas as pd
    import requests
    from sklearn.impute import SimpleImputer
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import RobustScaler

    url = 'https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip'

    # A timeout keeps the step from hanging forever on a stalled connection.
    response = requests.get(url, timeout=(10, 300))
    response.raise_for_status()  # Check for HTTP errors

    # Unzip the archive into the working directory
    with zipfile.ZipFile(io.BytesIO(response.content), 'r') as zip_ref:
        zip_ref.extractall()

    csv_path = 'jena_climate_2009_2016.csv'
    df = pd.read_csv(csv_path)

    # The timestamp column is not part of the saved arrays, so it is dropped
    # without being parsed; only the numeric columns are kept.
    df_numeric = df.drop(columns=['Date Time'])

    # Split chronologically BEFORE fitting any transformer.
    train_val, test = train_test_split(df_numeric, test_size=0.2, shuffle=False)
    train, val = train_test_split(train_val, test_size=0.2, shuffle=False)

    # Mean imputation for missing values, RobustScaler to limit the influence
    # of outliers. Both learn their parameters from the training rows only.
    imputer = SimpleImputer(strategy='mean')
    scaler = RobustScaler()
    train = scaler.fit_transform(imputer.fit_transform(train))
    val = scaler.transform(imputer.transform(val))
    test = scaler.transform(imputer.transform(test))

    # Save preprocessed data
    data_path = output_data.path
    os.makedirs(data_path, exist_ok=True)
    np.save(os.path.join(data_path, 'train.npy'), train)
    np.save(os.path.join(data_path, 'val.npy'), val)
    np.save(os.path.join(data_path, 'test.npy'), test)

    print("Data preprocessing is complete.")


### Training Model Component

#### Training Model using LSTM architecture Component

In [None]:
@component(packages_to_install=['tensorflow', 'numpy', 'scikit-learn'])
def train_model(input_data: Input[Dataset], trained_model: Output[Model], train_metrics: Output[Metrics], epochs: int = 10):
    """Fit a single-layer LSTM that predicts the next value of column 0.

    Args:
        input_data: Dataset artifact whose directory holds ``train.npy`` and
            ``val.npy``.
        trained_model: Output artifact; the model is saved under the
            sub-directory ``1`` (the model version).
        train_metrics: Output artifact that receives the last epoch's
            ``train_loss`` and ``val_loss``.
        epochs: Number of training epochs.
    """
    import os

    import numpy as np
    import tensorflow as tf

    def make_windows(series, window=1):
        # Each sample is `window` consecutive values of column 0; its target
        # is the value of column 0 on the row right after the window.
        starts = range(len(series) - window)
        inputs = np.array([series[s:(s + window), 0] for s in starts])
        targets = np.array([series[s + window, 0] for s in starts])
        return inputs, targets

    def as_lstm_input(windows):
        # Keras LSTM layers expect [samples, time steps, features].
        return np.reshape(windows, (windows.shape[0], windows.shape[1], 1))

    # 24 consecutive rows per input window.
    # NOTE(review): this was described as "24 hours", but the Jena dataset is
    # recorded every 10 minutes and the preprocessing step does not resample,
    # so 24 rows would span about 4 hours -- confirm the intended horizon.
    time_steps = 24

    # Load the preprocessed splits and turn them into supervised samples.
    train_series = np.load(os.path.join(input_data.path, 'train.npy'))
    val_series = np.load(os.path.join(input_data.path, 'val.npy'))
    X_train, y_train = make_windows(train_series, time_steps)
    X_val, y_val = make_windows(val_series, time_steps)
    X_train = as_lstm_input(X_train)
    X_val = as_lstm_input(X_val)

    # Build the network layer by layer, then compile it.
    lstm_layer = tf.keras.layers.LSTM(50, activation='relu', input_shape=(time_steps, 1))
    output_layer = tf.keras.layers.Dense(1)
    model = tf.keras.models.Sequential([lstm_layer, output_layer])
    model.compile(optimizer='adam', loss='mse')

    history = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_val, y_val),
        verbose=1,
    )

    # '1' is the model version directory.
    export_dir = os.path.join(trained_model.path, '1')
    model.save(export_dir)

    # Report the losses of the final epoch.
    final_losses = history.history
    train_metrics.log_metric("train_loss", final_losses['loss'][-1])
    train_metrics.log_metric("val_loss", final_losses['val_loss'][-1])

    print("Model training is complete.")


#### Training Model using ensemble of models architecture Component

In [None]:
@component(packages_to_install=['tensorflow', 'numpy', 'scikit-learn'])
def ensemble_models(input_data: Input[Dataset], input_model: Input[Model], ensemble_model: Output[Model], ensemble_metrics: Output[Metrics]):
    """Build an LSTM + CNN + dense ensemble and score it on the test split.

    The already-trained LSTM is loaded from ``input_model``; a CNN and a
    dense network are fitted on the training split. The ensemble prediction
    is the unweighted mean of the three models' predictions and is scored on
    the held-out test split, which none of the models is fitted on.

    Args:
        input_data: Dataset artifact whose directory holds ``train.npy`` and
            ``test.npy``.
        input_model: Model artifact holding the trained LSTM under ``1``.
        ensemble_model: Output artifact; each member model is saved as
            ``1/model_<index>``.
        ensemble_metrics: Output artifact that receives ``ensemble_mse`` and
            ``ensemble_mae``.
    """
    import tensorflow as tf
    import numpy as np
    import os
    from sklearn.metrics import mean_squared_error, mean_absolute_error

    time_steps = 24

    def load_windows(file_name):
        # Load one split and turn column 0 into (window, next value) samples
        # shaped [samples, time steps, features] for the Keras models.
        data = np.load(os.path.join(input_data.path, file_name))
        X, y = [], []
        for i in range(len(data) - time_steps):
            X.append(data[i:(i + time_steps), 0])
            y.append(data[i + time_steps, 0])
        X, y = np.array(X), np.array(y)
        return np.reshape(X, (X.shape[0], X.shape[1], 1)), y

    # The extra models learn from the training split; the test split is used
    # only for scoring so the ensemble metrics stay an honest held-out
    # estimate that can be compared with the single-model evaluation.
    X_train, y_train = load_windows('train.npy')
    X_test, y_test = load_windows('test.npy')

    # Load the trained model
    base_model = tf.keras.models.load_model(os.path.join(input_model.path, '1')) ## here '1' is version of model

    # Create an ensemble of models
    models = [base_model]

    # Add a CNN model
    cnn_model = tf.keras.models.Sequential([
        tf.keras.layers.Conv1D(64, 3, activation='relu', input_shape=(time_steps, 1)),
        tf.keras.layers.MaxPooling1D(2),
        tf.keras.layers.Conv1D(128, 3, activation='relu'),
        tf.keras.layers.MaxPooling1D(2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(50, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    cnn_model.compile(optimizer='adam', loss='mse')
    models.append(cnn_model)

    # Add a simple Dense model
    dense_model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(time_steps, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    dense_model.compile(optimizer='adam', loss='mse')
    models.append(dense_model)

    # Train the additional models (the LSTM at index 0 is already trained)
    for model in models[1:]:
        model.fit(X_train, y_train, epochs=5, verbose=0)

    # Ensemble prediction = element-wise mean of the members' predictions
    ensemble_predictions = np.mean([model.predict(X_test) for model in models], axis=0)

    # Calculate metrics on the held-out test split
    mse = mean_squared_error(y_test, ensemble_predictions)
    mae = mean_absolute_error(y_test, ensemble_predictions)

    # Log metrics
    ensemble_metrics.log_metric("ensemble_mse", mse)
    ensemble_metrics.log_metric("ensemble_mae", mae)

    # Save the ensemble members
    # NOTE(review): the deploy step serves the chosen artifact with a
    # TensorFlow Serving predictor; presumably that expects one SavedModel
    # directly under the version directory, not three sub-models -- confirm
    # this layout can actually be served.
    ensemble_model_path = os.path.join(ensemble_model.path, '1') ## here '1' is version of ensemble of models
    os.makedirs(ensemble_model_path, exist_ok=True)
    for i, model in enumerate(models):
        model.save(os.path.join(ensemble_model_path, f'model_{i}'))

    print("Ensemble model creation and evaluation is complete.")


### Evaluating Trained Model Component

In [None]:
@component(packages_to_install=['tensorflow', 'numpy', 'scikit-learn', 'matplotlib'])
def evaluate_model(input_data: Input[Dataset], input_model: Input[Model], evaluation_metrics: Output[Metrics]):
    """Score the trained model on the test split and plot a sample of results.

    Args:
        input_data: Dataset artifact whose directory holds ``test.npy``.
        input_model: Model artifact holding the trained model under ``1``.
        evaluation_metrics: Output artifact that receives ``mse`` and ``mae``.
    """
    import os

    import matplotlib.pyplot as plt
    import numpy as np
    import tensorflow as tf
    from sklearn.metrics import mean_absolute_error, mean_squared_error

    def make_windows(series, window=1):
        # Each sample is `window` consecutive values of column 0; its target
        # is the value of column 0 on the row right after the window.
        starts = range(len(series) - window)
        inputs = np.array([series[s:(s + window), 0] for s in starts])
        targets = np.array([series[s + window, 0] for s in starts])
        return inputs, targets

    time_steps = 24

    # Build the supervised test samples, shaped [samples, time steps, features].
    test_series = np.load(os.path.join(input_data.path, 'test.npy'))
    X_test, y_test = make_windows(test_series, time_steps)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # '1' is the model version directory.
    model_dir = os.path.join(input_model.path, '1')
    model = tf.keras.models.load_model(model_dir)

    y_pred = model.predict(X_test)

    # Compute both error measures, then log them.
    scores = {
        "mse": mean_squared_error(y_test, y_pred),
        "mae": mean_absolute_error(y_test, y_pred),
    }
    for metric_name, metric_value in scores.items():
        evaluation_metrics.log_metric(metric_name, metric_value)

    # Plot the first 100 actual values against the first 100 predictions.
    # NOTE(review): the image is written into the *input* model artifact's
    # directory; presumably it is not uploaded with any output artifact --
    # confirm whether the plot is meant to be kept.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(y_test[:100], label='Actual')
    ax.plot(y_pred[:100], label='Predicted')
    ax.legend()
    ax.set_title('Actual vs Predicted Values')
    fig.savefig(os.path.join(input_model.path, 'actual_vs_predicted.png'))

    print("Model evaluation is complete.")


### Backtesting on Trained Model Component

In [None]:
@component(packages_to_install=['tensorflow', 'numpy', 'scikit-learn'])
def backtest_model(input_data: Input[Dataset], input_model: Input[Model], backtest_metrics: Output[Metrics], max_samples: int = 500):
    """Backtest the trained model on the first windows of the test split.

    One-step-ahead predictions are made for at most ``max_samples`` test
    windows and compared with the actual next values.

    Args:
        input_data: Dataset artifact whose directory holds ``test.npy``.
        input_model: Model artifact holding the trained model under ``1``.
        backtest_metrics: Output artifact that receives ``backtest_mse`` and
            ``backtest_mae``.
        max_samples: Upper bound on the number of test windows to backtest.
            Fewer are used when the test split is shorter.

    Raises:
        ValueError: If ``max_samples`` is not positive or the test split has
            too few rows to form a single window.
    """
    import tensorflow as tf
    import numpy as np
    import os
    from sklearn.metrics import mean_squared_error, mean_absolute_error

    if max_samples <= 0:
        raise ValueError(f"max_samples must be positive, got {max_samples}.")

    # Load test data
    test_data = np.load(os.path.join(input_data.path, 'test.npy'))

    # Prepare test data for LSTM
    def create_dataset(data, time_steps=1):
        X, y = [], []
        for i in range(len(data) - time_steps):
            v = data[i:(i + time_steps), 0]
            X.append(v)
            y.append(data[i + time_steps, 0])
        return np.array(X), np.array(y)

    time_steps = 24  # rows per input window
    if len(test_data) <= time_steps:
        raise ValueError(
            f"Test split has {len(test_data)} rows; more than {time_steps} are "
            "needed to build a backtesting window."
        )

    X_test, y_test = create_dataset(test_data, time_steps)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # Load the trained model
    model = tf.keras.models.load_model(os.path.join(input_model.path, '1')) ## here '1' is version of model

    # Never index past the available windows when the test split is short.
    n_samples = min(max_samples, X_test.shape[0])

    # Each prediction depends only on its own window, so a single batched
    # call replaces one predict() call per sample.
    predictions = model.predict(X_test[:n_samples])[:, 0]
    actuals = y_test[:n_samples]

    # Calculate metrics
    mse = mean_squared_error(actuals, predictions)
    mae = mean_absolute_error(actuals, predictions)

    # Log metrics
    backtest_metrics.log_metric("backtest_mse", mse)
    backtest_metrics.log_metric("backtest_mae", mae)

    print(f"Backtesting complete. MSE: {mse}, MAE: {mae}.")


### Performance Check for best model to deploy Component

In [None]:
@component()
def preformance_check(
    evaluation_metrics: Input[Metrics],
    ensemble_metrics: Input[Metrics],
    deploy_model_metrics: Output[Metrics],
    ensemble_model: Input[Model],
    train_model: Input[Model],
    deploy_model: Output[Model]):
    """Select the model with the lower test error as the one to deploy.

    Candidates are ranked by MSE first and MAE second; on an exact tie the
    single trained model is kept. The winner's metrics are copied into
    ``deploy_model_metrics`` and ``deploy_model`` is pointed at its path.

    Args:
        evaluation_metrics: Metrics of the single trained model; must carry
            ``mse`` and ``mae``.
        ensemble_metrics: Metrics of the ensemble; must carry
            ``ensemble_mse`` and ``ensemble_mae``.
        deploy_model_metrics: Output artifact receiving the winner's metrics.
        ensemble_model: Model artifact of the ensemble.
        train_model: Model artifact of the single trained model.
        deploy_model: Output artifact whose path is set to the winner's path.

    Raises:
        ValueError: If any expected metric is missing, since a selection
            made without it would be meaningless.
    """
    # (label, metrics artifact, (mse key, mae key), model artifact)
    candidates = [
        ('trained model', evaluation_metrics, ('mse', 'mae'), train_model),
        ('ensemble model', ensemble_metrics, ('ensemble_mse', 'ensemble_mae'), ensemble_model),
    ]

    scored = []
    for label, metrics, keys, model in candidates:
        values = tuple(metrics.metadata.get(key) for key in keys)
        missing = [key for key, value in zip(keys, values) if value is None]
        if missing:
            # Fail loudly instead of comparing None values.
            raise ValueError(f"Missing metric(s) {missing} for the {label}.")
        scored.append((values, metrics, model))

    # Tuples compare MSE first, then MAE; min() keeps the first candidate on
    # a tie, i.e. the single trained model.
    _, best_metrics, best_model = min(scored, key=lambda candidate: candidate[0])

    # Update deploy_model_metrics and deploy_model based on the selected model
    deploy_model_metrics.metadata.update(best_metrics.metadata)
    deploy_model.path = best_model.path

    # Output the results
    print("Deploy Model Metrics:", deploy_model_metrics.metadata)
    print("Deploy Model:", deploy_model.path)
    print("\n---- Models Performance Check Complete ----")

### Deploy model using kserve Component

In [None]:
@component(packages_to_install=['kserve', 'kubernetes'])
def deploy_model(input_model: Input[Model], service_account_name:str="sa-minio-kserve", namespace:str= "kubeflow", service_name:str="weather-model", kserve_version:str="v1beta1"):
    """Create a KServe InferenceService that serves the given model.

    Args:
        input_model: Model artifact to serve; its ``minio://`` URI is
            rewritten to an ``s3://`` storage URI.
        service_account_name: Service account set on the predictor.
        namespace: Kubernetes namespace in which the service is created.
        service_name: Name of the InferenceService.
        kserve_version: KServe API version, appended to the KServe API group.
    """
    from kubernetes import client, config
    from kserve import KServeClient
    from kserve import constants
    from kserve import V1beta1InferenceService
    from kserve import V1beta1InferenceServiceSpec
    from kserve import V1beta1PredictorSpec
    from kserve import V1beta1TFServingSpec

    # The predictor is given an s3:// URI instead of the artifact's minio:// URI.
    uri = input_model.uri.replace('minio://', '')
    input_model_path = f"s3://{uri}"

    # Use the in-cluster Kubernetes configuration.
    config.load_incluster_config()

    api_version = constants.KSERVE_GROUP + '/' + kserve_version

    # TensorFlow Serving predictor; Istio sidecar injection is disabled via
    # the annotation.
    predictor = V1beta1PredictorSpec(
        service_account_name=service_account_name,
        tensorflow=V1beta1TFServingSpec(storage_uri=input_model_path),
    )
    isvc = V1beta1InferenceService(
        api_version=api_version,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=namespace,
            annotations={'sidecar.istio.io/inject': 'false'},
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # NOTE(review): presumably create() fails when an InferenceService with
    # this name already exists -- confirm, and consider patch/replace if the
    # pipeline is meant to be re-run against the same service name.
    kserve_client = KServeClient()
    kserve_client.create(isvc)

    print(f'Model deployed as an InferenceService: {service_name}')



### Monitor Deployed Model using Prometheus Component

In [None]:
@component(packages_to_install=['prometheus-client', 'psutil'])
def monitor_model():
    """
    This component simulates model monitoring by generating and pushing
    performance metrics to a Prometheus Pushgateway.

    Ten samples are pushed, roughly 30 seconds apart. Each sample carries a
    simulated accuracy value plus the CPU and memory utilization of the
    machine the component runs on.
    """
    from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
    import time
    import psutil  # To monitor resource utilization

    num_samples = 10
    push_period_seconds = 30
    cpu_sample_seconds = 1

    # Initialize Prometheus registry and gauges
    registry = CollectorRegistry()
    performance_gauge = Gauge('model_performance', 'Track model performance', registry=registry)
    cpu_usage_gauge = Gauge('cpu_usage', 'Track CPU usage percentage', registry=registry)
    memory_usage_gauge = Gauge('memory_usage', 'Track memory usage percentage', registry=registry)

    for i in range(num_samples):
        # Simulating metrics update
        accuracy = 0.85 + 0.01 * i  # Simulate increasing accuracy
        performance_gauge.set(accuracy)

        # Capture resource utilization. An explicit interval is used because
        # psutil documents that the first cpu_percent() call made without one
        # returns a meaningless 0.0.
        cpu_usage = psutil.cpu_percent(interval=cpu_sample_seconds)
        memory_usage = psutil.virtual_memory().percent
        cpu_usage_gauge.set(cpu_usage)
        memory_usage_gauge.set(memory_usage)

        # Push metrics to Prometheus Pushgateway
        push_to_gateway('pushgateway:9091', job='model_monitoring', registry=registry)

        # Wait between samples, but not after the final push. The CPU
        # sampling time counts towards the 30-second period.
        if i < num_samples - 1:
            time.sleep(push_period_seconds - cpu_sample_seconds)

    print("Model performance and resource utilization metrics sent to Prometheus.")


# Combining all of the above components into the Kubeflow pipeline

In [None]:
@dsl.pipeline(
    name='Time Series Forecasting Pipeline',
    description='A pipeline to train, deploy, and monitor a time series forecasting model.'
)
def time_series_pipeline(epochs: int = 10, service_account_name:str="sa-minio-kserve", namespace:str= "kubeflow", service_name:str="weather-model", kserve_version:str="v1beta1"):
    # Wires the components into one graph:
    #   preprocess -> train -> (evaluate, ensemble) -> performance check
    #   -> deploy -> monitor
    # NOTE(review): the backtest_model component is defined in this file but
    # is not part of this graph -- confirm whether that is intentional.

    # Data preparation; every later step reads this dataset artifact.
    preprocess_task = preprocess_data()
    prepared_data = preprocess_task.outputs['output_data']

    # LSTM training.
    train_task = train_model(input_data=prepared_data, epochs=epochs)
    lstm_model = train_task.outputs['trained_model']

    # Both of these consume the trained LSTM and the prepared data.
    evaluate_task = evaluate_model(input_data=prepared_data, input_model=lstm_model)
    ensemble_task = ensemble_models(input_data=prepared_data, input_model=lstm_model)

    # Choose between the single model and the ensemble.
    preformance_check_task = preformance_check(
        evaluation_metrics=evaluate_task.outputs['evaluation_metrics'],
        ensemble_metrics=ensemble_task.outputs['ensemble_metrics'],
        ensemble_model=ensemble_task.outputs['ensemble_model'],
        train_model=lstm_model,
    )

    # Serve the selected model.
    deploy_task = deploy_model(
        input_model=preformance_check_task.outputs['deploy_model'],
        service_account_name=service_account_name,
        namespace=namespace,
        service_name=service_name,
        kserve_version=kserve_version,
    )

    # Monitoring has no data dependency, so it is ordered after deployment
    # explicitly.
    monitor_task = monitor_model().after(deploy_task)

    # Caching is enabled for the data and model steps and disabled for the
    # deploy and monitor steps.
    cached_tasks = (
        preprocess_task,
        train_task,
        evaluate_task,
        ensemble_task,
        preformance_check_task,
    )
    uncached_tasks = (deploy_task, monitor_task)
    for task in cached_tasks:
        task.set_caching_options(True)
    for task in uncached_tasks:
        task.set_caching_options(False)



# Compile the Kubeflow pipeline and run it on the Kubeflow server

In [10]:

# Compile the pipeline definition into a YAML package on disk.
kfp.compiler.Compiler().compile(
    pipeline_func=time_series_pipeline,
    package_path='time_series_pipeline.yaml',
)

# Values passed to the pipeline parameters for this run.
run_arguments = {
    "epochs": 1,
    "service_account_name": "sa-minio-kserve",
    "namespace": "kubeflow",
    "service_name": "weather-forcast-service",
    "kserve_version": "v1beta1",
}

# Submit a run of the pipeline under the given experiment.
client = kfp.Client()
client.create_run_from_pipeline_func(
    time_series_pipeline,
    arguments=run_arguments,
    experiment_name='time_series_experiment',
)

RunPipelineResult(run_id=7b7d1378-ffe7-4ee9-9baf-953e0fcdb71f)