<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/FineTuning_VertexAISDK_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installations

In [None]:
!pip install google-cloud-aiplatform -q
!pip install google-cloud-storage -q
!pip install google-cloud-bigquery -q
!pip install google-cloud-bigquery-storage -q
!pip install google-cloud-aiplatform -q
!pip install datasets -q
!pip install colab-env -q

# Install necessary libraries
!pip install  -q gcsfs==2024.3.1
!pip install  -q accelerate==0.31.0
!pip install  -q transformers==4.45.2
!pip install  -q  datasets==2.19.2
!pip install google-cloud-aiplatform[all] -q
!pip install vertexai  -q
!pip install tensorflow_datasets -q

!pip install google-cloud-aiplatform -q -U

print('\n')


import google.cloud.bigquery
import google.cloud.bigquery_storage
import google.cloud.aiplatform
import google.cloud.storage

print(f"google-cloud-aiplatform: {google.cloud.aiplatform.__version__}")
print(f"google-cloud-storage: {google.cloud.storage.__version__}")
print(f"google-cloud-bigquery: {google.cloud.bigquery.__version__}")
print(f"google-cloud-bigquery-storage: {google.cloud.bigquery_storage.__version__}")
print(f"google-cloud-aiplatform: {google.cloud.aiplatform.__version__}")

In [5]:
# Report the versions of the key packages available in this runtime.
import datasets
import colab_env
import google.cloud.aiplatform
import google.cloud.storage

print('\n\n')
for package_label, package_module in (
    ("datasets", datasets),
    ("colab-env", colab_env),
    ("google-cloud-aiplatform", google.cloud.aiplatform),
    ("google-cloud-storage", google.cloud.storage),
):
    print(f"{package_label}: {package_module.__version__}")

Mounted at /content/gdrive



datasets: 2.19.2
colab-env: 0.2.0
google-cloud-aiplatform: 1.87.0
google-cloud-storage: 2.19.0


## Data Preparation

In [None]:
!gsutil cp gs://{BUCKET_NAME}/cmapss_FD004_train_text.jsonl .
!gsutil cp gs://{BUCKET_NAME}/cmapss_FD004_test_text.jsonl .

In [None]:
import json

def validate_jsonl_format(file_path):
    """Check that every line of a JSONL file is a prompt/completion record.

    Args:
        file_path: Path to a local JSONL file (read as UTF-8).

    Returns:
        True when every line parses as a JSON object that has both a
        "prompt" and a "completion" key. False as soon as one line fails to
        parse or lacks either key; the offending line is printed first.
        An empty file yields True.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Invalid JSON in line: {line}")
                return False  # Indicate invalid JSON
            # A line can be valid JSON without being an object (e.g. `42` or
            # `"some text"`). A bare membership test would raise TypeError on
            # numbers and do a substring match on strings, so require a dict.
            if not isinstance(data, dict) or "prompt" not in data or "completion" not in data:
                print(f"Invalid format in line: {line}")
                return False  # Indicate invalid format
    return True  # Indicate valid format

# Validate the raw train/test files and report one verdict per file.
raw_file_checks = (
    ("cmapss_FD004_train_text.jsonl",
     "Training data has valid format.",
     "Training data has invalid format."),
    ("cmapss_FD004_test_text.jsonl",
     "Testing data has valid format.",
     "Testing data has invalid format."),
)
for jsonl_path, valid_message, invalid_message in raw_file_checks:
    print(valid_message if validate_jsonl_format(jsonl_path) else invalid_message)

In [None]:
import json

def transform_jsonl_to_prompt_completion(input_file_path, output_file_path):
    """Transforms chat-style JSONL to prompt-completion JSONL.

    For each input line the prompt is the concatenation of every "text" part
    of the first entry in "contents" (assumed to be the user turn), and the
    completion is the string form of the top-level "completion" value, or ""
    when that key is absent.

    Lines that are not valid JSON, or that do not have the expected
    contents/parts/text structure, are reported on stdout and skipped; they
    never abort the conversion.

    Args:
        input_file_path: Path of the chat-style JSONL file to read (UTF-8).
        output_file_path: Path of the prompt/completion JSONL file to write;
            an existing file is overwritten.
    """
    with open(input_file_path, 'r', encoding='utf-8') as infile, \
            open(output_file_path, 'w', encoding='utf-8') as outfile:
        for line in infile:
            try:
                data = json.loads(line)
                # Extract prompt and completion from 'contents'
                prompt = "".join(part["text"] for part in data["contents"][0]["parts"])  # Assumes user role is first
                completion = str(data.get("completion", ""))  # Handle if completion is missing
            except (json.JSONDecodeError, KeyError, IndexError, TypeError) as e:
                # TypeError covers records that are valid JSON but the wrong
                # shape (a top-level list/number, non-dict parts, non-string
                # text), which previously escaped this handler.
                print(f"Skipping invalid or unprocessable line: {line.strip()}, Error: {e}")
                continue

            # Write the prompt-completion record to the output file
            outfile.write(json.dumps({"prompt": prompt, "completion": completion}) + "\n")

# Convert both splits; each result is written next to its source with a
# "_transformed" suffix in the file name.
for input_file_path, output_file_path in (
    ("cmapss_FD004_train_text.jsonl", "cmapss_FD004_train_text_transformed.jsonl"),
    ("cmapss_FD004_test_text.jsonl", "cmapss_FD004_test_text_transformed.jsonl"),
):
    transform_jsonl_to_prompt_completion(input_file_path, output_file_path)
    print(f"Transformed data written to: {output_file_path}")


In [None]:
import json

def validate_jsonl_format(file_path):
    """Check that every line of a JSONL file is a prompt/completion record.

    Args:
        file_path: Path to a local JSONL file (read as UTF-8).

    Returns:
        True when every line parses as a JSON object that has both a
        "prompt" and a "completion" key. False as soon as one line fails to
        parse or lacks either key; the offending line is printed first.
        An empty file yields True.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                print(f"Invalid JSON in line: {line}")
                return False  # Indicate invalid JSON
            # A line can be valid JSON without being an object (e.g. `42` or
            # `"some text"`). A bare membership test would raise TypeError on
            # numbers and do a substring match on strings, so require a dict.
            if not isinstance(data, dict) or "prompt" not in data or "completion" not in data:
                print(f"Invalid format in line: {line}")
                return False  # Indicate invalid format
    return True  # Indicate valid format

# Re-run the format check, this time on the transformed train/test files.
transformed_file_checks = (
    ("cmapss_FD004_train_text_transformed.jsonl",
     "Training data has valid format.",
     "Training data has invalid format."),
    ("cmapss_FD004_test_text_transformed.jsonl",
     "Testing data has valid format.",
     "Testing data has invalid format."),
)
for jsonl_path, valid_message, invalid_message in transformed_file_checks:
    print(valid_message if validate_jsonl_format(jsonl_path) else invalid_message)

In [None]:
# Upload the transformed train/test files to the root of the bucket.
# {BUCKET_NAME} is interpolated from the notebook namespace, so BUCKET_NAME
# must already be defined in the kernel when this cell runs.
!gsutil cp cmapss_FD004_train_text_transformed.jsonl gs://{BUCKET_NAME}/
!gsutil cp cmapss_FD004_test_text_transformed.jsonl gs://{BUCKET_NAME}/

## FINE TUNING - NASA DATASET

In [10]:
# List the bucket objects whose names contain "text".
# {BUCKET_NAME} is interpolated from the notebook namespace and must already be defined.
!gsutil ls gs://{BUCKET_NAME}/*text*

gs://poc-my-new-staging-bucket-2025-1/cmapss_FD004_test_text.jsonl
gs://poc-my-new-staging-bucket-2025-1/cmapss_FD004_train_text.jsonl


In [30]:
# --- train.py content ---
# Source code of the training script, held as a plain string so that the
# launcher cell can write it to trainer/train.py and submit it as a Vertex AI
# custom training job. Nothing in this string is executed by this cell.
#
# What the script does, as written:
#   * initialises vertexai from the GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_REGION
#     environment variables of the process that runs it;
#   * reads prompt/completion JSONL records through tf.io.gfile;
#   * loads "text-bison@001", builds a jobs.FineTuningJob on it and uses the
#     object returned by .run() to embed prompts (model.embed_text);
#   * trains an LSTM regressor on those embeddings (MSE loss, Adam, up to 100
#     epochs, early stopping after 10 epochs without improvement);
#   * saves the best weights locally and copies them with gsutil to
#     --base_output_dir and, when AIP_MODEL_DIR is set, to that directory too.
#
# NOTE(review): confirm that jobs.FineTuningJob and the embed_text method exist
#   in the installed google-cloud-aiplatform / vertexai SDK version.
# NOTE(review): the dataset returns `completion` exactly as stored in the JSONL
#   while the training loop calls .to(device).float() on the collated batch —
#   confirm completions reach the loop as numeric tensors.
# NOTE(review): forward() indexes out[:, -1, :], i.e. it expects a 3-D LSTM
#   output — confirm the embedding batches are (batch, seq, dim).
# NOTE(review): create_gcs_dir is defined but never called inside the script.
train_py_content = """
import argparse
import os
import json
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time
import subprocess
from google.cloud import storage
import pandas as pd
from typing import List
import numpy as np
import torch.utils.data as data
import vertexai
from vertexai.preview.language_models import TextGenerationModel
import tensorflow as tf  # Import TensorFlow for tf.io.gfile
from google.cloud import aiplatform
from google.cloud.aiplatform import jobs
from google.cloud.aiplatform import models

# Initialize Vertex AI with project ID and region from environment variables
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
REGION = os.environ.get("GOOGLE_CLOUD_REGION")
vertexai.init(project=PROJECT_ID, location=REGION)

logging.basicConfig(level=logging.INFO)

# --- Helper Functions ---
def create_gcs_dir(model_dir):
    try:
        storage_client = storage.Client()
        bucket_name = model_dir.split('/')[2]
        blob_prefix = '/'.join(model_dir.split('/')[3:])
        bucket = storage_client.bucket(bucket_name)

        subdirs = blob_prefix.split('/')
        current_prefix = ''
        for subdir in subdirs:
            current_prefix = os.path.join(current_prefix, subdir)
            blob = bucket.blob(current_prefix + '/')
            blob.upload_from_string('')
            logging.info("Created GCS directory: %s", current_prefix)
    except Exception as e:
        logging.error(f"Error creating GCS directory: {e}")
        raise

def load_jsonl_dataset(data_path, sequence_length=30):
    data = []
    try:
        with tf.io.gfile.GFile(data_path, 'r') as f:
            for line in f:
                try:
                    record = json.loads(line)
                    prompt = record["prompt"]
                    completion = record["completion"]
                    data.append((prompt, completion))
                except json.JSONDecodeError as e:
                    logging.warning("Skipping invalid JSON line: %r, Error: %s", repr(line), e)

        return data

    except Exception as e:
        logging.error(f"Error loading dataset: {e}")
        raise

# Function to generate embeddings using Gemini Pro (Modified for fine-tuning)
def generate_embeddings(text, model):
    response = model.embed_text(text)
    return response.embeddings

# --- Dataset Class (Updated to Convert to Text) ---
class CMAPSSJSONLDataset(data.Dataset):
    def __init__(self, data_path, sequence_length=30, use_rolling_features=False, model=None):
        self.data = load_jsonl_dataset(data_path, sequence_length)
        self.sequence_length = sequence_length
        self.use_rolling_features = use_rolling_features
        self.model = model  # Store the model for embedding generation

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        prompt, completion = self.data[idx]

        # Generate embeddings using the provided model
        embeddings = generate_embeddings(prompt, self.model)

        # Return embeddings and completion as a tuple
        return torch.tensor(embeddings), completion  # Return embeddings and completion

# --- Model Definition (Using Gemini Embeddings and LSTM) ---
class RULPredictionModel(nn.Module):
    def __init__(self, embedding_dim, hidden_size, num_layers=2, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(embedding_dim, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, 1)  # Output layer for RUL prediction

    def forward(self, x):
        # Pass embeddings through LSTM
        out, _ = self.lstm(x)
        # Take the output from the last time step
        out = out[:, -1, :]
        # Pass through fully connected layer for RUL prediction
        out = self.fc(out)
        return out

# --- Training Function (Modified for fine-tuning) ---
def train_model(model_name, train_dataset_path, eval_dataset_path,
                staging_bucket, bucket_name, base_output_dir,
                use_rolling_features=False):

    logging.info("Training configuration:")
    logging.info(f"Model name: {model_name}")
    logging.info(f"Train Dataset Path: {train_dataset_path}")
    logging.info(f"Eval Dataset Path: {eval_dataset_path}")
    logging.info(f"Staging Bucket: {staging_bucket}")
    logging.info(f"Bucket Name: {bucket_name}")
    logging.info(f"Base Output Dir: {base_output_dir}")
    logging.info(f"Use Rolling Features: {use_rolling_features}")

    # 1. Data Loaders (Initially using the base model for loading)
    base_model = TextGenerationModel.from_pretrained("text-bison@001")

    # 2. Model Initialization (Updated for fine-tuning)
    # Load the base Gemini Pro model for fine-tuning

    # Create and run the fine-tuning job
    fine_tuning_job = jobs.FineTuningJob(
        model=base_model,  # Base Gemini Pro model
        training_data_uri=train_dataset_path,  # GCS path to training data
        validation_data_uri=eval_dataset_path,  # GCS path to validation data
        # ... other fine-tuning configurations (e.g., hyperparameters) ...
    )

    fine_tuned_model = fine_tuning_job.run()  # Run the fine-tuning job

    # Now use the fine-tuned model for embedding generation
    train_dataset = CMAPSSJSONLDataset(train_dataset_path, use_rolling_features=use_rolling_features,
                                       model=fine_tuned_model)
    eval_dataset = CMAPSSJSONLDataset(eval_dataset_path, use_rolling_features=use_rolling_features,
                                      model=fine_tuned_model)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    eval_loader = DataLoader(eval_dataset, batch_size=64)


    # 3. Model Initialization (LSTM)
    embedding_dim = 768  # Gemini embedding dimension
    hidden_size = 128
    model = RULPredictionModel(embedding_dim, hidden_size)

    # 4. Device Configuration
    if torch.cuda.is_available():
        device = torch.device("cuda")  # Use GPU if available
        print("GPU is available and being used.")
    else:
        device = torch.device("cpu")  # Fallback to CPU if GPU is not available
        print("GPU is not available, using CPU instead.")

    model.to(device)
    logging.info(f"Using device: {device}")

    # 5. Optimizer and Loss Function
    optimizer = optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-4)
    criterion = nn.MSELoss()  # Assuming you want to predict RUL as a regression task

    # 6. Training Loop
    num_epochs = 100  # You can adjust the number of epochs
    best_eval_loss = float('inf')
    patience = 10
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for batch_idx, (embeddings, completions) in enumerate(train_loader):
            embeddings = embeddings.to(device)
            completions = completions.to(device).float().reshape(-1, 1)

            optimizer.zero_grad()
            outputs = model(embeddings)  # Pass embeddings through LSTM
            loss = criterion(outputs, completions)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            if batch_idx % 50 == 0:
                logging.info(f"Epoch: {epoch + 1}, Batch: {batch_idx}, Loss: {loss.item()}")

        avg_loss = total_loss / len(train_loader)
        logging.info(f"Epoch: {epoch + 1}, Average Training Loss: {avg_loss}")

        # 7. Evaluation
        model.eval()
        eval_loss = 0
        with torch.no_grad():
            for embeddings_eval, completions_eval in eval_loader:
                embeddings_eval = embeddings_eval.to(device)
                completions_eval = completions_eval.to(device).float().reshape(-1, 1)

                outputs_eval = model(embeddings_eval)  # Pass embeddings through LSTM
                loss_eval = criterion(outputs_eval, completions_eval)
                eval_loss += loss_eval.item()

        avg_eval_loss = eval_loss / len(eval_loader)
        logging.info(f"Epoch: {epoch + 1}, Average Evaluation Loss: {avg_eval_loss}")

        # 8. Early Stopping
        if avg_eval_loss < best_eval_loss:
            best_eval_loss = avg_eval_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            epochs_no_improve += 1
            if epochs_no_improve == patience:
                logging.info("Early stopping triggered")
                break

    # 9. Load the best model
    model.load_state_dict(torch.load('best_model.pth'))

    logging.info("Starting model saving...")

    # 10. Save Model (Local and GCS)
    local_model_path = 'model-nasa-gpu.pth'
    torch.save(model.state_dict(), local_model_path)

    try:
        base_output_path = base_output_dir
        subprocess.run(['gsutil', 'cp', local_model_path, base_output_path], check=True)
        logging.info(f"Copied model to GCS BUCKET path: {base_output_path}")

        if 'AIP_MODEL_DIR' in os.environ:
            gcs_model_path = os.path.join(os.environ['AIP_MODEL_DIR'], 'model-nasa-gpu.pth')
            subprocess.run(['gsutil', 'cp', local_model_path, gcs_model_path], check=True)
            logging.info(f"Copied model to Vertex AI path: {gcs_model_path}")
            model_save_path = gcs_model_path
        else:
            logging.info(f"Saving model to local path: {local_model_path}")
            model_save_path = local_model_path

        logging.info(f"Model saved to: {model_save_path}")

    except subprocess.CalledProcessError as e:
        logging.error(f"Error saving model: {e}")
        raise

    print("Training completed.")

# --- Main Execution ---
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="rul_predictor_jsonl", help="Model name")
    parser.add_argument("--train_dataset", type=str, required=True, help="Path to training dataset JSONL")
    parser.add_argument("--eval_dataset", type=str, required=True, help="Path to evaluation dataset JSONL")
    parser.add_argument("--staging_bucket", type=str, required=True, help="Staging bucket for Vertex AI")
    parser.add_argument("--bucket_name", type=str, required=True, help="Bucket name")
    parser.add_argument("--base_output_dir", type=str, required=True, help="Base output directory in GCS")
    parser.add_argument("--use_rolling_features", action='store_true', help="Use rolling window features")
    args = parser.parse_args()

    train_model(args.model, args.train_dataset, args.eval_dataset,
                args.staging_bucket, args.bucket_name, args.base_output_dir,
                args.use_rolling_features)
"""

In [None]:
import os
from google.cloud import aiplatform
import argparse
import json
import logging
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import time
import subprocess
from google.cloud import storage
import pandas as pd
from typing import List
import numpy as np
import torch.utils.data as data
import vertexai
from vertexai.preview.language_models import TextGenerationModel


# --- Main Script ---
# Writes the training script to disk, submits it as a Vertex AI custom
# training job on one A100 machine, then prints a summary of the resulting
# Model resource.
if __name__ == "__main__":
    # Get project details from environment variables
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    REGION = os.environ.get("GOOGLE_CLOUD_REGION")
    SERVICEACCOUNT = os.environ.get("GOOGLE_CLOUD_SERVICEACCOUNT")
    PROJECT_NUMBER = os.environ.get("GOOGLE_CLOUD_PROJECT_NUMBER")
    BUCKET_NAME = os.environ.get("GOOGLE_CLOUD_BUCKET_NAME")

    # Fail fast: every GCS path below is derived from the bucket name, and an
    # unset variable would otherwise silently turn into "gs://None/...".
    if not BUCKET_NAME:
        raise EnvironmentError("GOOGLE_CLOUD_BUCKET_NAME is not set; cannot build GCS paths.")

    STAGING_BUCKET = f"gs://{BUCKET_NAME}/staging"

    # Initialize Vertex AI
    aiplatform.init(project=PROJECT_ID, location=REGION)

    # --- Data and Output Paths ---
    TRAINING_DATA_PATH = f"gs://{BUCKET_NAME}/cmapss_FD004_train_text_transformed.jsonl"
    EVAL_DATA_PATH = f"gs://{BUCKET_NAME}/cmapss_FD004_test_text_transformed.jsonl"
    BASE_OUTPUT_DIR = f"gs://{BUCKET_NAME}/model_output"

    # Create or overwrite trainer/train.py
    os.makedirs('trainer', exist_ok=True)
    with open('trainer/train.py', 'w') as f:
        f.write(train_py_content)

    # train.py reads GOOGLE_CLOUD_PROJECT / GOOGLE_CLOUD_REGION from its own
    # process environment, so forward the notebook's values to the training
    # container. Unset values are left out rather than passed as None.
    training_env = {
        name: value
        for name, value in (
            ("GOOGLE_CLOUD_PROJECT", PROJECT_ID),
            ("GOOGLE_CLOUD_REGION", REGION),
        )
        if value
    }

    # Create and run the custom training job
    job = aiplatform.CustomTrainingJob(
        display_name="cmapss-rul-gemini-finetuning",
        script_path="trainer/train.py",
        requirements=["google-cloud-aiplatform", "google-generativeai", "transformers", "pandas", "torch", "tensorflow"], # Added tensorflow to the requirements
        container_uri="us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.2-4.py310:latest",  # Updated container URI
        staging_bucket=STAGING_BUCKET,
        model_serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/pytorch-gpu.2-4:latest"  # Updated serving container URI
    )

    model = job.run(
        replica_count=1,
        machine_type="a2-highgpu-1g",  # Machine type with A100 GPU
        accelerator_type="NVIDIA_TESLA_A100",  # A100 accelerator
        accelerator_count=1,
        base_output_dir=BASE_OUTPUT_DIR,
        environment_variables=training_env or None,
        args=['--model', 'rul_predictor_cmapss',
              '--train_dataset', TRAINING_DATA_PATH,
              '--eval_dataset', EVAL_DATA_PATH,
              '--staging_bucket', STAGING_BUCKET,
              '--bucket_name', BUCKET_NAME,
              '--base_output_dir', BASE_OUTPUT_DIR],
    )

    if model is None:
        # job.run() returns None when training does not yield a managed Model;
        # report that instead of failing with AttributeError below.
        print("Training job finished without producing a Vertex AI Model resource.")
    else:
        model.wait()  # Wait for training to complete

        print('\n\n')
        print(f"Model name: {model.display_name}")
        print(f"Model resource name: {model.resource_name}")
        print(f"Model training start time: {model.create_time}")
        print(f"Model training end time: {model.update_time}")
        print(f"Model training duration: {model.update_time - model.create_time}")
        print('\n')

        # Accessing metrics from training job completion statistics metadata
        if model.training_job and hasattr(model.training_job, 'completion_stats') and model.training_job.completion_stats and model.training_job.completion_stats.metadata:
            metrics = model.training_job.completion_stats.metadata
            # Use .get() for safety
            print(f"Model training loss: {metrics.get('train_loss')}")
            print(f"Model evaluation loss: {metrics.get('eval_loss')}")
            print(f"Model evaluation metrics: {metrics}")
        else:
            print("Training job or completion statistics metadata not available.")

        print('\n')

        logging.info(f"Fine-tuned model: {model.resource_name}")

## MODEL EVALUATION

In [None]:
import os

from google.cloud import storage
import torch

# Initialize the GCS client
client = storage.Client()

# Specify the bucket and blob (file).
# `os` is imported in this cell so the download does not depend on an earlier
# cell having imported it; the repeated blob/bucket assignments were removed.
bucket_name = os.environ.get("GOOGLE_CLOUD_BUCKET_NAME")
blob_name = "model_output/model-nasa-gpu.pth"
bucket = client.bucket(bucket_name)
blob = bucket.blob(blob_name)

# Download the file to a local path
local_file_path = "model-nasa-gpu.pth"  # Or your desired local path
blob.download_to_filename(local_file_path)

print(f"Downloaded '{blob_name}' from '{bucket_name}' to '{local_file_path}'")

Downloaded 'model_output/model-nasa-gpu.pth' from 'poc-my-new-staging-bucket-2025-1' to 'model-nasa-gpu.pth'


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Assuming this is how the CMAPSSJSONLDataset is defined in the document
import torch
from torch.utils import data
import json
from google.cloud import storage
import logging
import os

class CMAPSSJSONLDataset(data.Dataset):
    """In-memory dataset of (sequence, rul) tensor pairs read from a JSONL file.

    Each line must be a JSON object with a "sequence" entry (converted to a
    float32 tensor) and a "rul" entry (wrapped into a one-element float32
    tensor). Lines that are not valid JSON are logged and skipped; a record
    missing either key raises KeyError.

    Args:
        data_path: Local file path, or a ``gs://bucket/object`` URI. A GCS
            object is downloaded to a private temporary file that is always
            deleted again, even when parsing fails.
        sequence_length: Stored on the instance as ``sequence_length``; it is
            not used while loading.
    """

    def __init__(self, data_path, sequence_length=30):
        import tempfile  # local import keeps this cell self-contained

        self.data = []
        self.sequence_length = sequence_length

        # Check if data_path is a local file or a GCS URI
        from_gcs = data_path.startswith('gs://')
        if from_gcs:
            storage_client = storage.Client()
            bucket_name = data_path.split('/')[2]
            blob_name = '/'.join(data_path.split('/')[3:])
            blob = storage_client.bucket(bucket_name).blob(blob_name)
            # A unique temp file avoids collisions between concurrent loads
            # that a fixed /tmp/temp_data.jsonl path would cause.
            fd, data_file = tempfile.mkstemp(suffix=".jsonl")
            os.close(fd)
        else:
            # If local file, use it directly
            data_file = data_path

        try:
            if from_gcs:
                blob.download_to_filename(data_file)

            with open(data_file, 'r') as f:
                for line in f:
                    try:
                        record = json.loads(line)
                    except json.JSONDecodeError as e:
                        logging.warning("Skipping invalid JSON line: %s, Error: %s", line, e)
                        continue
                    sequence = torch.tensor(record["sequence"], dtype=torch.float32)
                    rul = torch.tensor([record["rul"]], dtype=torch.float32)
                    self.data.append((sequence, rul))
        finally:
            # Remove the temporary download (never the caller's local file),
            # including when the download or parsing raised.
            if from_gcs and os.path.exists(data_file):
                os.remove(data_file)

    def __len__(self):
        """Return the number of loaded records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (sequence, rul) tensor pair stored at position ``idx``."""
        return self.data[idx]

class RULPredictionModel(nn.Module):
    """LSTM-based model for RUL prediction.

    A stacked, batch-first LSTM feeds a two-layer head
    (Linear -> ReLU -> Dropout -> Linear) that emits one value per sequence.
    The attribute names ``lstm``, ``fc1`` and ``fc2`` determine the
    state_dict keys, so they must match the checkpoint being loaded.
    """

    def __init__(self, input_size, hidden_size, num_layers=3, dropout=0.3):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        head_width = hidden_size // 2
        self.fc1 = nn.Linear(hidden_size, head_width)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc2 = nn.Linear(head_width, 1)

    def forward(self, x):
        """Map a (batch, time, feature) tensor to a (batch, 1) prediction."""
        sequence_out, _state = self.lstm(x)
        # Only the final time step is passed on to the regression head.
        final_step = sequence_out[:, -1, :]
        hidden = self.dropout(self.relu(self.fc1(final_step)))
        return self.fc2(hidden)

import numpy as np

def calculate_cmapss_score(true_rul, predicted_rul):
    """Calculates a simplified CMAPSS score.

    The error of each sample is ``predicted - true``. A negative error
    contributes ``exp(-error / 13) - 1`` and a non-negative error contributes
    ``exp(error / 10) - 1``; the score is the sum of all contributions.
    """
    errors = np.array(predicted_rul) - np.array(true_rul)

    def _penalty(error):
        # The two branches use different divisors (13 for negative errors, 10 otherwise).
        if error < 0:
            return np.exp(-error / 13) - 1
        return np.exp(error / 10) - 1

    return sum(_penalty(error) for error in errors)


def evaluate_model(model_path, eval_dataset_path, input_size, hidden_size, sequence_length):
    """Evaluates the trained RUL prediction model.

    Builds a 3-layer RULPredictionModel, loads its weights from
    ``model_path``, runs it over the evaluation dataset in batches of 64 and
    prints MSE, MAE, RMSE and the CMAPSS score.

    Args:
        model_path: Local path to a saved state_dict.
        eval_dataset_path: Local path or gs:// URI of the evaluation JSONL file.
        input_size: Number of input features per time step.
        hidden_size: LSTM hidden size.
        sequence_length: Forwarded to CMAPSSJSONLDataset.
    """
    # Load the saved model using the correct model architecture
    model = RULPredictionModel(input_size, hidden_size, num_layers=3)  # num_layers must match the checkpoint

    # Explicitly load the weights onto the CPU. The model and every batch stay
    # on the CPU in this function, so mapping to 'cuda' only made the load
    # fail on runtimes without a GPU.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

    model.eval()  # Set to evaluation mode

    # Load the evaluation dataset
    eval_dataset = CMAPSSJSONLDataset(eval_dataset_path, sequence_length)
    eval_loader = DataLoader(eval_dataset, batch_size=64, shuffle=False)  # No need to shuffle for evaluation

    # Make predictions and collect them alongside the targets
    all_predictions = []
    all_targets = []
    with torch.no_grad():  # Disable gradient calculations during evaluation
        for sequences, ruls in eval_loader:
            predictions = model(sequences)
            all_predictions.extend(predictions.flatten().tolist())
            all_targets.extend(ruls.flatten().tolist())

    # Calculate evaluation metrics; RMSE is derived from the MSE computed once.
    mae = mean_absolute_error(all_targets, all_predictions)
    mse = mean_squared_error(all_targets, all_predictions)
    rmse = np.sqrt(mse)

    # Calculate CMAPSS score
    cmapss_score = calculate_cmapss_score(all_targets, all_predictions)

    # Print the results
    print("Evaluation Results:")
    print(f"Average Evaluation Loss (MSE): {mse:.2f}")
    print(f"Mean Absolute Error (MAE): {mae:.2f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"CMAPSS Score: {cmapss_score:.2f}")



# --- Example Usage (if __name__ == "__main__": block) ---
# Evaluate the downloaded checkpoint on the test sequences stored in the bucket.
if __name__ == "__main__":
    model_path = "model-nasa-gpu.pth"  # local checkpoint file
    eval_dataset_path = f"gs://{BUCKET_NAME}/cmapss_FD004_test_sequences.jsonl"  # evaluation data (GCS URI)
    # These values have to agree with the settings the checkpoint was trained with.
    input_size, hidden_size, sequence_length = 25, 128, 30

    evaluate_model(
        model_path=model_path,
        eval_dataset_path=eval_dataset_path,
        input_size=input_size,
        hidden_size=hidden_size,
        sequence_length=sequence_length,
    )

Evaluation Results:
Average Evaluation Loss (MSE): 675.03
Mean Absolute Error (MAE): 25.98
Root Mean Squared Error (RMSE): 25.98
CMAPSS Score: 1607.38


N=10

Evaluation Results:
Average Evaluation Loss (MSE): 675.91
Mean Absolute Error (MAE): 26.00
Root Mean Squared Error (RMSE): 26.00
CMAPSS Score: 1609.80

N=200

Evaluation Results:
Average Evaluation Loss (MSE): 676.00
Mean Absolute Error (MAE): 26.00
Root Mean Squared Error (RMSE): 26.00
CMAPSS Score: 1610.04



In [None]:
import os

# Echo the environment configuration this notebook reads (unset names print None).
for env_name in (
    "GOOGLE_CLOUD_PROJECT",
    "GOOGLE_CLOUD_REGION",
    "GOOGLE_CLOUD_SERVICEACCOUNT",
    "GOOGLE_CLOUD_PROJECT_NUMBER",
    "GOOGLE_CLOUD_BUCKET_NAME",
):
    print(f"{env_name}:", os.environ.get(env_name))

In [None]:
# Capture the output lines of the gcloud pipeline as a list (IPython `!` assignment),
# then keep the first line, stripped, and print it.
# NOTE(review): the xargs placeholder `{}` is never referenced by the second gcloud
#   command, so the project number fetched by the first command appears unused —
#   confirm whether the pipe is needed.
# NOTE(review): the filter selects the "Compute Engine default service account" while
#   the result is labelled as the Vertex AI service agent — confirm this is the intended account.
# NOTE(review): indexing [0] assumes the command produced at least one output line.
VERTEX_AI_SERVICE_AGENT = !gcloud projects describe $PROJECT_ID --format='value(project_number)' | xargs -I{} gcloud iam service-accounts list --filter="displayName:Compute Engine default service account" --project=$PROJECT_ID --format="value(email)"
VERTEX_AI_SERVICE_AGENT = VERTEX_AI_SERVICE_AGENT[0].strip()  # Access the first element (index 0)
print("Vertex AI Service Agent:", VERTEX_AI_SERVICE_AGENT)