## Saving a trained PyTorch model to ONNX format and uploading the file to an S3 bucket

In [4]:
# Import required libraries and packages
import os
import datetime
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Authentication and Model Download
def authenticate_and_download_model(model_name: str):
    """
    Authenticate the Hugging Face user and load the specified model locally.

    The access token is read with ``getpass`` rather than ``input`` so that it
    is not echoed into the notebook's saved cell output.

    Args:
        model_name: Hugging Face Hub repository id of the model to load.

    Returns:
        A ``(tokenizer, model)`` tuple as returned by
        ``AutoTokenizer.from_pretrained`` and
        ``AutoModelForCausalLM.from_pretrained``.

    Raises:
        ValueError: If an empty token is entered.
    """
    from getpass import getpass

    from huggingface_hub import login

    # Prompt for the token without echoing it; strip stray whitespace from pasting
    token = getpass("Enter your Hugging Face access token: ").strip()
    if not token:
        raise ValueError("A Hugging Face access token is required.")
    login(token=token)

    # Load the tokenizer and model. `token=` replaces the deprecated
    # `use_auth_token=` argument.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
    model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
    print(f"Model '{model_name}' and tokenizer successfully loaded locally.")
    return tokenizer, model

# Hugging Face checkpoint to authenticate for and load
model_name = "meta-llama/Llama-2-7b-hf"
# NOTE(review): `model` is rebound to a DiabetesModel instance further down in
# this cell, so the checkpoint loaded here is not the model that gets trained.
tokenizer, model = authenticate_and_download_model(model_name)

# Read the dataset into a DataFrame
data = pd.read_csv('/opt/app-root/src/models/data/diabetes.csv')

# 'Outcome' is the target column; every remaining column is a feature
y = data['Outcome']
X = data.drop(columns='Outcome')

# Hold out 30% of the rows, then halve that hold-out into validation and test
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=0
)
X_validation, X_test, y_validation, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=0
)

# Features become float32 tensors, labels become int64 tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
X_validation_tensor = torch.tensor(X_validation.values, dtype=torch.float32)
y_validation_tensor = torch.tensor(y_validation.values, dtype=torch.long)

# Pair features with labels so they can be batched together
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)

# Only the training batches are shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
validation_loader = DataLoader(validation_dataset, batch_size=32)

# Define the neural network architecture
class DiabetesModel(torch.nn.Module):
    """Fully connected classifier: 8 input features -> 20 hidden units -> 2 outputs."""

    def __init__(self):
        super().__init__()
        # fc1 is created before fc2, so seeded weight initialisation is unchanged
        self.fc1 = torch.nn.Linear(in_features=8, out_features=20)
        self.fc2 = torch.nn.Linear(in_features=20, out_features=2)

    def forward(self, x):
        """Apply fc1, a ReLU, then fc2 and return the raw output scores."""
        hidden = torch.nn.functional.relu(self.fc1(x))
        return self.fc2(hidden)

# Initialize the model, loss function, and optimizer
# Seed PyTorch's global RNG before the model is constructed.
torch.manual_seed(10)
# NOTE(review): this rebinds `model`, replacing the Hugging Face model that was
# assigned to the same name earlier in this cell.
model = DiabetesModel()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# TensorBoard setup
# The timestamp uses '-' instead of ':' because colons are not valid in
# Windows file names.
log_dir = "logs/training/" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
writer = SummaryWriter(log_dir)

# Training loop
epochs = 10
try:
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation loop
        model.eval()
        validation_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        with torch.no_grad():
            for X_batch, y_batch in validation_loader:
                predictions = model(X_batch)
                loss = criterion(predictions, y_batch)
                validation_loss += loss.item()

                # Calculate accuracy: the predicted class is the index of the largest score
                _, predicted_classes = torch.max(predictions, 1)
                correct_predictions += (predicted_classes == y_batch).sum().item()
                total_predictions += y_batch.size(0)

        # Calculate average per-batch losses and overall accuracy
        train_loss /= len(train_loader)
        validation_loss /= len(validation_loader)
        validation_accuracy = correct_predictions / total_predictions

        # Log metrics to TensorBoard
        writer.add_scalar("Loss/Train", train_loss, epoch)
        writer.add_scalar("Loss/Validation", validation_loss, epoch)
        writer.add_scalar("Accuracy/Validation", validation_accuracy, epoch)

        print(f"Epoch {epoch + 1}/{epochs}: "
              f"Train Loss = {train_loss:.4f}, Validation Loss = {validation_loss:.4f}, "
              f"Validation Accuracy = {validation_accuracy:.4f}")
finally:
    # Close the TensorBoard writer even if training raises, so pending events are flushed
    writer.close()

# Print instructions for TensorBoard
print("Training complete. To view TensorBoard, run the following command:")
print(f"tensorboard --logdir={log_dir}")

# Export the trained network to ONNX and save it locally.
# NOTE(review): despite the file name, `model` at this point is the
# DiabetesModel trained above; the name is kept because the upload cells
# reference this exact path.
onnx_file_path = "./meta-llama-7b_model.onnx"

# A single example row with 8 features, used only to trace the graph
dummy_input = torch.randn(1, 8)

# Axis 0 is marked dynamic on both tensors so the batch size is not fixed
export_options = dict(
    export_params=True,
    opset_version=12,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"},
    },
)
torch.onnx.export(model, dummy_input, onnx_file_path, **export_options)

print(f"ONNX model exported successfully and saved at '{onnx_file_path}'.")


Enter your Hugging Face access token:  [REDACTED]




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model 'meta-llama/Llama-2-7b-hf' and tokenizer successfully loaded locally.
Epoch 1/10: Train Loss = 2.3556, Validation Loss = 1.0875, Validation Accuracy = 0.5304
Epoch 2/10: Train Loss = 1.0120, Validation Loss = 0.9504, Validation Accuracy = 0.6261
Epoch 3/10: Train Loss = 0.8020, Validation Loss = 0.8709, Validation Accuracy = 0.6435
Epoch 4/10: Train Loss = 0.7303, Validation Loss = 0.8053, Validation Accuracy = 0.6348
Epoch 5/10: Train Loss = 0.7298, Validation Loss = 0.9113, Validation Accuracy = 0.6087
Epoch 6/10: Train Loss = 0.7081, Validation Loss = 0.6856, Validation Accuracy = 0.6609
Epoch 7/10: Train Loss = 0.6772, Validation Loss = 0.7526, Validation Accuracy = 0.6870
Epoch 8/10: Train Loss = 0.7427, Validation Loss = 0.6779, Validation Accuracy = 0.6957
Epoch 9/10: Train Loss = 0.6783, Validation Loss = 0.7930, Validation Accuracy = 0.6522
Epoch 10/10: Train Loss = 0.6341, Validation Loss = 0.6262, Validation Accuracy = 0.6696
Training complete. To view TensorBoard, run

In [2]:
pip install transformers

Collecting transformers
  Downloading transformers-4.47.0-py3-none-any.whl (10.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hCollecting regex!=2019.12.17
  Downloading regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (780 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m780.9/780.9 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tokenizers<0.22,>=0.21
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting safetensors>=0.4.1
  Downloading safetensors-0.4.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m436.1/436.1 kB[0m [31m16.3 MB/s[0m eta 

## Uploading the ONNX model file to an S3 bucket

In [5]:
## Dell-APEX

import boto3
import os

# Specify the ONNX model file name and S3 bucket details
onnx_file_name = "./meta-llama-7b_model.onnx"
bucket_name = os.getenv("AWS_S3_BUCKET")
endpoint_url = "https://172.17.66.103"  # Update this to your S3 endpoint URL

# Fail early with a clear message instead of passing Bucket=None to boto3
if not bucket_name:
    raise EnvironmentError("Environment variable AWS_S3_BUCKET is not set.")

# Fetch AWS credentials from environment variables
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# SECURITY: verify the endpoint's certificate against the CA bundle named by
# AWS_CA_BUNDLE when one is provided. Without it the original behaviour is
# kept (verification disabled), which leaves the connection open to
# interception and should only be used on trusted networks.
tls_verify = os.getenv("AWS_CA_BUNDLE") or False

# Initialize the S3 client
try:
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        endpoint_url=endpoint_url,
        verify=tls_verify
    )
    # NOTE: creating the client does not contact the endpoint; the first real
    # request is the upload below.
    print(f"S3 bucket '{bucket_name}' connected successfully.")
except Exception as e:
    print(f"Failed to connect to S3: {e}")
    raise

# Upload the ONNX model to the S3 bucket, using the bare file name as the object key
try:
    s3_client.upload_file(onnx_file_name, bucket_name, os.path.basename(onnx_file_name))
    print(f"File '{onnx_file_name}' uploaded to S3 bucket '{bucket_name}' successfully.")
except Exception as e:
    print(f"Failed to upload file: {e}")
    raise


S3 bucket 'openshift-ai-bucket' connected successfully.
File './meta-llama-7b_model.onnx' uploaded to S3 bucket 'openshift-ai-bucket' successfully.




In [1]:
## Dell-APEX - To get available objects in S3
import boto3
import os

# Specify S3 bucket details
bucket_name = os.getenv("AWS_S3_BUCKET")
endpoint_url = "https://172.17.66.103"  # Update this to your S3 endpoint URL

# Fail early with a clear message instead of passing Bucket=None to boto3
if not bucket_name:
    raise EnvironmentError("Environment variable AWS_S3_BUCKET is not set.")

# Fetch AWS credentials from environment variables
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# SECURITY: verify the endpoint's certificate against the CA bundle named by
# AWS_CA_BUNDLE when one is provided. Without it the original behaviour is
# kept (verification disabled), which leaves the connection open to
# interception and should only be used on trusted networks.
tls_verify = os.getenv("AWS_CA_BUNDLE") or False

# Initialize the S3 client
try:
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        endpoint_url=endpoint_url,
        verify=tls_verify
    )
    # NOTE: creating the client does not contact the endpoint; the first real
    # request is the listing below.
    print(f"Connected to S3 bucket '{bucket_name}' successfully.")
except Exception as e:
    print(f"Failed to connect to S3: {e}")
    raise

# List available files in the S3 bucket.
# A single list_objects_v2 call returns only one page of keys, so a paginator
# is used to walk every page.
try:
    paginator = s3_client.get_paginator("list_objects_v2")
    found_any = False
    for page in paginator.paginate(Bucket=bucket_name):
        for obj in page.get('Contents', []):
            if not found_any:
                # Print the header once, before the first key
                print(f"Available files in the S3 bucket '{bucket_name}':")
                found_any = True
            print(f"- {obj['Key']}")
    if not found_any:
        print(f"The S3 bucket '{bucket_name}' is empty.")
except Exception as e:
    print(f"Failed to list files in the bucket: {e}")
    raise


Connected to S3 bucket 'openshift-ai-bucket' successfully.
Available files in the S3 bucket 'openshift-ai-bucket':
- models/
- models/meta-llama-7b_model.onnx




In [5]:
## Minio-s3
import boto3
import os

# Specify the ONNX model file name and S3 bucket details
onnx_file_name = "./meta-llama-7b_model.onnx"
bucket_name = os.getenv("AWS_S3_BUCKET")
endpoint_url = "https://minio-api-minio.apps.ocpbmai.sdxtest.local"  # Update this to your S3 endpoint URL

# Fail early with a clear message instead of passing Bucket=None to boto3
if not bucket_name:
    raise EnvironmentError("Environment variable AWS_S3_BUCKET is not set.")

# Fetch AWS credentials from environment variables
aws_access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")

# SECURITY: verify the endpoint's certificate against the CA bundle named by
# AWS_CA_BUNDLE when one is provided. Without it the original behaviour is
# kept (verification disabled), which leaves the connection open to
# interception and should only be used on trusted networks.
tls_verify = os.getenv("AWS_CA_BUNDLE") or False

# Initialize the S3 client
try:
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        endpoint_url=endpoint_url,
        verify=tls_verify
    )
    # NOTE: creating the client does not contact the endpoint; the first real
    # request is the upload below.
    print(f"S3 bucket '{bucket_name}' connected successfully.")
except Exception as e:
    print(f"Failed to connect to S3: {e}")
    raise

# Upload the ONNX model to the S3 bucket, using the bare file name as the object key
try:
    s3_client.upload_file(onnx_file_name, bucket_name, os.path.basename(onnx_file_name))
    print(f"File '{onnx_file_name}' uploaded to S3 bucket '{bucket_name}' successfully.")
except Exception as e:
    print(f"Failed to upload file: {e}")
    raise


S3 bucket 'ocp-ai-bucket' connected successfully.
File './meta-llama-7b_model.onnx' uploaded to S3 bucket 'ocp-ai-bucket' successfully.


