In [1]:
!pip install openai torch pandas scikit-learn nemo_toolkit requests

Collecting openai
  Using cached openai-1.52.2-py3-none-any.whl.metadata (24 kB)
Collecting torch
  Using cached torch-2.5.0-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting nemo_toolkit
  Using cached nemo_toolkit-1.23.0-py3-none-any.whl.metadata (18 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Using cached jiter-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Using cached pydantic-2.9.2-py3-none-any.whl.metadata (149 kB)
Collecting typing-extensions<5,>=4.11 (from openai)
  Using cached typing_extensions-4.12.2-py3-none-any.whl.metadata (3.0 kB)
Collecting filelock (from torch)
  Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9

In [None]:
import os

from openai import OpenAI
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt

In [None]:
# Initialize the BioNeMo client
# The API key is read from the NVIDIA_API_KEY environment variable so a real
# secret never has to be pasted into (and saved with) the notebook. The literal
# "key" placeholder is kept only as a fallback so the cell still runs when the
# variable is unset; set NVIDIA_API_KEY before issuing real requests.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY", "key"),
)

In [None]:
# Query the BioNeMo model
prompt_messages = [
    {"role": "user", "content": "What are the applications of AI in healthcare?"},
]
completion = client.chat.completions.create(
    model="writer/palmyra-med-70b",
    messages=prompt_messages,
    temperature=0.2,
    top_p=0.7,
    max_tokens=1024,
    stream=True,
)

# Stream and print the response
for chunk in completion:
    piece = chunk.choices[0].delta.content
    # Chunks without text content are skipped
    if piece is None:
        continue
    print(piece, end="")

In [None]:
# Example data: two samples with four numeric features each
features = [
    [0.5, 0.3, 0.8, 0.1],
    [0.2, 0.6, 0.7, 0.9],
]
# One class label per sample, in the same order as `features`
labels = [1, 0]

# Define the dataset class
class BioactivityDataset(Dataset):
    """In-memory dataset pairing feature rows with integer class labels.

    Args:
        features: Nested sequence (or array-like) of numeric feature rows;
            converted to a ``torch.float32`` tensor.
        labels: Sequence of integer class labels; converted to a
            ``torch.long`` tensor.

    Raises:
        ValueError: If the number of feature rows differs from the number
            of labels.
    """

    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
        # Fail fast on mismatched inputs: __len__ is driven by the labels only,
        # so a mismatch would otherwise silently drop rows or raise an
        # IndexError later during iteration.
        if self.features.shape[:1] != self.labels.shape[:1]:
            raise ValueError(
                "features and labels must have the same number of samples, "
                f"got shapes {tuple(self.features.shape)} and {tuple(self.labels.shape)}"
            )

    def __len__(self):
        """Return the number of samples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensor pair at position ``idx``."""
        return self.features[idx], self.labels[idx]

# Wrap the example data in a dataset and serve it in shuffled batches of two
dataset = BioactivityDataset(features=features, labels=labels)
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=True)

In [None]:
# Define a simple model using PyTorch/NeMo
class BioactivityNeMoModel(nn.Module):
    """Three-layer fully connected classifier that outputs log-probabilities.

    The defaults reproduce the original fixed architecture (4 -> 128 -> 64 -> 2).

    Args:
        in_features: Number of input features per sample.
        hidden_dims: Widths of the two hidden layers, as a 2-item sequence.
        num_classes: Number of output classes (2 = binary classification).
    """

    def __init__(self, in_features=4, hidden_dims=(128, 64), num_classes=2):
        super().__init__()
        first_hidden, second_hidden = hidden_dims
        self.fc1 = nn.Linear(in_features, first_hidden)
        self.fc2 = nn.Linear(first_hidden, second_hidden)
        self.fc3 = nn.Linear(second_hidden, num_classes)

    def forward(self, x):
        """Map a batch of feature rows to per-class log-probabilities.

        The normalization runs over ``dim=1``, so ``x`` must be batched
        (at least 2-D, with classes ending up on dimension 1).
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # Output is log_softmax, i.e. already normalized log-probabilities,
        # not raw logits.
        return torch.log_softmax(self.fc3(x), dim=1)

# Initialize the model
# Seed PyTorch's global RNG first so the randomly initialized layer weights are
# the same on every fresh kernel run (makes the notebook reproducible).
torch.manual_seed(42)
model = BioactivityNeMoModel()

In [None]:
# Set up optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# The model's forward() returns log-probabilities (log_softmax), so the matching
# loss is NLLLoss. CrossEntropyLoss expects raw logits and would apply
# log_softmax a second time on top of the model's own normalization.
criterion = nn.NLLLoss()

# Training loop
for epoch in range(5):
    model.train()
    total_loss = 0
    for feature, label in dataloader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(feature)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean per-batch loss for this epoch
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(dataloader)}')

In [None]:
# Evaluate the trained model
def evaluate_model(model, dataloader):
    """Print the classification accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled while predictions are computed.

    Args:
        model: Module whose output holds one score per class along dim 1.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        None. The accuracy is printed as a percentage; if the dataloader
        yields no samples, a "N/A" line is printed instead of dividing by zero.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in dataloader:
            outputs = model(features)
            # Predicted class = index of the highest score in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Nothing to evaluate: avoid ZeroDivisionError on an empty loader
        print('Accuracy: N/A (no samples)')
        return
    print(f'Accuracy: {100 * correct / total:.2f}%')

# NOTE: `dataloader` is the same loader the training loop iterates over, so this
# reports accuracy on the training data, not on a held-out set.
evaluate_model(model, dataloader)

In [None]:
# Persist the trained weights (the state_dict only) to disk
trained_weights = model.state_dict()
torch.save(trained_weights, 'bioactivity_nemo_model.pth')
print("Model saved successfully.")

In [None]:
# Example input for both models
input_data = [[0.5, 0.3, 0.8, 0.1]]

# Local model prediction
# Inference only: use eval mode and skip autograd bookkeeping.
model.eval()
with torch.no_grad():
    local_model_output = model(torch.tensor(input_data, dtype=torch.float32))
_, local_prediction = torch.max(local_model_output, 1)
print(f"Local Model Prediction: {local_prediction.item()}")

# Query BioNeMo for prediction
# The prompt now carries the actual input values; previously it referred to
# "the given input" without including any.
# stream=True is required because the loop below consumes incremental chunks
# (`chunk.choices[0].delta`); without it the call returns a single complete
# response object rather than an iterator of chunks.
completion = client.chat.completions.create(
    model="writer/palmyra-med-70b",
    messages=[{"role": "user", "content": f"Predict the bioactivity of the given input: {input_data}"}],
    temperature=0.2,
    top_p=0.7,
    max_tokens=1024,
    stream=True,
)

# Print the API response
for chunk in completion:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

In [None]:
# Plot training loss (example data)
loss_values = [0.8, 0.6, 0.5, 0.4, 0.3]
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x axis

fig, ax = plt.subplots()
ax.plot(epochs, loss_values, marker='o')
ax.set_title('Training Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.show()