In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from transformers import AutoModel, AutoTokenizer
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Pretrained checkpoint name shared by the backbone and its tokenizer
gpt_model_name = "gpt2"

# Load the backbone and the matching tokenizer from the same checkpoint
gpt_model = AutoModel.from_pretrained(gpt_model_name)
tokenizer = AutoTokenizer.from_pretrained(gpt_model_name)

# Ensure a PAD token is set by reusing the EOS token
tokenizer.pad_token = tokenizer.eos_token

# Define ServeNet-GPT-Fusion Model
class ServeNetGPTFusion(nn.Module):
    """Two-layer classification head on top of a frozen GPT backbone.

    Args:
        gpt_model: Backbone called as ``gpt_model(input_ids=..., attention_mask=...)``
            whose result exposes ``last_hidden_state`` and whose
            ``config.hidden_size`` gives the feature width.
        hidden_dim: Width of the intermediate fully connected layer.
        num_classes: Number of output classes.

    ``forward`` returns raw logits of shape ``(batch, num_classes)``. They are
    what ``nn.CrossEntropyLoss`` expects; apply ``self.softmax`` to them when
    probabilities are needed.
    """

    def __init__(self, gpt_model, hidden_dim=256, num_classes=50):
        super().__init__()
        self.gpt = gpt_model
        self.fc1 = nn.Linear(self.gpt.config.hidden_size, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        # Not applied in forward(); kept so callers can turn logits into
        # probabilities with ``model.softmax(logits)``.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask):
        """Return class logits for a batch of tokenized texts.

        Args:
            input_ids: Token ids, indexed as ``(batch, sequence)``.
            attention_mask: Same layout as ``input_ids``; non-zero marks real
                (non-padding) tokens.

        Returns:
            Tensor of unnormalized scores with shape ``(batch, num_classes)``.
        """
        with torch.no_grad():  # Freeze GPT model
            gpt_output = self.gpt(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = gpt_output.last_hidden_state

        # GPT is a causal model without a CLS token: position 0 only sees the
        # first token, so pool the last attended position instead. Taking the
        # largest index where the mask is set works for left and right padding.
        device = hidden_states.device
        positions = torch.arange(hidden_states.size(1), device=device)
        mask = attention_mask.to(device=device, dtype=positions.dtype)
        last_token = (mask * positions).argmax(dim=1)
        batch_index = torch.arange(hidden_states.size(0), device=device)
        pooled = hidden_states[batch_index, last_token]

        x = self.relu(self.fc1(pooled))
        x = self.dropout(x)
        # Return logits: CrossEntropyLoss applies log-softmax itself, and
        # argmax over logits equals argmax over probabilities.
        return self.fc2(x)

# Read the service catalogue from disk
csv_file = "Service.csv"  # Update with actual file path
df = pd.read_csv(csv_file)

# Turn the classification column into a categorical and number its categories
df["Service Classification"] = df["Service Classification"].astype('category')
categories = df["Service Classification"].cat.categories
category_to_index = dict(zip(categories, range(len(categories))))
df["Label"] = df["Service Classification"].map(category_to_index)

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
descriptions = df["Service Desciption"].tolist()
labels = df["Label"].tolist()
train_texts, test_texts, train_labels, test_labels = train_test_split(
    descriptions,
    labels,
    test_size=0.2,
    random_state=42,
)

# Create Dataset Class
class ServiceDataset(Dataset):
    """Map-style dataset that tokenizes one text per item on access.

    Args:
        texts: Sequence of input strings.
        labels: Sequence of integer class indices, aligned with ``texts``.
        tokenizer: Optional callable used to encode a text. When omitted, the
            module-level ``tokenizer`` is looked up at item-access time.
        max_length: Length every item is truncated/padded to.

    Each item is a dict with 1-D ``input_ids`` and ``attention_mask`` tensors
    and a scalar ``label`` tensor of dtype ``torch.long``.
    """

    def __init__(self, texts, labels, tokenizer=None, max_length=512):
        self.texts = texts
        self.labels = labels
        self._tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # No local named ``tokenizer`` exists here, so the fallback resolves to
        # the module-level tokenizer.
        encode = self._tokenizer if self._tokenizer is not None else tokenizer
        encoding = encode(
            self.texts[idx],
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )
        return {
            # The tokenizer returns a batch of one; drop that leading axis so
            # the DataLoader can stack items into a batch.
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(self.labels[idx], dtype=torch.long),
        }

# Create DataLoader
batch_size = 8
train_dataset = ServiceDataset(train_texts, train_labels)
test_dataset = ServiceDataset(test_texts, test_labels)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize Model
# Take the class count from the label mapping built from the dataset, so the
# output layer always matches the labels actually present in the CSV.
num_classes = len(category_to_index)
model = ServeNetGPTFusion(gpt_model, hidden_dim=256, num_classes=num_classes)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
# The GPT forward pass runs under torch.no_grad(), so its parameters never
# receive gradients and only the classification head is updated.
optimizer = optim.Adam(model.parameters(), lr=2e-5)

# Training Loop
def train_model(model, train_loader, epochs=3, *, opt=None, loss_fn=None):
    """Train ``model`` on ``train_loader`` and report the mean loss per epoch.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``.
        train_loader: Iterable of batches; each batch is a mapping with
            ``"input_ids"``, ``"attention_mask"`` and ``"label"`` entries.
        epochs: Number of passes over ``train_loader``.
        opt: Optimizer to step. Defaults to the module-level ``optimizer``.
        loss_fn: Loss callable taking ``(output, target)``. Defaults to the
            module-level ``criterion``.

    Returns:
        List with one mean loss per epoch; ``nan`` for an epoch in which the
        loader yielded no batches.
    """
    opt = optimizer if opt is None else opt
    loss_fn = criterion if loss_fn is None else loss_fn

    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        total_loss = 0.0
        # Count batches while iterating so the average needs no len() and an
        # empty loader cannot cause a division by zero.
        num_batches = 0
        for batch in train_loader:
            opt.zero_grad()
            output = model(batch["input_ids"], batch["attention_mask"])
            loss = loss_fn(output, batch["label"].long())
            loss.backward()
            opt.step()
            total_loss += loss.item()
            num_batches += 1
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}, Loss: {mean_loss}")
    return epoch_losses

# Train on the training split using train_model's default number of epochs.
train_model(model, train_loader)

# Prediction Function
def classify_web_service(text, model):
    """Predict the service category for a single description.

    Args:
        text: Description to classify; encoded with the module-level
            ``tokenizer``.
        model: Module called as ``model(input_ids, attention_mask)`` that
            returns one score per class.

    Returns:
        The key of ``category_to_index`` whose index has the highest score.

    Raises:
        ValueError: If the predicted index has no entry in
            ``category_to_index``.
    """
    model.eval()
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        output = model(tokens['input_ids'], tokens['attention_mask'])
    predicted_index = output.argmax(dim=1).item()

    # Invert the label mapping once instead of searching parallel key/value lists.
    index_to_category = {idx: category for category, idx in category_to_index.items()}
    if predicted_index not in index_to_category:
        raise ValueError(
            f"Predicted class index {predicted_index} has no matching category"
        )
    return index_to_category[predicted_index]

# Example Usage
# Classify one sample description with the trained model and print the
# category name returned by classify_web_service.
example_text = "REST API for e-commerce transactions"
predicted_category = classify_web_service(example_text, model)
print(f"Predicted Category: {predicted_category}")
