# Conversational AI

This notebook contains the code to train a Transformer for creating a chatbot integrated with the Neo4j graph database.


In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, GPT2Config
from torch.utils.data import Dataset, DataLoader

# Define a simple dataset class
class ChatDataset(Dataset):
    """Dataset of conversation strings, tokenized lazily on item access.

    Args:
        conversations: Sequence of strings; each one is a single example.
        tokenizer: Optional tokenizer object exposing
            ``encode(text, return_tensors="pt")``. When omitted, the
            pretrained ``"gpt2"`` ``GPT2Tokenizer`` is loaded, which matches
            the original behavior.
    """

    def __init__(self, conversations, tokenizer=None):
        self.conversations = conversations
        # Allow callers to share/inject a tokenizer; fall back to GPT-2's.
        if tokenizer is None:
            tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        self.tokenizer = tokenizer

    def __len__(self):
        """Return the number of conversations in the dataset."""
        return len(self.conversations)

    def __getitem__(self, idx):
        """Return ``{"input_ids": tensor}`` for the conversation at ``idx``.

        The tensor is one-dimensional (one entry per token).
        """
        conversation = self.conversations[idx]
        inputs = self.tokenizer.encode(conversation, return_tensors="pt")
        # The encoded tensor carries a leading batch axis of size 1. Drop only
        # that axis: a bare squeeze() would also collapse a one-token sequence
        # into a 0-d scalar, losing the sequence dimension.
        return {"input_ids": inputs.squeeze(0)}

# Load and preprocess your chatbot dataset
# (toy corpus: every string is treated as one training example)
conversations = [
    "User: Hi! How are you?",
    "Bot: I'm doing well, thank you!",
]
dataset = ChatDataset(conversations)

# Model configuration: start from the stock "gpt2" config and flag it as a decoder
model_config = GPT2Config.from_pretrained("gpt2")
model_config.is_decoder = True

# Load the pre-trained GPT-2 weights under that configuration
chatbot_model = GPT2LMHeadModel.from_pretrained("gpt2", config=model_config)

# Training settings: AdamW over every model parameter, and a shuffled loader
# that yields one example per batch
optimizer = torch.optim.AdamW(params=chatbot_model.parameters(), lr=1e-5)
train_dataloader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

# Training loop
num_epochs = 5

# from_pretrained() hands the model back in evaluation mode (dropout off).
# Switch to training mode so fine-tuning runs with dropout enabled.
chatbot_model.train()

for epoch in range(num_epochs):
    for batch in train_dataloader:
        inputs = batch["input_ids"]
        # Language-modeling objective: the inputs double as the labels.
        outputs = chatbot_model(inputs, labels=inputs)
        loss = outputs.loss

        # Standard step: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f"Epoch: {epoch + 1}, Loss: {loss.item()}")

# Return to evaluation mode so any later inference sees the model in the same
# mode it had before training.
chatbot_model.eval()

# Save the trained model
chatbot_model.save_pretrained("chatbot_model")


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Epoch: 1, Loss: 2.843620777130127
Epoch: 1, Loss: 3.0680181980133057
Epoch: 2, Loss: 2.8434765338897705
Epoch: 2, Loss: 2.0682106018066406
Epoch: 3, Loss: 1.8928117752075195
Epoch: 3, Loss: 2.265192747116089
Epoch: 4, Loss: 1.5362848043441772
Epoch: 4, Loss: 1.9792900085449219
Epoch: 5, Loss: 1.1883094310760498
Epoch: 5, Loss: 1.7372899055480957
