Traceback (most recent call last):
  File "c:\Users\ZAKI\.vscode\extensions\ms-python.python-2025.0.0-win32-x64\python_files\python_server.py", line 133, in exec_user_input
    retval = callable_(user_input, user_globals)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<string>", line 14, in <module>
  File "C:\Users\ZAKI\AppData\Roaming\Python\Python312\site-packages\torchtext\__init__.py", line 18, in <module>
    from torchtext import _extension  # noqa: F401
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\ZAKI\AppData\Roaming\Python\Python312\site-packages\torchtext\_extension.py", line 64, in <module>
    _init_extension()
  File "C:\Users\ZAKI\AppData\Roaming\Python\Python312\site-packages\torchtext\_extension.py", line 58, in _init_extension
    _load_lib("libtorchtext")
  File "C:\Users\ZAKI\AppData\Roaming\Python\Python312\site-packages\torchtext\_extension.py", line 50, in _load_lib
    torch.ops.load_library(path)
  File "C:\Users\ZAKI\AppData\Roaming\Python\P

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Multi-Head Self-Attention
class SelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding dimension is split into ``heads`` chunks of size
    ``head_dim``. Each chunk is passed through the value/key/query
    projections (shared across heads), attention is computed per head, and
    the concatenated heads go through a final linear layer.

    Args:
        embed_size: Size of the embedding dimension of the inputs.
        heads: Number of attention heads; must divide ``embed_size``.
    """

    def __init__(self, embed_size, heads):
        super().__init__()
        self.embed_size = embed_size
        self.heads = heads
        self.head_dim = embed_size // heads

        assert (
            self.head_dim * heads == embed_size
        ), "Embedding size must be divisible by number of heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(embed_size, embed_size)

    def forward(self, value, key, query, mask):
        """Attend from ``query`` over ``key``/``value``.

        Args:
            value: Tensor of shape (N, value_len, embed_size).
            key: Tensor of shape (N, key_len, embed_size).
            query: Tensor of shape (N, query_len, embed_size).
            mask: Optional tensor broadcastable to
                (N, heads, query_len, key_len); positions equal to 0 are
                excluded from attention. ``None`` disables masking.

        Returns:
            Tensor of shape (N, query_len, embed_size).
        """
        N = query.shape[0]
        value_len, key_len, query_len = value.shape[1], key.shape[1], query.shape[1]

        # Split the embedding into self.heads pieces. reshape (not view) so
        # that non-contiguous inputs are accepted as well.
        values = value.reshape(N, value_len, self.heads, self.head_dim)
        keys = key.reshape(N, key_len, self.heads, self.head_dim)
        queries = query.reshape(N, query_len, self.heads, self.head_dim)

        # Apply the learned per-head projections. Without this step the
        # values/keys/queries layers would be dead parameters.
        values = self.values(values).permute(0, 2, 1, 3)
        keys = self.keys(keys).permute(0, 2, 1, 3)
        queries = self.queries(queries).permute(0, 2, 1, 3)

        # Scaled dot-product attention: (N, heads, query_len, key_len)
        energy = torch.matmul(queries, keys.transpose(-2, -1)) / (self.head_dim ** 0.5)

        if mask is not None:
            # Use the most negative finite value of the current dtype so the
            # fill also works in reduced precision (e.g. float16).
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)

        attention = torch.softmax(energy, dim=-1)

        # Weighted sum of values, then merge the heads back together.
        out = torch.matmul(attention, values).permute(0, 2, 1, 3).contiguous()
        out = out.view(N, query_len, self.embed_size)

        return self.fc_out(out)

# Transformer Block
class TransformerBlock(nn.Module):
    """Attention sub-layer followed by a position-wise feed-forward sub-layer.

    Each sub-layer adds a residual connection, applies layer normalization
    and then dropout.

    Args:
        embed_size: Size of the embedding dimension.
        heads: Number of attention heads passed to ``SelfAttention``.
        dropout: Dropout probability applied after each normalization.
        forward_expansion: Multiplier for the hidden width of the
            feed-forward network.
    """

    def __init__(self, embed_size, heads, dropout, forward_expansion):
        super().__init__()
        hidden_size = forward_expansion * embed_size

        self.attention = SelfAttention(embed_size, heads)
        self.norm1 = nn.LayerNorm(embed_size)
        self.norm2 = nn.LayerNorm(embed_size)

        self.feed_forward = nn.Sequential(
            nn.Linear(embed_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, embed_size),
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query, mask):
        """Run attention and the feed-forward network with residuals.

        The residual of the attention sub-layer is taken from ``query``.
        """
        attended = self.attention(value, key, query, mask)
        hidden = self.dropout(self.norm1(attended + query))

        expanded = self.feed_forward(hidden)
        return self.dropout(self.norm2(expanded + hidden))

# Encoder
class Encoder(nn.Module):
    """Stack of ``TransformerBlock`` layers over token + position embeddings.

    Args:
        src_vocab_size: Number of rows in the token embedding table.
        embed_size: Size of the embedding dimension.
        num_layers: Number of ``TransformerBlock`` layers.
        heads: Number of attention heads per layer.
        device: Stored on the instance for backward compatibility; the
            forward pass follows the device of its input instead.
        forward_expansion: Feed-forward width multiplier for each layer.
        dropout: Dropout probability.
        max_length: Number of rows in the position embedding table, i.e. the
            longest sequence the encoder accepts.
    """

    def __init__(
        self,
        src_vocab_size,
        embed_size,
        num_layers,
        heads,
        device,
        forward_expansion,
        dropout,
        max_length,
    ):
        super().__init__()
        self.embed_size = embed_size
        self.device = device
        self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                TransformerBlock(embed_size, heads, dropout, forward_expansion)
                for _ in range(num_layers)
            ]
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        """Encode a batch of token indices.

        Args:
            x: Integer tensor of shape (N, seq_length).
            mask: Attention mask forwarded to every layer (may be ``None``).

        Returns:
            Tensor of shape (N, seq_length, embed_size).

        Raises:
            ValueError: If ``seq_length`` exceeds the position table size.
        """
        N, seq_length = x.shape
        max_length = self.position_embedding.num_embeddings
        if seq_length > max_length:
            # Fail early with a readable message instead of an opaque
            # embedding index error.
            raise ValueError(
                f"Sequence length {seq_length} exceeds max_length {max_length}"
            )

        # Build the positions directly on the input's device so the module
        # keeps working after .to(...) regardless of the constructor's
        # `device` argument.
        positions = torch.arange(seq_length, device=x.device).expand(N, seq_length)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

        for layer in self.layers:
            out = layer(out, out, out, mask)

        return out

# Decoder Block
class DecoderBlock(nn.Module):
    """Decoder layer: masked self-attention, then a ``TransformerBlock``.

    The self-attention output (with residual, normalization and dropout)
    becomes the query of the inner ``TransformerBlock``, whose values and
    keys are supplied by the caller.

    Args:
        embed_size: Size of the embedding dimension.
        heads: Number of attention heads.
        forward_expansion: Feed-forward width multiplier.
        dropout: Dropout probability.
        device: Accepted for signature compatibility; not used here.
    """

    def __init__(self, embed_size, heads, forward_expansion, dropout, device):
        super().__init__()
        self.attention = SelfAttention(embed_size, heads)
        self.norm = nn.LayerNorm(embed_size)
        self.transformer_block = TransformerBlock(
            embed_size,
            heads,
            dropout,
            forward_expansion,
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, value, key, src_mask, trg_mask):
        """Apply self-attention over ``x``, then attend over ``value``/``key``."""
        self_attended = self.attention(x, x, x, trg_mask)
        query = self.dropout(self.norm(self_attended + x))
        return self.transformer_block(value, key, query, src_mask)

# Decoder
class Decoder(nn.Module):
    """Stack of ``DecoderBlock`` layers followed by a vocabulary projection.

    Args:
        trg_vocab_size: Number of rows in the token embedding table and size
            of the output projection.
        embed_size: Size of the embedding dimension.
        num_layers: Number of ``DecoderBlock`` layers.
        heads: Number of attention heads per layer.
        forward_expansion: Feed-forward width multiplier for each layer.
        dropout: Dropout probability.
        device: Stored on the instance for backward compatibility; the
            forward pass follows the device of its input instead.
        max_length: Number of rows in the position embedding table, i.e. the
            longest sequence the decoder accepts.
    """

    def __init__(
        self,
        trg_vocab_size,
        embed_size,
        num_layers,
        heads,
        forward_expansion,
        dropout,
        device,
        max_length,
    ):
        super().__init__()
        self.device = device
        self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
        self.position_embedding = nn.Embedding(max_length, embed_size)

        self.layers = nn.ModuleList(
            [
                DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
                for _ in range(num_layers)
            ]
        )

        self.fc_out = nn.Linear(embed_size, trg_vocab_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, enc_out, src_mask, trg_mask):
        """Decode target token indices against the encoder output.

        Args:
            x: Integer tensor of shape (N, seq_length) with target tokens.
            enc_out: Encoder output, used as value and key in every layer.
            src_mask: Mask forwarded to each layer's inner transformer block.
            trg_mask: Mask forwarded to each layer's self-attention.

        Returns:
            Tensor of shape (N, seq_length, trg_vocab_size).

        Raises:
            ValueError: If ``seq_length`` exceeds the position table size.
        """
        N, seq_length = x.shape
        max_length = self.position_embedding.num_embeddings
        if seq_length > max_length:
            # Fail early with a readable message instead of an opaque
            # embedding index error.
            raise ValueError(
                f"Sequence length {seq_length} exceeds max_length {max_length}"
            )

        # Positions live on the input's device, independent of the `device`
        # constructor argument.
        positions = torch.arange(seq_length, device=x.device).expand(N, seq_length)
        x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

        for layer in self.layers:
            x = layer(x, enc_out, enc_out, src_mask, trg_mask)

        return self.fc_out(x)

# Transformer
class Transformer(nn.Module):
    """Encoder-decoder transformer for sequence-to-sequence tasks.

    Args:
        src_vocab_size: Source vocabulary size.
        trg_vocab_size: Target vocabulary size.
        src_pad_idx: Padding index in the source vocabulary; these positions
            are masked out of attention.
        trg_pad_idx: Padding index in the target vocabulary (stored only).
        embed_size: Size of the embedding dimension.
        num_layers: Number of encoder layers and of decoder layers.
        forward_expansion: Feed-forward width multiplier.
        heads: Number of attention heads.
        dropout: Dropout probability.
        device: Forwarded to the encoder/decoder and stored on the instance.
            Masks are created on the device of the input tensors, so this
            value does not have to match where the model actually runs.
        max_length: Maximum supported sequence length.
    """

    def __init__(
        self,
        src_vocab_size,
        trg_vocab_size,
        src_pad_idx,
        trg_pad_idx,
        embed_size=512,
        num_layers=6,
        forward_expansion=4,
        heads=8,
        dropout=0,
        device="cuda",
        max_length=100,
    ):
        super().__init__()

        self.encoder = Encoder(
            src_vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length,
        )

        self.decoder = Decoder(
            trg_vocab_size,
            embed_size,
            num_layers,
            heads,
            forward_expansion,
            dropout,
            device,
            max_length,
        )

        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Return a boolean mask of shape (N, 1, 1, src_len); False at padding.

        The comparison result already lives on ``src``'s device, so no
        transfer to ``self.device`` is needed (and none is forced, which
        keeps the default ``device="cuda"`` from breaking CPU runs).
        """
        return (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

    def make_trg_mask(self, trg):
        """Return a lower-triangular mask of shape (N, 1, trg_len, trg_len).

        Entry (i, j) is 1 when target position i may attend to position j
        (j <= i) and 0 otherwise. The mask is created on ``trg``'s device.
        """
        N, trg_len = trg.shape
        causal = torch.tril(torch.ones((trg_len, trg_len), device=trg.device))
        return causal.expand(N, 1, trg_len, trg_len)

    def forward(self, src, trg):
        """Encode ``src`` and decode ``trg`` against it.

        Args:
            src: Integer tensor of shape (N, src_len).
            trg: Integer tensor of shape (N, trg_len).

        Returns:
            The decoder output for ``trg``.
        """
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        return self.decoder(trg, enc_src, src_mask, trg_mask)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import spacy
import random
import time



OSError: [WinError 127] The specified procedure could not be found

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from collections import Counter
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Import the Encoder class from transformers_scratch.py
from transformers_scratch import Encoder

# Modified Transformer for Classification
class SentimentTransformer(nn.Module):
    """Transformer encoder with a two-class classification head.

    The encoder output is mean-pooled over the non-padding positions and fed
    through a small MLP that produces two logits.

    Args:
        vocab_size: Vocabulary size for the token embedding.
        pad_idx: Index of the padding token; these positions are masked in
            attention and excluded from pooling.
        embed_size: Size of the embedding dimension.
        num_layers: Number of encoder layers.
        forward_expansion: Feed-forward width multiplier.
        heads: Number of attention heads.
        dropout: Dropout probability (encoder and classifier).
        device: Forwarded to the encoder and stored on the instance. Masks
            follow the input tensor's device.
        max_length: Maximum supported sequence length.
    """

    def __init__(
        self,
        vocab_size,
        pad_idx,
        embed_size=512,
        num_layers=6,
        forward_expansion=4,
        heads=8,
        dropout=0.1,
        device="cuda",
        max_length=512
    ):
        super().__init__()

        self.encoder = Encoder(
            vocab_size,
            embed_size,
            num_layers,
            heads,
            device,
            forward_expansion,
            dropout,
            max_length
        )

        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(embed_size, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, 2)  # 2 classes for sentiment
        )

        self.pad_idx = pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Return a boolean mask of shape (N, 1, 1, seq_len); False at padding."""
        # The comparison result is already on src's device.
        return (src != self.pad_idx).unsqueeze(1).unsqueeze(2)

    def forward(self, src):
        """Classify a batch of token-index sequences.

        Args:
            src: Integer tensor of shape (N, seq_len).

        Returns:
            Tensor of shape (N, 2) with unnormalized class scores.
        """
        src_mask = self.make_src_mask(src)
        encoder_out = self.encoder(src, src_mask)

        # Average only over real tokens: a plain mean over dim=1 would let
        # padding positions dilute the representation of short sequences.
        keep = (src != self.pad_idx).unsqueeze(-1).to(encoder_out.dtype)
        token_sum = (encoder_out * keep).sum(dim=1)
        # clamp avoids a 0/0 for sequences that consist of padding only.
        token_count = keep.sum(dim=1).clamp(min=1.0)
        pooled = token_sum / token_count

        return self.classifier(pooled)

# Custom Dataset
class IMDBDataset(Dataset):
    """Dataset of review records turned into (token indices, label) pairs.

    Each record is a mapping with a 'review' text and a 'sentiment' string
    ('positive' or 'negative').
    """

    def __init__(self, data, tokenizer, vocab, max_length=512):
        self.data = data
        self.tokenizer = tokenizer
        self.vocab = vocab
        self.max_length = max_length
        # Sentiment string -> integer class id
        self.label_map = {'positive': 1, 'negative': 0}

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(indices, label)`` long tensors for record ``idx``.

        The review is tokenized and truncated to ``max_length`` tokens;
        tokens missing from the vocabulary map to the '<unk>' index.
        """
        review = self.data[idx]['review']
        sentiment = self.data[idx]['sentiment']

        kept_tokens = self.tokenizer(review)[:self.max_length]
        token_ids = [
            self.vocab.get(tok, self.vocab['<unk>']) for tok in kept_tokens
        ]
        class_id = self.label_map[sentiment]

        text_tensor = torch.tensor(token_ids, dtype=torch.long)
        label_tensor = torch.tensor(class_id, dtype=torch.long)
        return text_tensor, label_tensor
    

def collate_batch(batch, padding_value=0):
    """Collate ``(indices, label)`` pairs into a padded batch.

    Args:
        batch: Iterable of ``(text_tensor, label)`` pairs, where
            ``text_tensor`` is a 1-D tensor of token indices.
        padding_value: Index used to right-pad shorter sequences. Defaults
            to 0, the '<pad>' index assigned by ``build_vocab``, so that the
            model's padding mask actually matches the padded positions
            (index 1 is '<unk>').

    Returns:
        Tuple ``(texts, labels)``: ``texts`` has shape (batch, max_len) and
        ``labels`` is a 1-D long tensor of shape (batch,).
    """
    texts = [text for text, _ in batch]
    labels = [int(label) for _, label in batch]
    padded = pad_sequence(texts, padding_value=padding_value, batch_first=True)
    return padded, torch.tensor(labels, dtype=torch.long)

def build_vocab(data, tokenizer, max_vocab_size=25000):
    """Build a token -> index mapping from the 'review' field of ``data``.

    The ``max_vocab_size`` most frequent tokens receive indices starting at
    2 in order of decreasing frequency; '<pad>' is index 0 and '<unk>' is
    index 1.
    """
    frequencies = Counter(
        token for example in data for token in tokenizer(example['review'])
    )

    vocab = {}
    for index, (word, _count) in enumerate(
        frequencies.most_common(max_vocab_size), start=2
    ):
        vocab[word] = index

    # Reserved special tokens
    vocab['<pad>'] = 0
    vocab['<unk>'] = 1
    return vocab

def load_data_from_csv(df):
    """Convert a DataFrame into a list of review records.

    Args:
        df: DataFrame with 'review' and 'sentiment' columns.

    Returns:
        A list with one ``{'review': ..., 'sentiment': ...}`` dict per row,
        in row order.
    """
    # Zipping the two columns avoids building a Series for every row, which
    # is what makes iterrows() slow on large frames.
    return [
        {'review': review, 'sentiment': sentiment}
        for review, sentiment in zip(df['review'], df['sentiment'])
    ]

def train_model():
    """Train a SentimentTransformer on 'IMDB Dataset.csv'.

    Reads the CSV from the current working directory, splits it 80/20 into
    train and test rows (fixed random seed), builds a vocabulary from the
    training rows, trains for ``EPOCHS`` epochs with Adam and gradient
    clipping, and prints the test accuracy after every epoch.
    """
    # Hyperparameters
    BATCH_SIZE = 32
    EPOCHS = 5
    LEARNING_RATE = 0.0001
    MAX_LENGTH = 512
    EMBED_SIZE = 512
    NUM_HEADS = 8
    NUM_LAYERS = 6
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load dataset from CSV
    df = pd.read_csv('IMDB Dataset.csv', on_bad_lines='skip', quotechar='"')
    df_train = df.sample(frac=0.8, random_state=42)
    df_test = df.drop(df_train.index)
    train_data = load_data_from_csv(df_train)
    test_data = load_data_from_csv(df_test)

    # Simple whitespace tokenizer. A plain function reference (instead of a
    # lambda) can be pickled together with the vocabulary if needed.
    tokenizer = str.split

    # Build vocabulary
    vocab = build_vocab(train_data, tokenizer)

    # Create datasets
    train_dataset = IMDBDataset(train_data, tokenizer, vocab, MAX_LENGTH)
    test_dataset = IMDBDataset(test_data, tokenizer, vocab, MAX_LENGTH)

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, collate_fn=collate_batch)

    # Initialize model. max_length is tied to MAX_LENGTH so the position
    # table always covers the longest sequence the datasets can produce.
    model = SentimentTransformer(
        vocab_size=len(vocab),
        pad_idx=vocab['<pad>'],
        embed_size=EMBED_SIZE,
        num_layers=NUM_LAYERS,
        heads=NUM_HEADS,
        device=DEVICE,
        max_length=MAX_LENGTH,
    ).to(DEVICE)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{EPOCHS}')

        for batch_idx, (text, labels) in enumerate(progress_bar):
            text, labels = text.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()
            output = model(text)
            loss = criterion(output, labels)

            loss.backward()
            # Clip the gradient norm to keep updates bounded.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            # Running mean of the loss over the batches seen so far.
            progress_bar.set_postfix({'loss': total_loss/(batch_idx+1)})

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for text, labels in test_loader:
                text, labels = text.to(DEVICE), labels.to(DEVICE)
                outputs = model(text)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Guard against an empty test split.
        accuracy = 100 * correct / total if total else 0.0
        print(f'Epoch {epoch+1}, Accuracy: {accuracy:.2f}%')

# Run training only when this code is executed directly, not when imported.
if __name__ == "__main__":
    train_model()

Epoch 1/5:   0%|          | 1/1250 [02:48<58:21:54, 168.23s/it, loss=0.859]


KeyboardInterrupt: 

In [3]:
# Environment check: print the installed torch and torchtext versions.
# NOTE(review): the recorded traceback fails while importing torchtext (its
# _extension module calls torch.ops.load_library), so the prints below are
# presumably never reached in this environment. This looks like a
# torch/torchtext binary mismatch; verify the installed versions.
import torch
import torchtext
print(torch.__version__)
print(torchtext.__version__)


OSError: [WinError 127] The specified procedure could not be found

In [12]:
import os
import pickle

def _load_pickle(path):
    """Unpickle ``path``, reporting the file name when the file is empty."""
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # use this on files that this project wrote itself.
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except EOFError as err:
        # A bare "Ran out of input" does not say which artifact is broken.
        raise EOFError(
            f"Ran out of input while reading {path}: "
            "the file is empty or truncated; re-save the artifact"
        ) from err


def load_model(save_dir='saved_model', device='cpu',
               embed_size=64, num_layers=2, heads=8):
    """Load the saved vocabulary, tokenizer and model weights.

    Args:
        save_dir: Directory containing 'vocab.pkl', 'tokenizer.pkl' and
            'model.pth'.
        device: Device the model is created on and the weights are mapped to.
        embed_size: Embedding size of the saved model.
        num_layers: Number of encoder layers of the saved model.
        heads: Number of attention heads of the saved model.

    Returns:
        Tuple ``(model, vocab, tokenizer)`` with the model in eval mode.

    Raises:
        EOFError: If one of the pickle files is empty or truncated.
    """
    # Load vocabulary
    vocab = _load_pickle(os.path.join(save_dir, 'vocab.pkl'))
    # Load tokenizer
    tokenizer = _load_pickle(os.path.join(save_dir, 'tokenizer.pkl'))
    # Initialize model; the architecture arguments must match the ones used
    # when the weights were saved, otherwise load_state_dict fails.
    model = SentimentTransformer(
        vocab_size=len(vocab),
        pad_idx=vocab['<pad>'],
        embed_size=embed_size,
        num_layers=num_layers,
        heads=heads,
        device=device
    ).to(device)
    # Load model state
    state_dict = torch.load(os.path.join(save_dir, 'model.pth'), map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    print(f"Model and artifacts loaded from {save_dir}")
    return model, vocab, tokenizer

# Load the model and artifacts
# Reads vocab.pkl, tokenizer.pkl and model.pth from ./saved_model on CPU.
model, vocab, tokenizer = load_model(save_dir='saved_model', device='cpu')

# Example inference
def predict_sentiment(model, vocab, tokenizer, text, device='cpu', max_length=64):
    """Predict the sentiment label of a single text.

    Args:
        model: Classifier returning one row of two class scores per input
            sequence (class 1 is treated as positive).
        vocab: Mapping from token to index; must contain '<unk>'.
        tokenizer: Callable turning ``text`` into a list of tokens.
        text: The text to classify.
        device: Device the input tensor is moved to.
        max_length: Maximum number of tokens kept from ``text``.

    Returns:
        ``'positive'`` or ``'negative'``.
    """
    model.eval()
    unk_idx = vocab['<unk>']
    tokens = tokenizer(text)[:max_length]  # Truncate to max length
    # Empty or whitespace-only text would give a zero-length sequence (and a
    # NaN after pooling); fall back to a single '<unk>' token instead.
    indices = [vocab.get(token, unk_idx) for token in tokens] or [unk_idx]
    tensor = torch.tensor(indices, dtype=torch.long).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(tensor)
    predicted = output.argmax(dim=1)
    return 'positive' if predicted.item() == 1 else 'negative'

# Example usage
# Classifies one sample sentence with the loaded model and prints the label.
text = "This movie was fantastic!"
print(f'Sentiment: {predict_sentiment(model, vocab, tokenizer, text)}')

EOFError: Ran out of input