In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math
import random
import numpy as np

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [2]:
class CharacterTokenizer:
    """Simple character-level tokenizer for name generation"""

    def __init__(self):
        self.char_to_idx = {}
        self.idx_to_char = {}
        self.vocab_size = 0

    def build_vocab(self, names):
        """Build vocabulary from list of names"""
        # Get all unique characters
        chars = set()
        for name in names:
            chars.update(name.lower())

        # Add special tokens
        chars = sorted(list(chars))
        special_tokens = ['<PAD>', '<SOS>', '<EOS>']

        # Create mappings
        all_tokens = special_tokens + chars
        self.char_to_idx = {char: idx for idx, char in enumerate(all_tokens)}
        self.idx_to_char = {idx: char for idx, char in enumerate(all_tokens)}
        self.vocab_size = len(all_tokens)

        print(f"Vocabulary size: {self.vocab_size}")
        print(f"Characters: {chars}")

    def encode(self, text):
        """Convert text to list of token indices"""
        return [self.char_to_idx.get(char.lower(), 0) for char in text]

    def decode(self, indices):
        """Convert list of indices back to text"""
        return ''.join([self.idx_to_char.get(idx, '') for idx in indices])

    def encode_name(self, name):
        """Encode name with SOS and EOS tokens"""
        encoded = [self.char_to_idx['<SOS>']]
        encoded.extend(self.encode(name))
        encoded.append(self.char_to_idx['<EOS>'])
        return encoded


In [3]:
class NameDataset(Dataset):
    """Dataset class for name generation"""

    def __init__(self, names, tokenizer, max_length=32):
        self.names = names
        self.tokenizer = tokenizer
        self.max_length = max_length

        # Encode all names
        self.encoded_names = []
        for name in names:
            encoded = tokenizer.encode_name(name)
            if len(encoded) <= max_length:
                # Pad sequence
                padded = encoded + [tokenizer.char_to_idx['<PAD>']] * (max_length - len(encoded))
                self.encoded_names.append(padded)

    def __len__(self):
        return len(self.encoded_names)

    def __getitem__(self, idx):
        sequence = torch.tensor(self.encoded_names[idx], dtype=torch.long)
        # Input is sequence[:-1], target is sequence[1:]
        return sequence[:-1], sequence[1:]


In [4]:
class PositionalEncoding(nn.Module):
    """Positional encoding for transformer"""

    def __init__(self, d_model, max_length=512):
        super().__init__()

        pe = torch.zeros(max_length, d_model)
        position = torch.arange(0, max_length).unsqueeze(1).float()

        div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                           -(math.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]


In [5]:
class MultiHeadAttention(nn.Module):
    """Multi-head self-attention mechanism"""

    def __init__(self, d_model, n_heads, dropout=0.1):
        super().__init__()
        assert d_model % n_heads == 0

        self.d_model = d_model
        self.n_heads = n_heads
        self.d_k = d_model // n_heads

        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        self.w_o = nn.Linear(d_model, d_model)

        self.dropout = nn.Dropout(dropout)

    def scaled_dot_product_attention(self, Q, K, V, mask=None):
        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)

        if mask is not None:
            scores.masked_fill_(mask == 0, -1e9)

        attention_weights = F.softmax(scores, dim=-1)
        attention_weights = self.dropout(attention_weights)

        output = torch.matmul(attention_weights, V)
        return output

    def forward(self, x, mask=None):
        batch_size, seq_len, d_model = x.size()

        # Linear projections
        Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
        K = self.w_k(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
        V = self.w_v(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)

        # Apply attention
        attention_output = self.scaled_dot_product_attention(Q, K, V, mask)

        # Concatenate heads
        attention_output = attention_output.transpose(1, 2).contiguous().view(
            batch_size, seq_len, d_model)

        # Final linear projection
        output = self.w_o(attention_output)
        return output


In [6]:
class FeedForward(nn.Module):
    """Position-wise feed-forward network"""

    def __init__(self, d_model, d_ff, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        return self.linear2(self.dropout(F.relu(self.linear1(x))))


In [7]:
class TransformerBlock(nn.Module):
    """Single transformer decoder block"""

    def __init__(self, d_model, n_heads, d_ff, dropout=0.1):
        super().__init__()
        self.attention = MultiHeadAttention(d_model, n_heads, dropout)
        self.feed_forward = FeedForward(d_model, d_ff, dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        # Self-attention with residual connection
        attn_output = self.attention(self.norm1(x), mask)
        x = x + self.dropout(attn_output)

        # Feed-forward with residual connection
        ff_output = self.feed_forward(self.norm2(x))
        x = x + self.dropout(ff_output)

        return x


In [8]:
class NameGeneratorTransformer(nn.Module):
    """Complete transformer model for name generation"""

    def __init__(self, vocab_size, d_model=256, n_heads=8, n_layers=6,
                 d_ff=1024, max_length=32, dropout=0.1):
        super().__init__()

        self.d_model = d_model
        self.vocab_size = vocab_size
        self.max_length = max_length

        # Embedding layers
        self.token_embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model, max_length)

        # Transformer blocks
        self.transformer_blocks = nn.ModuleList([
            TransformerBlock(d_model, n_heads, d_ff, dropout)
            for _ in range(n_layers)
        ])

        # Output projection
        self.ln_f = nn.LayerNorm(d_model)
        self.head = nn.Linear(d_model, vocab_size)

        self.dropout = nn.Dropout(dropout)

        # Initialize weights
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def create_causal_mask(self, seq_len):
        """Create causal mask to prevent attention to future tokens"""
        mask = torch.tril(torch.ones(seq_len, seq_len))
        return mask.unsqueeze(0).unsqueeze(0)  # (1, 1, seq_len, seq_len)

    def forward(self, x, targets=None):
        batch_size, seq_len = x.size()

        # Token embeddings
        token_emb = self.token_embedding(x) * math.sqrt(self.d_model)

        # Add positional encoding
        x = self.dropout(self.positional_encoding(token_emb))

        # Create causal mask
        mask = self.create_causal_mask(seq_len).to(x.device)

        # Apply transformer blocks
        for block in self.transformer_blocks:
            x = block(x, mask)

        # Final layer norm and projection
        x = self.ln_f(x)
        logits = self.head(x)

        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))

        return logits, loss


In [9]:
def train_model(model, dataloader, tokenizer, epochs=50, lr=3e-4):
    """Train the transformer model"""

    model.train()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=0.01)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    for epoch in range(epochs):
        total_loss = 0
        num_batches = 0

        for batch_idx, (inputs, targets) in enumerate(dataloader):
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()

            logits, loss = model(inputs, targets)
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()

            total_loss += loss.item()
            num_batches += 1

            if batch_idx % 50 == 0:
                print(f'Epoch {epoch+1}/{epochs}, Batch {batch_idx}, Loss: {loss.item():.4f}')

        scheduler.step()
        avg_loss = total_loss / num_batches
        print(f'Epoch {epoch+1}/{epochs} completed. Average Loss: {avg_loss:.4f}')

        # Generate sample names every 10 epochs
        if (epoch + 1) % 10 == 0:
            print("Sample generated names:")
            sample_names = generate_names(model, tokenizer, num_names=5)
            for name in sample_names:
                print(f"  {name}")
            print()

def generate_names(model, tokenizer, num_names=5, max_length=20, temperature=0.8):
    """Generate new names using the trained model"""

    model.eval()
    generated_names = []

    with torch.no_grad():
        for _ in range(num_names):
            # Start with SOS token
            current_sequence = [tokenizer.char_to_idx['<SOS>']]

            for _ in range(max_length):
                # Convert to tensor
                input_tensor = torch.tensor([current_sequence], dtype=torch.long).to(device)

                # Get model predictions
                logits, _ = model(input_tensor)

                # Get logits for the last token
                next_token_logits = logits[0, -1, :] / temperature

                # Apply softmax to get probabilities
                probs = F.softmax(next_token_logits, dim=-1)

                # Sample next token
                next_token = torch.multinomial(probs, 1).item()

                # Check for EOS token
                if next_token == tokenizer.char_to_idx['<EOS>']:
                    break

                # Add to sequence
                current_sequence.append(next_token)

            # Decode the sequence (excluding SOS token)
            name = tokenizer.decode(current_sequence[1:])
            name = name.replace('<EOS>', '').replace('<PAD>', '').strip()

            if name:  # Only add non-empty names
                generated_names.append(name)

    model.train()  # Set back to training mode
    return generated_names


In [13]:

import pandas as pd

file_path = '/content/Indian-Male-Names.csv'

try:
    df = pd.read_csv(file_path)

    if 'name' in df.columns:
        # Extract the 'name' column and convert it to a list
        name_list = df['name'].tolist()

        # Print the first few names to verify
        print(f"Successfully read {len(name_list)} names.")
        print("First 10 names:")
        for i, name in enumerate(name_list[:10]):
            print(f"- {name}")

    else:
        print(f"Error: 'name' column not found in '{file_path}'. Available columns: {df.columns.tolist()}")

except FileNotFoundError:
    print(f"Error: File not found at '{file_path}'.")
except Exception as e:
    print(f"An error occurred: {e}")
name_list = [x for x in name_list if isinstance(x, str)]


Successfully read 14845 names.
First 10 names:
- barjraj
- ramdin verma
- sharat chandran
- birender mandal
- amit
- kushal
- kasid
- shiv prakash
- vikram singh
- sanjay


In [16]:
def main():
    # Sample names dataset (replace with your own)
    names = name_list

    print(f"Training on {len(names)} names")

    # Initialize tokenizer and build vocabulary
    tokenizer = CharacterTokenizer()
    tokenizer.build_vocab(names)

    # Create dataset and dataloader
    dataset = NameDataset(names, tokenizer, max_length=32)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    # Initialize model
    model = NameGeneratorTransformer(
        vocab_size=tokenizer.vocab_size,
        d_model=256,
        n_heads=8,
        n_layers=6,
        d_ff=1024,
        max_length=32,
        dropout=0.1
    ).to(device)

    print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")

    # Train the model
    train_model(model, dataloader, tokenizer, epochs=100, lr=3e-4)

    # Save the model
    torch.save({
        'model_state_dict': model.state_dict(),
        'tokenizer_char_to_idx': tokenizer.char_to_idx,
        'tokenizer_idx_to_char': tokenizer.idx_to_char,
        'vocab_size': tokenizer.vocab_size
    }, 'name_generator_model.pth')

    print("Model saved successfully!")

    # Generate final examples
    print("\nFinal generated names:")
    final_names = generate_names(model, tokenizer, num_names=10, temperature=0.7)
    for i, name in enumerate(final_names, 1):
        print(f"{i:2d}. {name}")

if __name__ == "__main__":
    main()


Training on 14821 names
Vocabulary size: 90
Characters: [' ', '&', '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '@', '[', '\\', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'ं', 'अ', 'आ', 'उ', 'ऐ', 'क', 'ख', 'ग', 'च', 'छ', 'ज', 'ण', 'त', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', '़', 'ा', 'ि', 'ी', 'ु', 'ू', 'े', 'ो', '्', '\u200d', '�']
Model has 4785242 parameters
Epoch 1/20, Batch 0, Loss: 4.6287
Epoch 1/20, Batch 50, Loss: 0.8113
Epoch 1/20, Batch 100, Loss: 0.7332
Epoch 1/20, Batch 150, Loss: 0.6891
Epoch 1/20, Batch 200, Loss: 0.6400
Epoch 1/20, Batch 250, Loss: 0.6154
Epoch 1/20, Batch 300, Loss: 0.6843
Epoch 1/20, Batch 350, Loss: 0.6035
Epoch 1/20, Batch 400, Loss: 0.6515
Epoch 1/20, Batch 450, Loss: 0.6174
Epoch 1/20, Batch 500, Loss: 0.6258
Epoch 1/20, Batch 550, Loss: 0.5650
Epoch 1/20, Batch 600, Loss: 0.5561
Epoch 1/

In [17]:
def load_model_and_generate():
    """Load trained model and generate new names"""

    # Load the saved model
    checkpoint = torch.load('name_generator_model.pth', map_location=device)

    # Recreate tokenizer
    tokenizer = CharacterTokenizer()
    tokenizer.char_to_idx = checkpoint['tokenizer_char_to_idx']
    tokenizer.idx_to_char = checkpoint['tokenizer_idx_to_char']
    tokenizer.vocab_size = checkpoint['vocab_size']

    # Recreate model
    model = NameGeneratorTransformer(
        vocab_size=tokenizer.vocab_size,
        d_model=256,
        n_heads=8,
        n_layers=6,
        d_ff=1024,
        max_length=32,
        dropout=0.1
    ).to(device)

    # Load model weights
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    print("Model loaded successfully!")

    # Interactive generation
    while True:
        try:
            num_names = int(input("\nHow many names to generate? (0 to exit): "))
            if num_names == 0:
                break

            temperature = float(input("Temperature (0.1-2.0, higher = more creative): ") or "0.8")
            temperature = max(0.1, min(2.0, temperature))

            print(f"\nGenerating {num_names} names with temperature {temperature}:")
            print("-" * 40)

            generated_names = generate_names(model, tokenizer, num_names, temperature=temperature)

            for i, name in enumerate(generated_names, 1):
                print(f"{i:2d}. {name.capitalize()}")

        except KeyboardInterrupt:
            break
        except ValueError:
            print("Please enter valid numbers.")

    print("Goodbye!")

def generate_with_prefix(model, tokenizer, prefix="", max_length=20, temperature=0.8):
    """Generate names starting with a specific prefix"""

    model.eval()

    with torch.no_grad():
        # Start with SOS token and prefix
        current_sequence = [tokenizer.char_to_idx['<SOS>']]

        # Add prefix characters
        for char in prefix.lower():
            if char in tokenizer.char_to_idx:
                current_sequence.append(tokenizer.char_to_idx[char])

        # Generate remaining characters
        for _ in range(max_length - len(prefix)):
            input_tensor = torch.tensor([current_sequence], dtype=torch.long).to(device)
            logits, _ = model(input_tensor)

            next_token_logits = logits[0, -1, :] / temperature
            probs = F.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(probs, 1).item()

            if next_token == tokenizer.char_to_idx['<EOS>']:
                break

            current_sequence.append(next_token)

        # Decode (excluding SOS token)
        name = tokenizer.decode(current_sequence[1:])
        name = name.replace('<EOS>', '').replace('<PAD>', '').strip()

    return name

# Example usage for inference
if __name__ == "__main__":
    # Uncomment to run inference
    load_model_and_generate()
    # pass


Model loaded successfully!

How many names to generate? (0 to exit): 10
Temperature (0.1-2.0, higher = more creative): 1.9

Generating 10 names with temperature 1.9:
----------------------------------------
 1. Hawdik
 2. Tarun
 3. Nishant@mukhterju
 4. Ganni kumwसja
 5. Bhrop s/-v0`sha`hveय
 6. Deshraj
 7. Yhmed ahmad
 8. Tepyendra
 9. Hasa raj
10. Chachhchuqbu

How many names to generate? (0 to exit): 2
Temperature (0.1-2.0, higher = more creative): 2

Generating 2 names with temperature 2.0:
----------------------------------------
 1. Farha[t
 2. Cनवदील

How many names to generate? (0 to exit): 10
Temperature (0.1-2.0, higher = more creative): 2

Generating 10 names with temperature 2.0:
----------------------------------------
 1. Nikhi.
 2. Noor alamhe, अलाल@p
 3. श्री च
 4. Raghuram prehi
 5. Hariis
 6. Ctvan parkeet,bepual
 7. Schhiter
 8. Saaoad
 9. /o9यn
10. Awissh magwa @ jita)

How many names to generate? (0 to exit): 0
Goodbye!
