In [26]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from transformers import T5Tokenizer, T5ForConditionalGeneration
import os
import matplotlib.pyplot as plt
from tqdm import tqdm

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Read the mudra dataset
def load_data(file_path='Mudra_Recitation_dataset.csv'):
    df = pd.read_csv(file_path)
    print(f"Loaded dataset with {len(df)} mudra entries")
    return df

# Custom dataset class for the mudra data
class MudraDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        
    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = str(self.texts[idx])
        label = str(self.labels[idx])
        
        encoding = self.tokenizer(
            text,
            text_pair=None,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        target_encoding = self.tokenizer(
            label,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': target_encoding['input_ids'].flatten(),
            'decoder_attention_mask': target_encoding['attention_mask'].flatten()
        }

# Function to prepare dataloaders
def prepare_dataloaders(df, tokenizer, batch_size=4, max_length=128, test_size=0.2):
    # Combine mudra name and meditation focus as input
    texts = df.apply(lambda row: f"Mudra: {row['mudra_name']}, Focus: {row['meditation_focus']}", axis=1).values
    labels = df['recitation'].values
    
    # Split data into train and validation sets
    train_texts, val_texts, train_labels, val_labels = train_test_split(
        texts, labels, test_size=test_size, random_state=42
    )
    
    # Create datasets
    train_dataset = MudraDataset(train_texts, train_labels, tokenizer, max_length)
    val_dataset = MudraDataset(val_texts, val_labels, tokenizer, max_length)
    
    # Create dataloaders
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True
    )
    
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False
    )
    
    return train_dataloader, val_dataloader

# Training function
def train_model(model, train_dataloader, val_dataloader, optimizer, scheduler, epochs=10, 
                eval_every=1, save_path='mudra_model'):
    
    # Create directory for saving model
    os.makedirs(save_path, exist_ok=True)
    
    # For tracking metrics
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_train_loss = 0
        
        # Progress bar for training
        train_progress = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Train]")
        
        for batch in train_progress:
            # Move batch to device
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            decoder_attention_mask = batch['decoder_attention_mask'].to(device)
            
            # Clear gradients
            model.zero_grad()
            
            # Forward pass
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
                decoder_attention_mask=decoder_attention_mask
            )
            
            loss = outputs.loss
            total_train_loss += loss.item()
            
            # Backward pass
            loss.backward()
            
            # Clip gradients to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            # Update weights
            optimizer.step()
            scheduler.step()
            
            # Update progress bar
            train_progress.set_postfix({'loss': loss.item()})
        
        # Calculate average training loss
        avg_train_loss = total_train_loss / len(train_dataloader)
        train_losses.append(avg_train_loss)
        
        # Evaluate model if it's evaluation time
        if (epoch + 1) % eval_every == 0:
            model.eval()
            total_val_loss = 0
            
            # Progress bar for validation
            val_progress = tqdm(val_dataloader, desc=f"Epoch {epoch+1}/{epochs} [Val]")
            
            with torch.no_grad():
                for batch in val_progress:
                    # Move batch to device
                    input_ids = batch['input_ids'].to(device)
                    attention_mask = batch['attention_mask'].to(device)
                    labels = batch['labels'].to(device)
                    decoder_attention_mask = batch['decoder_attention_mask'].to(device)
                    
                    # Forward pass
                    outputs = model(
                        input_ids=input_ids,
                        attention_mask=attention_mask,
                        labels=labels,
                        decoder_attention_mask=decoder_attention_mask
                    )
                    
                    loss = outputs.loss
                    total_val_loss += loss.item()
                    
                    # Update progress bar
                    val_progress.set_postfix({'loss': loss.item()})
            
            # Calculate average validation loss
            avg_val_loss = total_val_loss / len(val_dataloader)
            val_losses.append(avg_val_loss)
            
            # Print loss metrics
            print(f"Epoch {epoch+1}/{epochs}")
            print(f"  Train Loss: {avg_train_loss:.4f}")
            print(f"  Val Loss: {avg_val_loss:.4f}")
            
            # Save model if it's the best so far
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                model_path = os.path.join(save_path, f"best_model_epoch_{epoch+1}.pt")
                torch.save(model.state_dict(), model_path)
                print(f"  New best model saved to {model_path}")
        
    # Save the final model
    final_model_path = os.path.join(save_path, "final_model.pt")
    torch.save(model.state_dict(), final_model_path)
    print(f"Final model saved to {final_model_path}")
    
    # Plot loss metrics
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, epochs+1), train_losses, 'b-', label='Training Loss')
    plt.plot([i * eval_every for i in range(1, len(val_losses)+1)], val_losses, 'r-', label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_path, 'loss_plot.png'))
    plt.close()
    
    return train_losses, val_losses

# Function to generate recitations using the trained model
def generate_recitation(model, tokenizer, mudra_name, meditation_focus=None, max_length=100):
    model.eval()
    
    # Prepare input text
    if meditation_focus:
        input_text = f"Mudra: {mudra_name}, Focus: {meditation_focus}"
    else:
        input_text = f"Mudra: {mudra_name}"
    
    # Tokenize input
    inputs = tokenizer(
        input_text,
        add_special_tokens=True,
        return_tensors="pt",
        max_length=128,
        padding='max_length',
        truncation=True
    )
    
    # Move to device
    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)
    
    # Generate output
    output_ids = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
        repetition_penalty=1.2,
        early_stopping=True
    )
    
    # Decode output
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    return generated_text

# Main function to run the entire process
def main():
    # Load data
    df = load_data()
    
    # Initialize T5 tokenizer and model
    model_name = "t5-small"  # You can use "t5-base" for better results
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    model.to(device)
    
    # Prepare dataloaders
    train_dataloader, val_dataloader = prepare_dataloaders(df, tokenizer)
    
    # Set up optimizer and scheduler
    optimizer = AdamW(model.parameters(), lr=5e-5)
    total_steps = len(train_dataloader) * 10  # 10 epochs
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps
    )
    
    # Train model
    print("Starting training...")
    train_losses, val_losses = train_model(
        model, 
        train_dataloader, 
        val_dataloader, 
        optimizer, 
        scheduler,
        epochs=10
    )
    
    # Generate some examples
    print("\nGenerating example recitations:")
    test_mudras = ["Abhaya Mudra", "Dhyana Mudra", "Jñāna Mudra"]
    
    for mudra in test_mudras:
        meditation_focus = df[df['mudra_name'] == mudra]['meditation_focus'].values[0]
        generated = generate_recitation(model, tokenizer, mudra, meditation_focus)
        print(f"\nMudra: {mudra}")
        print(f"Focus: {meditation_focus}")
        print(f"Generated: {generated}")
        print(f"Original: {df[df['mudra_name'] == mudra]['recitation'].values[0]}")

if __name__ == "__main__":
    main()

Using device: cuda
Loaded dataset with 16 mudra entries


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



Starting training...


Epoch 1/10 [Train]:   0%|          | 0/3 [00:00<?, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Epoch 1/10 [Train]: 100%|██████████| 3/3 [00:02<00:00,  1.04it/s, loss=8.96]
Epoch 1/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 12.91it/s, loss=3.27]


Epoch 1/10
  Train Loss: 7.0758
  Val Loss: 3.2679
  New best model saved to mudra_model\best_model_epoch_1.pt


Epoch 2/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  3.22it/s, loss=5.94]
Epoch 2/10 [Val]: 100%|██████████| 1/1 [00:00<00:00,  9.23it/s, loss=1.88]


Epoch 2/10
  Train Loss: 6.4762
  Val Loss: 1.8816
  New best model saved to mudra_model\best_model_epoch_2.pt


Epoch 3/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  3.69it/s, loss=6.23]
Epoch 3/10 [Val]: 100%|██████████| 1/1 [00:00<00:00,  9.67it/s, loss=1.53]


Epoch 3/10
  Train Loss: 5.1959
  Val Loss: 1.5284
  New best model saved to mudra_model\best_model_epoch_3.pt


Epoch 4/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  3.97it/s, loss=3.97]
Epoch 4/10 [Val]: 100%|██████████| 1/1 [00:00<00:00,  8.41it/s, loss=1.29]


Epoch 4/10
  Train Loss: 4.2733
  Val Loss: 1.2862
  New best model saved to mudra_model\best_model_epoch_4.pt


Epoch 5/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  4.14it/s, loss=4.21]
Epoch 5/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 14.12it/s, loss=1.17]


Epoch 5/10
  Train Loss: 3.7431
  Val Loss: 1.1724
  New best model saved to mudra_model\best_model_epoch_5.pt


Epoch 6/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  3.76it/s, loss=2.86]
Epoch 6/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 13.34it/s, loss=1.13]


Epoch 6/10
  Train Loss: 2.8980
  Val Loss: 1.1253
  New best model saved to mudra_model\best_model_epoch_6.pt


Epoch 7/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  4.04it/s, loss=2.5] 
Epoch 7/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 13.54it/s, loss=1.1]


Epoch 7/10
  Train Loss: 3.1161
  Val Loss: 1.1047
  New best model saved to mudra_model\best_model_epoch_7.pt


Epoch 8/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  4.06it/s, loss=2.54]
Epoch 8/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 10.76it/s, loss=1.09]


Epoch 8/10
  Train Loss: 2.8957
  Val Loss: 1.0948
  New best model saved to mudra_model\best_model_epoch_8.pt


Epoch 9/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  3.88it/s, loss=2.86]
Epoch 9/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 14.29it/s, loss=1.09]


Epoch 9/10
  Train Loss: 3.0438
  Val Loss: 1.0905
  New best model saved to mudra_model\best_model_epoch_9.pt


Epoch 10/10 [Train]: 100%|██████████| 3/3 [00:00<00:00,  4.21it/s, loss=3.69]
Epoch 10/10 [Val]: 100%|██████████| 1/1 [00:00<00:00, 14.48it/s, loss=1.09]


Epoch 10/10
  Train Loss: 3.1666
  Val Loss: 1.0891
  New best model saved to mudra_model\best_model_epoch_10.pt
Final model saved to mudra_model\final_model.pt

Generating example recitations:





Mudra: Abhaya Mudra
Focus: Invoke fearlessness and protective energy
Generated: Mudra: Abhaya Mudra: Abhaya Mudra: Abhaya Mudra: Abhaya Mudra: Abhaya Mudra: Abhaya Mudra, Focus: Invoke fearlessness and protective energy
Original: Monk: The fearless heart protects all beings. Let courage arise in you now.

Mudra: Dhyana Mudra
Focus: Deepen concentration and mental stillness
Generated: Mudra: Dhyana Mudra: Dhyana Mudra, Focus: Dhyana Mudra: Dhyana Mudra: Dhyana Mudra: Dhyana Mudra: Dhyana Mudra, Focus: Deepen concentration and mental stillness
Original: Monk: In stillness the lotus of awareness opens. Breathe into presence.

Mudra: Jñāna Mudra
Focus: Open to wisdom and insight
Generated: Mudra: Jna Mudra: Jna Mudra, Focus: Open to wisdom and insight
Original: Monk: Wisdom’s lamp shines within; let it illuminate your path.
