INSTALLING PACKAGES

In [None]:
!pip install accelerate -U
!pip install jiwer
!pip install datasets
!pip install lora
!pip install torch
!pip install evaluate
!pip install peft

IMPORT LIBRARIES

In [None]:
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, AdamW
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import csv
import torchaudio
from transformers import WhisperFeatureExtractor
from transformers import WhisperTokenizer
from transformers import WhisperProcessor
from transformers import WhisperForConditionalGeneration

SET DEVICE

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)


LOADING THE MODEL


In [None]:
# Load the pre-trained Whisper checkpoint and its pre-/post-processing objects.
# The checkpoint directory is named once so every loader reads the same files.
MODEL_DIR = "/content/tamilnew2"

# Feature extractor for the audio side of the checkpoint.
feature_extractor = WhisperFeatureExtractor.from_pretrained(MODEL_DIR)

# Tokenizer for the text side of the checkpoint.
tokenizer = WhisperTokenizer.from_pretrained(MODEL_DIR)

# Processor loaded from the same directory; it is the object used by the
# dataset-processing and saving cells.
processor = WhisperProcessor.from_pretrained(MODEL_DIR)

model = WhisperForConditionalGeneration.from_pretrained(MODEL_DIR)
# Clear any forced decoder prompt ids and suppressed-token list stored in the
# checkpoint's config.
model.config.forced_decoder_ids = None
model.config.suppress_tokens = []

# The training batches are moved to `device`; the weights must live there too,
# otherwise the forward pass fails with a device mismatch on GPU machines.
model = model.to(device)

LOADING THE DATASET

In [None]:
# Read the training manifest: column 0 is appended to `audio_paths`,
# column 1 to `transcriptions`.
audio_paths = []
transcriptions = []
skipped_rows = 0

# Open the CSV file and read its contents
with open('dup.csv', newline='', encoding='utf-8') as csvfile:
    csv_reader = csv.reader(csvfile)
    # The first row is treated as a header and discarded
    # (the None default makes this a no-op on an empty file).
    next(csv_reader, None)

    for row in csv_reader:
        # csv.reader yields [] for blank lines; rows with fewer than two
        # columns cannot be indexed safely, so count and skip them instead of
        # crashing with IndexError.
        if len(row) < 2:
            skipped_rows += 1
            continue
        audio_paths.append(row[0])
        transcriptions.append(row[1])

# Report the size and a short preview rather than dumping the whole dataset
# into the cell output.
print("Rows loaded:", len(audio_paths))
if skipped_rows:
    print("Rows skipped (fewer than 2 columns):", skipped_rows)
print("Column 1:", audio_paths[:5])
print("Column 2:", transcriptions[:5])

PROCESSING THE DATASET

In [None]:
# Process audio files and transcriptions into tensors for training.

# Sampling rate the checkpoint's feature extractor was configured for.
TARGET_SAMPLE_RATE = processor.feature_extractor.sampling_rate


def _load_waveform(path):
    """Load one audio file as a mono 1-D float array at TARGET_SAMPLE_RATE.

    Args:
        path: Path of the audio file to read with ``torchaudio.load``.

    Returns:
        A 1-D numpy array of samples, down-mixed to a single channel and
        resampled to ``TARGET_SAMPLE_RATE`` when the file uses another rate.
    """
    waveform, sample_rate = torchaudio.load(path)  # (channels, samples), rate
    if waveform.size(0) > 1:
        # Average the channels so every clip is mono.
        waveform = waveform.mean(dim=0, keepdim=True)
    if sample_rate != TARGET_SAMPLE_RATE:
        waveform = torchaudio.functional.resample(waveform, sample_rate, TARGET_SAMPLE_RATE)
    return waveform.squeeze(0).numpy()


# Load each file individually (one path per call) and materialise a list;
# the feature extractor needs a concrete batch, not a generator of tuples.
waveforms = [_load_waveform(path) for path in audio_paths]

# Audio side: the feature extractor's default padding pads/truncates every clip
# to the fixed window the Whisper encoder expects, so `padding=True` must not be
# passed here. The mask is requested so the dataset keeps three tensors.
inputs = processor.feature_extractor(
    waveforms,
    sampling_rate=TARGET_SAMPLE_RATE,
    return_tensors="pt",
    return_attention_mask=True,
)

# Text side: transcriptions go through the tokenizer (the processor's first
# positional argument is audio, not text).
label_batch = processor.tokenizer(transcriptions, return_tensors="pt", padding=True, truncation=True)
# Replace padding positions with -100 so they are ignored by the loss.
labels = label_batch["input_ids"].masked_fill(label_batch["attention_mask"].ne(1), -100)
# The model prepends the decoder start token itself when it derives the decoder
# inputs from `labels`; drop it here if the tokenizer already added it.
if labels.size(1) > 0 and (labels[:, 0] == model.config.decoder_start_token_id).all().item():
    labels = labels[:, 1:]

# Create a DataLoader. Tensors stay on the CPU; the training loop moves each
# batch to `device`, which avoids placing the whole dataset in GPU memory.
dataset = TensorDataset(inputs["input_features"], inputs["attention_mask"], labels)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)


SETTING UP OPTIMIZER AND SCHEDULER

In [None]:
# Set up optimizer and scheduler.
# torch.optim.AdamW is used instead of the AdamW class from transformers, which
# that library deprecated in favour of the PyTorch implementation.
# weight_decay=0.0 keeps the previous class's default (PyTorch's own default is
# 0.01), so the regularisation does not change silently.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)
# Multiply the learning rate by 0.9 on every scheduler.step() call
# (the training loop calls it once per epoch).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)


FINETUNING THE MODEL

In [None]:
# Fine-tune the model
num_epochs = 3

# Batches are moved to `device` below; make sure the weights are there as well
# (a no-op when the model is already on that device).
model = model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    for batch in tqdm(dataloader, desc=f"Epoch {epoch + 1}"):
        # Each batch holds (audio features, feature mask, labels) in that order.
        # A loop-local name is used so the `inputs` object created by the
        # dataset-processing cell is not overwritten.
        batch_inputs = {
            # Whisper's forward() takes `input_features`; it has no
            # `input_values` keyword.
            "input_features": batch[0].to(device),
            "attention_mask": batch[1].to(device),
            "labels": batch[2].to(device),
        }

        optimizer.zero_grad()
        outputs = model(**batch_inputs)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Print average loss for the epoch; max(..., 1) avoids a ZeroDivisionError
    # when the dataloader is empty.
    print(f"Average Loss: {total_loss / max(len(dataloader), 1)}")

    # Adjust learning rate once per epoch
    scheduler.step()


SAVING THE MODEL

In [None]:
# Write the fine-tuned model, then the processor, into the same output directory.
_export_dir = "fine_tuned_whisper_model"
for _artifact in (model, processor):
    _artifact.save_pretrained(_export_dir)