# 🎯 GPT-Based Translation of Tamil Transcript to English
This notebook reads a Tamil transcript produced by WhisperX, uses an OpenAI GPT model (GPT-4o or GPT-3.5 Turbo) to translate each segment into English while preserving context and meaning, and saves the result as structured JSON.

In [None]:
# ✅ Install OpenAI if not already installed
!pip install openai

In [None]:
# ✅ Import required libraries
import getpass
import json
import os

import openai

In [None]:
# 🔑 Provide your OpenAI API key without hardcoding it in the notebook.
# The OPENAI_API_KEY environment variable is used when set; otherwise you are
# prompted with hidden input, so the key never ends up in the saved notebook
# source or its outputs.
api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
if not api_key.strip():
    # Fail here with a clear message instead of on every request later on.
    raise ValueError("An OpenAI API key is required to run the translation.")
openai.api_key = api_key.strip()

In [None]:
# 📥 Upload your Tamil transcript.json file (from WhisperX)
from google.colab import files

uploaded = files.upload()
if not uploaded:
    # A cancelled upload leaves nothing to index; stop with a clear message
    # rather than an IndexError on the filename lookup.
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and select the WhisperX transcript JSON."
    )

# The uploaded mapping is keyed by filename; only the first file is used.
filename = next(iter(uploaded))

In [None]:
# 📚 Load transcript
with open(filename, "r", encoding="utf-8") as f:
    transcript_data = json.load(f)

# A missing (or null) "segments" entry falls back to an empty list, but is
# reported: otherwise the notebook would silently write an empty translation.
segments = transcript_data.get("segments") or []
if segments:
    print(f"Loaded {len(segments)} segments from '{filename}'")
else:
    print(f"⚠️ No segments found in '{filename}'; nothing will be translated.")

In [None]:
# 🔁 Translate each segment using GPT
MODEL = "gpt-4o"  # or "gpt-3.5-turbo"
SYSTEM_PROMPT = "You are a professional translator specializing in customer calls."
ERROR_PLACEHOLDER = "ERROR"  # stored as the segment text when a request fails


def _create_chat_completion(**request):
    """Send a chat-completion request through whichever interface the installed openai exposes.

    openai>=1.0 removed `openai.ChatCompletion`; its module-level
    `openai.chat.completions` client replaces it and still honours `openai.api_key`.
    """
    if hasattr(openai, "OpenAI"):  # the client class only exists in openai>=1.0
        return openai.chat.completions.create(**request)
    return openai.ChatCompletion.create(**request)


def _translate_text(tamil_text):
    """Return the English translation of one Tamil segment.

    Any exception raised by the openai client propagates to the caller.
    """
    prompt = f"Translate this Tamil call transcript to English, keeping context and meaning:\n\n{tamil_text}"
    response = _create_chat_completion(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        temperature=0.3,
    )
    # Attribute access works on both the legacy and the v1 response objects;
    # content can be None, so fall back to an empty string before stripping.
    return (response.choices[0].message.content or "").strip()


translated_segments = []
failed_indices = []

for i, segment in enumerate(segments):
    if not segment["text"].strip():
        # Nothing to translate: skip the API call instead of sending a blank prompt.
        translated_text = ""
    else:
        try:
            translated_text = _translate_text(segment["text"])
        except Exception as e:
            # Best effort: keep going so one failed request does not lose the
            # whole run, but remember which segments need to be retried.
            print(f"Error translating segment {i}: {e}")
            translated_text = ERROR_PLACEHOLDER
            failed_indices.append(i)

    translated_segments.append({
        "start": segment["start"],
        "end": segment["end"],
        "text": translated_text
    })

if failed_indices:
    print(
        f"⚠️ {len(failed_indices)} of {len(segments)} segments failed "
        f"and contain '{ERROR_PLACEHOLDER}': {failed_indices}"
    )

In [None]:
# 💾 Write the translated segments to disk as JSON
translated_json = dict(language="en", segments=translated_segments)

# ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
with open("transcript_translated.json", mode="w", encoding="utf-8") as out_file:
    json.dump(translated_json, out_file, ensure_ascii=False, indent=2)

print("✅ Translation complete. File saved as 'transcript_translated.json'")

In [None]:
# 📤 Download the translated JSON file
output_name = "transcript_translated.json"
files.download(output_name)