**REVIEW TITLE TRANSLATION FROM BRAZILIAN PORTUGUESE TO ENGLISH**

In [None]:
import os
import time

import pandas as pd
import requests
from tqdm import tqdm

# Load your reviews CSV (replace with your actual file path).
# Raw string: the Windows path contains backslashes ("\O", "\o") that are not
# valid escape sequences; a raw string keeps them literal without warnings.
df = pd.read_csv(r"D:\Olist E-commerce Data\olist fact reviews.csv")

# Keep only the rows that actually have a review title to translate
df = df[df['review_comment_title'].notnull()].copy()

# Azure Translator details.
# The subscription key is read from the environment so the secret is never
# stored in (or shared with) the notebook itself.
subscription_key = os.environ.get("AZURE_TRANSLATOR_KEY")
if not subscription_key:
    raise RuntimeError(
        "Set the AZURE_TRANSLATOR_KEY environment variable to your "
        "Azure Translator subscription key before running this cell."
    )
endpoint = "https://api.cognitive.microsofttranslator.com/"
location = "centralindia"  # Change if you're using a specific Azure region

# Translation parameters
path = '/translate?api-version=3.0'
params = '&to=en'
# Strip the endpoint's trailing slash so the URL does not contain "//translate"
constructed_url = endpoint.rstrip('/') + path + params

# Headers sent with every translation request
headers = {
    'Ocp-Apim-Subscription-Key': subscription_key,
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json'
}

# Batch translate
def translate_texts(texts, max_retries=3, timeout=30):
    """Translate one batch of strings to English via the Azure Translator API.

    Args:
        texts: List of values to translate; all are sent in a single request.
        max_retries: Number of additional attempts made after a throttled
            (HTTP 429), server-side (HTTP 5xx) or network-level failure.
        timeout: Seconds to wait for each HTTP request before it is abandoned.

    Returns:
        A list with the same length as ``texts``. On success each entry is the
        translated string; if the batch could not be translated every entry is
        ``None`` and a one-line diagnostic is written to the output.
    """
    if not texts:
        # Nothing to translate; skip the request entirely.
        return []

    # str() guards against non-string cells (e.g. a purely numeric title)
    body = [{'text': str(text)} for text in texts]
    failure = "unknown error"

    for attempt in range(max_retries + 1):
        try:
            response = requests.post(
                constructed_url, headers=headers, json=body, timeout=timeout
            )
        except requests.RequestException as exc:
            # Network-level problem (timeout, connection reset, ...): retry
            failure = repr(exc)
        else:
            if response.status_code == 200:
                return [item['translations'][0]['text'] for item in response.json()]
            failure = f"HTTP {response.status_code}: {response.text[:200]}"
            if response.status_code != 429 and response.status_code < 500:
                # Other client errors (bad key, bad request) will not succeed on retry
                break

        if attempt < max_retries:
            time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...

    # Report the failure instead of hiding it, but keep the run going
    tqdm.write(f"Translation failed for a batch of {len(texts)} texts ({failure})")
    return [None] * len(texts)  # fallback in case of error

# Send the review titles to the translator in fixed-size slices
batch_size = 20
translated = []
title_column = df['review_comment_title']

for start in tqdm(range(0, len(df), batch_size)):
    # Positional slice, so the filtered (non-contiguous) index does not matter
    chunk = title_column.iloc[start:start + batch_size].tolist()
    translated += translate_texts(chunk)
    time.sleep(0.2)  # brief pause between consecutive requests

# Attach the translations as a new column, in the same row order
df['translated_title'] = translated

# Persist the result and report completion
df.to_csv("translated_titles.csv", index=False)
print("✅ Translation complete and saved to translated_titles.csv")


100%|██████████| 576/576 [03:49<00:00,  2.51it/s]

✅ Translation complete and saved to translated_titles.csv





Successfully Translated Review Titles From Brazilian Portuguese To English

**REVIEW TITLES CATEGORISATION USING NLP**

In [None]:
import pandas as pd
from transformers import pipeline
from tqdm import tqdm

# Load the translated titles CSV (change path as needed)
df = pd.read_csv("translated_titles.csv")

# Rows with a missing translated_title (for example batches that fell back to
# None during translation) are read back as NaN. Casting those to str would
# feed the literal text "nan" to the classifier, so only rows that really have
# a title are classified; the others keep an empty category.
has_title = df['translated_title'].notna()
titles = df.loc[has_title, 'translated_title'].astype(str).tolist()

# Load zero-shot classifier with GPU support
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0)

# Define your label categories
categories = [
    "Product Quality - Good",
    "Product Quality - Bad",
    "Delivery - On Time",
    "Delivery - Late",
    "Product Not Received",
    "Wrong Product",
    "Recommendation",
    "Cancellation",
    "Damaged Product",
    "Satisfied Customer",
    "Unsatisfied Customer"
]

# Process in batches
batch_size = 16
results = []

print("Processing reviews in batches...\n")

for i in tqdm(range(0, len(titles), batch_size)):
    batch = titles[i:i+batch_size]
    batch_results = classifier(batch, candidate_labels=categories)

    # Guard: normalise a bare dict result (possible for a one-title batch) to a list
    if isinstance(batch_results, dict):
        batch_results = [batch_results]

    # Labels are taken in the order returned; index 0 is used as the top prediction
    results.extend(result['labels'][0] for result in batch_results)

# Add predictions to the DataFrame, aligned to the rows that were classified;
# rows without a title are left as missing values.
df['review_category'] = pd.Series(results, index=df.index[has_title], dtype=object)

# Save to CSV
df.to_csv("classified_reviews.csv", index=False)
print("\n✅ Done! All reviews have been classified and saved as 'classified_reviews.csv'")


Device set to use cuda:0


Processing reviews in batches...



100%|██████████| 720/720 [46:29<00:00,  3.87s/it]


✅ Done! All reviews have been classified and saved as 'classified_reviews.csv'





Successfully categorised Review Titles

In [None]:
from google.colab import files

# Open the Colab file picker; the result maps each file name to its raw bytes
uploaded = files.upload()

# Report the name and size of every file that was uploaded
for fn, payload in uploaded.items():
  print(f'User uploaded file "{fn}" with length {len(payload)} bytes')

Saving translated_titles.csv to translated_titles.csv
User uploaded file "translated_titles.csv" with length 1937642 bytes
