In [1]:
pip install deep-translator pandas

Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deep-translator
Successfully installed deep-translator-1.11.4


In [2]:
import xml.etree.ElementTree as ET
import pandas as pd
import re
from deep_translator import GoogleTranslator

In [9]:
# Load XML file
# Absolute path of the XML document that the next cell hands to ET.parse.
# NOTE(review): the /content/drive/MyDrive prefix looks like a Google Drive
# mount inside Colab — confirm; the path must be changed to run anywhere else.
# NOTE(review): "tarrifs" is presumably how the file is actually named on disk;
# do not correct the spelling here without renaming the file too.
file_path = "/content/drive/MyDrive/NLP/eu_tarrifs_debate.xml"

In [10]:
# Parse XML file
# ET.parse loads the document at file_path into an ElementTree object;
# `root` is that tree's top-level element, which the extraction cell walks
# with root.iter(...).
tree = ET.parse(file_path)
root = tree.getroot()

In [13]:
# Extract TL-CHAP text and language
# Every <TL-CHAP> element found anywhere under `root` becomes one record with
# its 'VL' attribute (stored as "language", 'UNKNOWN' when the attribute is
# absent) and its text content (stored as "text").
speech_data = []
for element in root.iter('TL-CHAP'):
    lang = element.attrib.get('VL', 'UNKNOWN')
    # itertext() yields the element's own text plus the text of all nested
    # elements in document order. element.text alone stops at the first child
    # tag, which would silently truncate entries containing inline markup.
    text = "".join(element.itertext()).strip()
    # Drop elements that contain nothing but whitespace.
    if text:
        speech_data.append({"language": lang, "text": text})

In [14]:
# Convert to DataFrame
# The column names are spelled out so the frame always has 'language' and
# 'text' columns, even when speech_data is empty.
df = pd.DataFrame(speech_data, columns=["language", "text"])

In [28]:
# Show column names
# NOTE(review): the saved output of this cell lists 'translated_text' and
# 'clean_text', which are only created by cells further down, and its
# execution count (28) is higher than theirs — it was evidently re-run after
# them. On a fresh top-to-bottom run only 'language' and 'text' exist here.
print("column names:", df.columns)

column names: Index(['language', 'text', 'translated_text', 'clean_text'], dtype='object')


In [17]:
# Translate non-English text
def translate_text(text, lang):
    """Return an English version of ``text``.

    Args:
        text: The string to translate.
        lang: Language code of ``text``. It is compared case-insensitively
            and with surrounding whitespace ignored, so "EN", "en" and
            " En " are all treated as English.

    Returns:
        ``text`` unchanged when ``lang`` is English or when ``text`` is an
        empty / whitespace-only string. Otherwise the result of
        ``GoogleTranslator(source='auto', target='en').translate(text)``.
        If that call raises, the exception is not propagated; the string
        ``"[TRANSLATION ERROR] <message>"`` is returned instead, so callers
        should check for that prefix before treating the value as a
        translation.
    """
    # Normalise the code so differently-cased or padded "EN" values are not
    # sent off for translation.
    if str(lang).strip().upper() == "EN":
        return text
    # Nothing to translate, so do not call the translator at all.
    if isinstance(text, str) and not text.strip():
        return text
    try:
        return GoogleTranslator(source='auto', target='en').translate(text)
    except Exception as e:
        # Best-effort: keep processing the remaining rows and leave a visible
        # marker in place of the failed translation.
        return f"[TRANSLATION ERROR] {e}"

# Translate row by row, pairing each 'text' value with its 'language' value.
# The comprehension produces exactly one result per row in row order, and
# still yields a (zero-length) column when the frame has no rows, where
# DataFrame.apply(axis=1) would not return a per-row Series.
df['translated_text'] = [
    translate_text(text, lang)
    for text, lang in zip(df['text'], df['language'])
]


In [18]:
# Clean text for modeling
def clean_text(text):
    """Reduce ``text`` to lowercase a-z words separated by single spaces.

    The text is lowercased, then every run of characters other than the
    letters a-z (digits, punctuation, whitespace, accented letters, ...) is
    replaced by one space, and leading/trailing spaces are removed.

    Args:
        text: The string to clean. Any non-string value (for example None or
            NaN from a missing cell) is treated as having no text.

    Returns:
        The cleaned string; "" when ``text`` is not a string or contains no
        a-z letters.
    """
    # Missing values would otherwise fail on .lower().
    if not isinstance(text, str):
        return ""
    # One pass is enough: whitespace is itself outside a-z, so collapsing
    # each non-letter run to a single space also normalises spacing.
    return re.sub(r"[^a-z]+", " ", text.lower()).strip()

In [19]:
# Run clean_text over every translated value and store the results in a new
# 'clean_text' column
df['clean_text'] = df['translated_text'].map(clean_text)

In [27]:
# Show dataframe
# A bare expression on the last line of a cell is rendered as the cell's
# output, so this displays the frame itself rather than a selected slice.
df

Unnamed: 0,language,text,translated_text,clean_text
0,BG,Единна реакция на ЕС на неоправданите търговск...,EU Single Response to Unjustified U.S. Trade M...,eu single response to unjustified u s trade me...
1,ES,Respuesta unificada de la Unión a las injustif...,Unified response of the union to the unjustifi...,unified response of the union to the unjustifi...
2,CS,Jednotná reakce EU na neodůvodněná obchodní op...,Uniform EU reaction to unjustified US business...,uniform eu reaction to unjustified us business...
3,DA,En samlet reaktion fra EU's side på uberettige...,A unified reaction from the EU's part of unjus...,a unified reaction from the eu s part of unjus...
4,DE,Einheitliche Reaktion der EU auf die ungerecht...,Uniform reaction of the EU to the unjustified ...,uniform reaction of the eu to the unjustified ...
5,ET,ELi ühtne vastus USA põhjendamatutele kaubandu...,EU Uniform Response to Unjustified Trade Measu...,eu uniform response to unjustified trade measu...
6,EL,Μια ενιαία αντίδραση της ΕΕ στα αδικαιολόγητα ...,A single EU reaction to the unjustified US com...,a single eu reaction to the unjustified us com...
7,EN,A unified EU response to unjustified US trade ...,A unified EU response to unjustified US trade ...,a unified eu response to unjustified us trade ...
8,FR,Une réponse unifiée de l'Union aux mesures com...,A unified response from the Union to unjustifi...,a unified response from the union to unjustifi...
9,GA,Freagairt aontaithe ón Aontas ar bhearta trádá...,United States trade measures united from the U...,united states trade measures united from the u...
