In [3]:
def process_txt_file(file_path, column_number):
    """
    Lowercases one column of a tab-separated text file and rewrites the file in place.

    Every line is split on tabs. Lines with fewer than ``column_number`` fields
    are dropped from the rewritten file; all other lines are written back with
    the selected field lowercased and the remaining fields untouched. The file
    is read and written as UTF-8, and every written line ends with a newline.

    Args:
    file_path (str): The path to the .txt file.
    column_number (int): The column number to process (1 or 2).

    Raises:
    ValueError: If column_number is not 1 or 2.
    """
    if column_number not in (1, 2):
        raise ValueError("Column number must be 1 or 2")

    target = column_number - 1
    processed_lines = []

    # Explicit UTF-8 so accented words survive regardless of the platform's
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Remove only the line terminator. A bare strip() would also eat a
            # leading or trailing tab, shifting columns when the first field
            # is empty and dropping lines whose last field is empty.
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) < column_number:
                continue  # Skip lines that don't have enough columns

            columns[target] = columns[target].lower()
            processed_lines.append("\t".join(columns))

    # The whole file has been read above, so it is safe to truncate it now.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{row}\n" for row in processed_lines)

    print(f"Processing complete. Changes saved to '{file_path}'.")

In [1]:
def process_txt_file_both_columns(file_path):
    """
    Lowercases the first two columns of a tab-separated text file, rewriting it in place.

    Every line is split on tabs. Lines with fewer than two fields are dropped
    from the rewritten file; all other lines are written back with fields one
    and two lowercased and any further fields untouched. The file is read and
    written as UTF-8, and every written line ends with a newline.

    Args:
    file_path (str): The path to the .txt file.
    """
    processed_lines = []

    # Explicit UTF-8 so accented words survive regardless of the platform's
    # default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Remove only the line terminator. A bare strip() would also eat a
            # leading or trailing tab, shifting columns when the first field
            # is empty and dropping lines whose second field is empty.
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) < 2:
                continue  # Skip lines that don't have enough columns

            columns[:2] = [columns[0].lower(), columns[1].lower()]
            processed_lines.append("\t".join(columns))

    # The whole file has been read above, so it is safe to truncate it now.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(f"{row}\n" for row in processed_lines)

    print(f"Processing complete. Changes saved to '{file_path}'.")

In [21]:
# Lowercase both columns of the Swedish - Italian word-pair file in place.
file_path = 'word_pairs/[Swedish - Italian].txt'

process_txt_file_both_columns(file_path)

Processing complete. Changes saved to 'word_pairs/[Swedish - Italian].txt'.


In [11]:
# Lowercase only the first column of the French - German word-pair file in place.
file_path = 'word_pairs/[French - German].txt'

process_txt_file(file_path, 1)

Processing complete. Changes saved to 'word_pairs/[French - German].txt'.


# Translator functions

In [1]:
from googletrans import Translator
# Module-level googletrans client; translate_words() calls t.translate() on it.
t = Translator()


def extract_src_lang(filename, column=1):
    """
    Returns the language name for one column of a "[First - Second].txt" style file name.

    Surrounding double quotes are removed from ``filename``, the text between
    the first "[" and the following "]" is split on "-", and the entry for the
    requested column is returned with surrounding whitespace stripped. An
    empty name (as in "[ - Spanish].txt") is returned as an empty string.

    Args:
    filename (str): File name or path containing a "[First - Second]" part.
    column (int): Which language to return: 1 for the first, 2 for the second.

    Returns:
    str: The language name found for the requested column.

    Raises:
    ValueError: If column is not 1 or 2, if the file name has no "[...]" part,
        or if the bracketed part has no entry for the requested column.
    """
    # Fail with a clear message instead of an UnboundLocalError further down.
    if column not in (1, 2):
        raise ValueError(f"column must be 1 or 2, got {column!r}")

    clean_filename = filename.strip('"')
    start = clean_filename.find('[')
    end = clean_filename.find(']', start + 1)
    # find() returns -1 when a bracket is missing; slicing with it would
    # silently return an arbitrary piece of the file name.
    if start == -1 or end == -1:
        raise ValueError(f"No '[...]' language part found in file name: {filename!r}")

    languages = clean_filename[start + 1:end].split('-')
    if len(languages) < column:
        raise ValueError(
            f"File name {filename!r} has no language for column {column}"
        )

    return languages[column - 1].strip()




def translate_words(filename, dest_lang, column=1):
    """
    Translates one column of a two-column word-pair file and writes a new pair file.

    The source language is taken from the "[First - Second]" part of
    ``filename`` via extract_src_lang(). Only lines with exactly two
    tab-separated fields are used. Each word of the chosen column is passed to
    the module-level googletrans client ``t``, and the result is written to
    "[<Source> - <Destination>].txt" (a relative path, i.e. the current
    working directory) as "source<TAB>translation" lines in UTF-8.

    Args:
    filename (str): Path to the tab-separated input file.
    dest_lang (str): Destination language code; one of
        'en', 'de', 'fr', 'es', 'it', 'pt', 'nl', 'sv', 'af'.
    column (int): Column holding the words to translate (1 or 2).

    Raises:
    ValueError: If column is not 1 or 2, if dest_lang is not a supported code,
        or if the source language named in the file name is not supported.
    """
    name_to_code = {
        'English': 'en',
        'German': 'de',
        'French': 'fr',
        'Spanish': 'es',
        'Italian': 'it',
        'Portuguese': 'pt',
        'Dutch': 'nl',
        'Swedish': 'sv',
        'Afrikaans': 'af',
    }
    code_to_name = {code: name for name, code in name_to_code.items()}

    # Validate everything up front: otherwise a bad argument is only noticed
    # after the file was read, or after every word was already translated.
    if column not in (1, 2):
        raise ValueError(f"column must be 1 or 2, got {column!r}")
    if dest_lang not in code_to_name:
        raise ValueError(
            f"Unsupported destination language code {dest_lang!r}; "
            f"expected one of {sorted(code_to_name)}"
        )

    src_name = extract_src_lang(filename, column)
    if src_name not in name_to_code:
        raise ValueError(
            f"Unsupported source language {src_name!r} in file name {filename!r}"
        )
    src_lang = name_to_code[src_name]

    # Explicit UTF-8 so accented words survive regardless of the platform's
    # default encoding.
    src_words = []
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) == 2:
                src_words.append(parts[column - 1])

    # One request per word against the shared translator client.
    tgt_words = [
        t.translate(word, src=src_lang, dest=dest_lang).text
        for word in src_words
    ]

    out_name = f"[{src_name} - {code_to_name[dest_lang]}].txt"
    with open(out_name, 'w', encoding='utf-8') as file:
        file.writelines(
            f"{src_word}\t{tgt_word}\n"
            for src_word, tgt_word in zip(src_words, tgt_words)
        )
            
def swap_cols(filename):
    """
    Writes a copy of a two-column word-pair file with its columns swapped.

    Each line is stripped and split on tabs. Lines with exactly two fields are
    written as "second<TAB>first"; any other line is written back stripped but
    otherwise unchanged. The language names are read from the "[First -
    Second]" part of ``filename`` via extract_src_lang(), and the result is
    written to "[<Second> - <First>].txt" (a relative path, i.e. the current
    working directory). Input and output are UTF-8.

    Args:
    filename (str): Path to the tab-separated input file.
    """
    swapped_lines = []
    # Explicit UTF-8 so accented words survive regardless of the platform's
    # default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            columns = stripped.split('\t')
            if len(columns) == 2:
                swapped_lines.append(f"{columns[1]}\t{columns[0]}")
            else:
                swapped_lines.append(stripped)

    first_lang = extract_src_lang(filename, column=1)
    second_lang = extract_src_lang(filename, column=2)
    with open(f"[{second_lang} - {first_lang}].txt", 'w', encoding='utf-8') as file:
        file.writelines(f"{swapped_line}\n" for swapped_line in swapped_lines)

In [34]:
# The first language name is empty here, so swap_cols writes "[Spanish - ].txt".
swap_cols("[ - Spanish].txt")

In [46]:
# Translate column 1 (German) to Portuguese, which writes "[German - Portuguese].txt".
translate_words('[German - Spanish].txt', 'pt', 1)

# Swap the columns of that result, which writes "[Portuguese - German].txt".
swap_cols("[German - Portuguese].txt")

In [11]:
# Translate column 2 (Portuguese) to Swedish, which writes "[Portuguese - Swedish].txt".
translate_words('[Spanish - Portuguese].txt', 'sv', 2)