# Follow the steps for proper preprocessing for translation purposes

1. Remove only the characters "?", "!" and "." from the rows.
2. Convert to lower case.
3. Drop the unprocessed column (it would interfere with duplicate deletion, because the unprocessed and processed columns hold different values for the same row).
4. Delete duplicates.
5. Now replace all remaining non-word characters (`[\W]`). If we do not do step 1 at the beginning, "Run!" and "Run." will not be considered duplicates.
6. Do not remove stop words, since we are translating; removing them would reduce the dimension of the data.
7. Import the Google translator and pandas.
8. Create batches of the cleaned df and translate them one by one. While creating batches, use the `iloc` method and convert the result to a pandas DataFrame, providing the respective column names.
9. Don't forget to save each translated df to CSV then and there. If not, we have to re-run every block of code (of every step) every time, which is extremely time-consuming.
10. Finally, merge the CSV files into a single pandas DataFrame.


In [22]:
import os

import numpy as np
import pandas as pd

In [23]:

# Read the raw English sentences (file decoded as Latin-1) and preview the first rows.
csv_path = r"C:\Users\Sundaram\Downloads\English.csv"
df = pd.read_csv(csv_path, encoding='latin1')
df.head()

Unnamed: 0,English words/sentences
0,Hi.
1,Run!
2,Run!
3,Who?
4,Wow!


# Removing sentence-ending punctuation (? . !)

In [24]:
# Remove only the sentence-ending marks "?", "." and "!" (replaced by the empty
# string) so that rows such as "Run!" and "Run." become identical before
# duplicates are dropped.
# regex=True is passed explicitly: pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave the text unchanged.
df['English'] = df['English words/sentences'].str.replace('[?.!]', "", regex=True)

  df['English'] = df['English words/sentences'].str.replace('[?.!]', "")


In [25]:
# Show the first 25 rows of df.
df.head(25)

Unnamed: 0,English words/sentences,English
0,Hi.,Hi
1,Run!,Run
2,Run!,Run
3,Who?,Who
4,Wow!,Wow
5,Fire!,Fire
6,Help!,Help
7,Jump.,Jump
8,Stop!,Stop
9,Stop!,Stop


# converting to lower case

In [26]:
# Lower-case the whole column with the vectorised string accessor. Unlike a
# Python-level row.lower() loop, it leaves missing values (NaN) untouched
# instead of raising AttributeError.
df['English'] = df['English'].str.lower()
df

Unnamed: 0,English words/sentences,English
0,Hi.,hi
1,Run!,run
2,Run!,run
3,Who?,who
4,Wow!,wow
...,...,...
175616,"Top-down economics never works, said Obama. ""T...","top-down economics never works, said obama ""th..."
175617,A carbon footprint is the amount of carbon dio...,a carbon footprint is the amount of carbon dio...
175618,Death is something that we're often discourage...,death is something that we're often discourage...
175619,Since there are usually multiple websites on a...,since there are usually multiple websites on a...


# Dropping original/un-cleaned column

In [27]:
# Keep only the cleaned text: discard the column holding the original sentences.
df = df.drop('English words/sentences', axis=1)

# Remove duplicates

In [28]:
# Report the frame's shape before and after removing exact duplicate rows
# (the first occurrence of each row is the one that is kept).
shape_before = df.shape
print(shape_before)
df = df.drop_duplicates(keep='first')
df.shape

(175621, 1)


(122974, 1)

In [29]:
# Show the first 25 rows of df.
df.head(25)

Unnamed: 0,English
0,hi
1,run
3,who
4,wow
5,fire
6,help
7,jump
8,stop
11,wait
13,go on


# Expanding contractions

In [30]:
import re

# Contraction -> expansion lookup used by expand_contractions.
# Keys are matched case-insensitively and only as whole tokens.
contraction_map = {
    "ain't": "is not",
    "aren't": "are not",
    "can't": "cannot",
    # Keyed with its apostrophe so the ordinary word "cause" is left alone.
    "'cause": "because",
    "could've": "could have",
    "couldn't": "could not",
    "couldn't've": "could not have",
    "didn't": "did not",
    "doesn't": "does not",
    "don't": "do not",
    "hadn't": "had not",
    "hadn't've": "had not have",
    "hasn't": "has not",
    "haven't": "have not",
    "he'd": "he would",
    "he'd've": "he would have",
    "he'll": "he will",
    "he'll've": "he will have",
    "he's": "he is",
    "how'd": "how did",
    "how'd've": "how did have",
    "how'll": "how will",
    "how's": "how is",
    "I'd": "I would",
    "I'd've": "I would have",
    "I'll": "I will",
    "I'll've": "I will have",
    "I'm": "I am",
    "I've": "I have",
    "i'd": "i would",
    "i'd've": "i would have",
    "i'll": "i will",
    "i'll've": "i will have",
    "i'm": "i am",
    "i've": "i have",
    "isn't": "is not",
    "it'd": "it would",
    "it'd've": "it would have",
    "it'll": "it will",
    "it'll've": "it will have",
    "it's": "it is",
    "let's": "let us",
    "ma'am": "madam",
    "mayn't": "may not",
    "might've": "might have",
    "mightn't": "might not",
    "mightn't've": "might not have",
    "must've": "must have",
    "mustn't": "must not",
    "mustn't've": "must not have",
    "needn't": "need not",
    "needn't've": "need not have",
    "o'clock": "of the clock",
    "oughtn't": "ought not",
    "oughtn't've": "ought not have",
    "shan't": "shall not",
    "sha'n't": "shall not",
    "shan't've": "shall not have",
    "she'd": "she would",
    "she'd've": "she would have",
    "she'll": "she will",
    "she'll've": "she will have",
    "she's": "she is",
    "should've": "should have",
    "shouldn't": "should not",
    "shouldn't've": "should not have",
    "so've": "so have",
    "so's": "so as",
    "that'd": "that would",
    "that'd've": "that would have",
    "that's": "that is",
    "there'd": "there would",
    "there'd've": "there would have",
    "there's": "there is",
    "they'd": "they would",
    "they'd've": "they would have",
    "they'll": "they will",
    "they'll've": "they will have",
    "they're": "they are",
    "they've": "they have",
    "to've": "to have",
    "wasn't": "was not",
    "we'd": "we would",
    "we'd've": "we would have",
    "we'll": "we will",
    "we'll've": "we will have",
    "we're": "we are",
    "weren't": "were not",
    "what'll": "what will",
    "what'll've": "what will have",
    "what're": "what are",
    "what's": "what is",
    "what've": "what have",
    "when's": "when is",
    "when've": "when have",
    "where'd": "where did",
    "where's": "where is",
    "where've": "where have",
    "who'll": "who will",
    "who'll've": "who will have",
    "who's": "who is",
    "who've": "who have",
    "why's": "why is",
    "why've": "why have",
    "will've": "will have",
    "won't": "will not",
    "won't've": "will not have",
    "would've": "would have",
    "wouldn't": "would not",
    "wouldn't've": "would not have",
    "y'all": "you all",
    "y'all'd": "you all would",
    "y'all'd've": "you all would have",
    "y'all're": "you all are",
    "y'all've": "you all have",
    "you'd": "you would",
    "you'd've": "you would have",
    "you'll": "you will",
    "you'll've": "you will have",
    "you're": "you are",
    "you've": "you have",
}


def expand_contractions(sent, mapping):
    """Replace every contraction in `sent` with its expansion from `mapping`.

    Args:
        sent: Text to process. Anything that is not a `str` (e.g. NaN) is
            returned unchanged.
        mapping: Dict of contraction -> expansion. A match is looked up with
            its exact spelling first and case-insensitively otherwise.

    Returns:
        `sent` with each whole-token contraction replaced. The first letter of
        an expansion takes the case of the first letter of the text it replaces
        ("Don't" -> "Do not", "don't" -> "do not").
    """
    if not isinstance(sent, str) or not mapping:
        return sent

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so "couldn't've" must be tried before its prefix "couldn't".
    keys_longest_first = sorted(mapping, key=len, reverse=True)
    alternatives = '|'.join(re.escape(key) for key in keys_longest_first)
    # The lookarounds restrict matches to whole tokens, so a key can never fire
    # inside a longer word (e.g. "he's" inside "she's"). They are used instead
    # of \b because a key may itself start with an apostrophe.
    pattern = re.compile(r"(?<!\w)(?:{})(?!\w)".format(alternatives), flags=re.IGNORECASE)

    # Case-insensitive fallback table; the first spelling of a key wins.
    by_lower = {}
    for key, value in mapping.items():
        by_lower.setdefault(key.lower(), value)

    def expand_map(contraction):
        # Text of the whole match, with the case it had in the sentence.
        match = contraction.group(0)
        expansion = mapping.get(match)
        if expansion is None:
            expansion = by_lower.get(match.lower())
        if not expansion:
            return match
        # Mirror the case of the first letter of the matched text instead of
        # copying the character itself ("ain't" must not become "as not").
        lead = next((char for char in match if char.isalpha()), "")
        if lead.isupper():
            expansion = expansion[:1].upper() + expansion[1:]
        elif lead.islower():
            expansion = expansion[:1].lower() + expansion[1:]
        return expansion

    # expand_map is called once for every non-overlapping match in the sentence.
    return pattern.sub(expand_map, sent)

# Test our function to check that it works correctly:
# Run the expander on a short sample and print the input next to the output.
test = "I've, I'll,didn't, don't"
test_function = expand_contractions(sent=test, mapping=contraction_map)
test_line = "Test:   " + test
result_line = "Result: " + test_function
print(test_line)
print(result_line, "\n\nSuccesfully expanded")

Test:   I've, I'll,didn't, don't
Result: I have, I will,did not, do not 

Succesfully expanded


In [31]:
# Expand the contractions of every sentence into a new 'English_cleaned' column
df['English_cleaned'] = df['English'].apply(expand_contractions, args=(contraction_map,))
df

Unnamed: 0,English,English_cleaned
0,hi,hi
1,run,run
3,who,who
4,wow,wow
5,fire,fire
...,...,...
175616,"top-down economics never works, said obama ""th...","top-down economics never works, said obama ""th..."
175617,a carbon footprint is the amount of carbon dio...,a carbon footprint is the amount of carbon dio...
175618,death is something that we're often discourage...,death is something that we are often discourag...
175619,since there are usually multiple websites on a...,since there are usually multiple websites on a...


In [42]:
# Replace every non-word character (anything that is not a letter, digit or
# underscore) with a single space.
# The pattern is a raw string so "\W" is not parsed as a string escape, and
# regex=True is passed explicitly because pandas >= 2.0 matches the pattern
# literally by default.
df['English_cleaned'] = df['English_cleaned'].str.replace(r'[\W]', " ", regex=True)
df

  df['English_cleaned'] = df['English_cleaned'].str.replace('[\W]', " ")


Unnamed: 0,English,English_cleaned
0,hi,hi
1,run,run
3,who,who
4,wow,wow
5,fire,fire
...,...,...
175616,"top-down economics never works, said obama ""th...",top down economics never works said obama th...
175617,a carbon footprint is the amount of carbon dio...,a carbon footprint is the amount of carbon dio...
175618,death is something that we're often discourage...,death is something that we are often discourag...
175619,since there are usually multiple websites on a...,since there are usually multiple websites on a...


# we are not removing stop words as we are translating from one language to another
## Removing Stopwords Begin
# NOTE(review): this section has no execution prompt and sits under a heading
# saying stop words are NOT removed — presumably it is kept for reference only; confirm.
import nltk
# Download the NLTK resources named 'punkt' and 'stopwords'.
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk import word_tokenize

# English stop-word list consulted by remove_stopwords.
stop_words = stopwords.words('english')



## Function to remove stop words 
def remove_stopwords(eng):
    """Return `eng` with every token that appears in `stop_words` removed.

    The text is split with `word_tokenize`, tokens found in the module-level
    `stop_words` list are dropped, and the remaining tokens are joined back
    together with single spaces.
    """
    kept_tokens = []
    for token in word_tokenize(eng):
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return " ".join(kept_tokens)

## Removing stopwords
# NOTE(review): this overwrites df['English'] with stop words stripped, which
# conflicts with the stated decision to keep stop words for translation —
# presumably this line is not meant to be run; confirm.
df['English'] = [remove_stopwords(r) for r in df['English']]

# importing google translator


In [37]:
conda install googletrans==3.1.0a0

Collecting googletrans==3.1.0a0

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.2.2 requires pyqt5<5.13, which is not installed.
spyder 5.2.2 requires pyqtwebengine<5.13, which is not installed.
anaconda-project 0.11.1 requires ruamel-yaml, which is not installed.
conda-repo-cli 1.0.20 requires clyent==1.2.1, but you have clyent 1.2.2 which is incompatible.
conda-repo-cli 1.0.20 requires nbformat==5.4.0, but you have nbformat 5.5.0 which is incompatible.



  Downloading googletrans-3.1.0a0.tar.gz (19 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting httpx==0.13.3
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
     -------------------------------------- 55.1/55.1 kB 179.4 kB/s eta 0:00:00
Collecting rfc3986<2,>=1.3
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)
Collecting httpcore==0.9.*
  Downloading httpcore-0.9.1-py3-none-any.whl (42 kB)
     -------------------------------------- 42.6/42.6 kB 348.4 kB/s eta 0:00:00
Collecting idna==2.*
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
     -------------------------------------- 58.8/58.8 kB 522.3 kB/s eta 0:00:00
Collecting chardet==3.*
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
     -------------------------------------- 133.4/133.4 kB 1.3 MB/s eta 0:00:00
Collecting hstspreload
  Downloading hstspreload-2022.11.1-py3-none-any.whl (1.4 MB)
     ----------------------------------------

In [43]:
import pandas as pd
from googletrans import Translator
# One Translator client, reused for every translation request below.
translator = Translator()

# Creating Batches of df

In [39]:
# (start, stop) row bounds of every translation batch, in output-file order:
# batch N is written to Transla_N.csv.
BATCH_BOUNDS = (
    [(0, 2500), (2500, 5000)]                                         # batches 1-2
    + [(start, start + 1000) for start in range(5000, 20000, 1000)]   # batches 3-17
    + [(start, start + 1250) for start in range(20000, 25000, 1250)]  # batches 18-21
)

# Output column name -> target language code, in output column order.
TARGET_LANGUAGES = (("Spanish", "es"), ("French", "fr"), ("German", "de"))

# Set to True to resume after an interruption: batches whose CSV file already
# exists are then left alone instead of being translated again.
SKIP_EXISTING = False


def translate_batch(frame, start, stop, out_path, languages=TARGET_LANGUAGES, skip_existing=False):
    """Translate rows `start:stop` of `frame` and save the result to `out_path`.

    Args:
        frame: DataFrame with exactly the two columns English and
            English_cleaned, in that order.
        start, stop: Positional row bounds of the batch (`iloc` semantics).
        out_path: CSV file the translated batch is written to.
        languages: Iterable of (output column name, language code) pairs.
        skip_existing: When True and `out_path` already exists, nothing is
            translated and the saved batch is read back and returned.

    Returns:
        The batch as a new DataFrame with one extra column per language.
    """
    if skip_existing and os.path.exists(out_path):
        return pd.read_csv(out_path, index_col=0)

    # .values drops the original index, so each batch is numbered from 0.
    batch = pd.DataFrame(frame.iloc[start:stop, :].values, columns=["English", "English_cleaned"])
    for column, lang_code in languages:
        # One request per sentence; only the translated text is kept.
        batch[column] = [
            translator.translate(text, src='en', dest=lang_code).text
            for text in batch['English_cleaned']
        ]
    # Saved immediately so finished batches survive a later failure. The index
    # is written as well, which is what the merge step's 'Unnamed: 0' column is.
    batch.to_csv(out_path)
    return batch


# Each batch goes straight to disk rather than into its own df_N variable;
# the merge step reads the CSV files.
for batch_no, (start, stop) in enumerate(BATCH_BOUNDS, start=1):
    translate_batch(df, start, stop, 'Transla_{}.csv'.format(batch_no), skip_existing=SKIP_EXISTING)

# Merging all CSVs into a single file

In [17]:
# Read the 21 batch files in numeric order and stack them into one frame with
# a fresh 0..n-1 index.
batch_files = ['Transla_{}.csv'.format(number) for number in range(1, 22)]
merger = pd.concat([pd.read_csv(name) for name in batch_files], ignore_index=True)
merger

Unnamed: 0.1,Unnamed: 0,English,English_cleaned,Spanish,French,German
0,0,hi,hi,hola,salut,hallo
1,1,run,run,correr,Cours,Lauf
2,2,who,who,quién,qui,wer
3,3,wow,wow,guau,wow,Beeindruckend
4,4,fire,fire,fuego,Feu,Feuer
...,...,...,...,...,...,...
24995,1245,are there any bananas,are there any bananas,¿hay plátanos?,y a-t-il des bananes,Gibt es hier Bananen
24996,1246,are these your things,are these your things,son estas tus cosas,c'est tes affaires,sind das deine Sachen
24997,1247,are they all the same,are they all the same,son todos iguales,Sont-ils tous identiques,sind sie alle gleich
24998,1248,are they still in bed,are they still in bed,¿Siguen en la cama?,sont-ils encore au lit,sind sie noch im bett


In [18]:
# List the column labels of the merged frame.
merger.columns

Index(['Unnamed: 0', 'English', 'English_cleaned', 'Spanish', 'French',
       'German'],
      dtype='object')

In [19]:
# Drop the saved per-batch index, which each batch CSV carries in as the
# 'Unnamed: 0' column. errors='ignore' keeps this cell safe to re-run once the
# column is already gone.
merger = merger.drop(columns=['Unnamed: 0'], errors='ignore')
merger

Unnamed: 0,English,English_cleaned,Spanish,French,German
0,hi,hi,hola,salut,hallo
1,run,run,correr,Cours,Lauf
2,who,who,quién,qui,wer
3,wow,wow,guau,wow,Beeindruckend
4,fire,fire,fuego,Feu,Feuer
...,...,...,...,...,...
24995,are there any bananas,are there any bananas,¿hay plátanos?,y a-t-il des bananes,Gibt es hier Bananen
24996,are these your things,are these your things,son estas tus cosas,c'est tes affaires,sind das deine Sachen
24997,are they all the same,are they all the same,son todos iguales,Sont-ils tous identiques,sind sie alle gleich
24998,are they still in bed,are they still in bed,¿Siguen en la cama?,sont-ils encore au lit,sind sie noch im bett


In [21]:
# Save the merged translations to a single CSV file.
# NOTE(review): the index is written too (to_csv default), so reading this file
# back will presumably show an extra unnamed index column — confirm whether
# index=False is wanted.
merger.to_csv('Translated_25000.csv')