In [10]:
import ast
import json
import time

import pandas as pd
import requests

In [11]:

# Load the corpus. The four list-valued columns are parsed with
# ast.literal_eval, which accepts only Python literals (lists, strings,
# numbers, ...). The previous converter, eval, would execute any expression
# found in a CSV cell.
# NOTE(review): assumes every cell in these columns is a plain literal such as
# "['0001156', '0009881']" -- confirm against GSC+.csv.
gsc_data = pd.read_csv(
    'GSC+.csv',
    converters={
        column: ast.literal_eval
        for column in ('HPO_codes', 'starts', 'ends', 'HPO_symptoms')
    },
)

In [12]:
def call_f29_translate_api(doc_id,text,endpoint,headers,params):
    """POST a single text segment to the translation endpoint.

    The request body is a JSON document declaring English ("en") as the
    source language and carrying one segment whose ``id`` is ``str(doc_id)``
    and whose ``source`` is ``text``.

    Parameters
    ----------
    doc_id : any
        Identifier of the segment; converted with ``str`` before sending.
    text : str
        Text to translate.
    endpoint : str
        URL passed to ``requests.post``.
    headers, params :
        Passed unchanged to ``requests.post`` (the target language is chosen
        by the caller through ``params``).

    Returns
    -------
    The object returned by ``requests.post``.
    """
    payload = {
        'language_source': 'en',
        'segments': [{'id': str(doc_id), 'source': text}],
    }
    # Serialise with json.dumps instead of gluing strings together: quotes,
    # backslashes and newlines inside `text` are escaped, so the body is
    # always valid JSON. Compact separators keep the body byte-identical to
    # the old hand-built string for plain input, and the default
    # ensure_ascii=True keeps the body pure ASCII.
    data = json.dumps(payload, separators=(',', ':'))
    return requests.post(endpoint, headers=headers, params=params, data=data)
    

In [13]:
def translate_english_spanish(doc_id,text, symptoms, hpo_codes):
    """Translate one description and its symptom phrases from English to Spanish.

    One request is sent for ``text`` and then one per entry of ``symptoms``,
    all through ``call_f29_translate_api`` with target language ``es``.

    Returns a dict with the keys ``Archivo`` (segment id echoed by the
    service), ``Texto_espanol``, ``Texto_inglés`` (the untouched input text),
    ``HPO_síntomas`` (translated symptoms, in input order) and ``HPO_código``
    (``hpo_codes`` passed through). If any request answers with a status
    other than 200, an empty dict is returned and no further requests are made.
    """
    # HTTP endpoint, headers and query parameters shared by every request
    api_url = 'https://f29bio-dev.northeurope.cloudapp.azure.com/api/Translation/document/translate-hack'
    request_headers = {'accept': 'text/plain','Content-Type': 'application/json',}
    query = (('lan', 'es'),)  # target language: Spanish (es)

    # Translate the free-text description first
    description_reply = call_f29_translate_api(doc_id, text, api_url, request_headers, query)
    if description_reply.status_code != 200:
        return {}

    first_segment = description_reply.json()['segments'][0]
    segment_id = first_segment['id']
    description_es = first_segment['target']

    # Then translate each symptom phrase on its own
    symptoms_es = []
    for phrase in symptoms:
        phrase_reply = call_f29_translate_api(doc_id, phrase, api_url, request_headers, query)
        if phrase_reply.status_code != 200:
            # a single failed symptom invalidates the whole row
            return {}
        symptoms_es.append(phrase_reply.json()['segments'][0]['target'])

    # Mirror the layout of the source file while keeping the original English text
    return {
        'Archivo': segment_id,
        'Texto_espanol': description_es,
        'Texto_inglés': text,
        'HPO_síntomas': symptoms_es,
        'HPO_código': hpo_codes,
    }

In [14]:
# Translate every row of gsc_data, collecting one result dict per row, and
# build a DataFrame from the collected dicts.

translation_rows = []
for row in gsc_data.itertuples():
    # itertuples() yields the index label first, followed by the columns in order
    position, doc_id, text, symptoms, hpo_codes = row[:5]
    translation_rows.append(translate_english_spanish(doc_id, text, symptoms, hpo_codes))
    rows_done = position + 1
    if rows_done % 10 == 0:
        print('Fetched {} rows'.format(rows_done))
        time.sleep(1)  # pause for a second after every tenth row
translation_df = pd.DataFrame(translation_rows)

In [24]:
# Show the translations with every row that contains a NaN removed.
# dropna() returns a new frame; the result is only displayed here and
# translation_df itself is left unchanged.
translation_df.dropna(axis='index', how='any')

Unnamed: 0,Archivo,Texto_espanol,Texto_inglés,HPO_síntomas,HPO_código
0,1003450,En cinco miembros de una familia italiana se r...,A syndrome of brachydactyly (absence of some m...,"[brachydactyly, ausencia de algunas falanges m...","[0001156, 0009881, 0001798, 0001792, 0100264, ..."
1,10051003,El síndrome de Townes-Brock (TBS) es un trasto...,Townes-Brocks syndrome (TBS) is an autosomal d...,"[trastorno autosómico dominante., autosómica d...","[0000006, 0000006, 0000006, 0003828, 0003813, ..."
2,10066029,El síndrome de carcinoma basocelular nevoide (...,Nevoid basal cell carcinoma syndrome (NBCCS) i...,"[carcinoma basocelular., autosómica dominante....","[0002671, 0000006, 0000006, 0000006, 0000006, ..."
3,10196695,El síndrome de Angélica (EA) es un trastorno d...,Angelman syndrome (AS) is a neurodevelopmental...,"[trastorno del neurodesarrollo, síndrome del g...","[0000707, 0001466]"
4,10417280,El síndrome de Prader-Willi (PWS) y el síndrom...,Prader-Willi syndrome (PWS) and Angelman syndr...,"[trastornos neuroconductuales, de Novo]","[0000708, 0003745]"
...,...,...,...,...,...
223,9831341,Hemos descrito previamente a un paciente con u...,We had previously described a patient with an ...,"[crecimiento, hiperfagia, obesidad, crecimiento]","[0001548, 0002591, 0001513, 0001548]"
224,9863591,Los pacientes que presentan schwanomas vestibu...,Patients who present with unilateral vestibula...,"[schwanomas vestibulares unilaterales., schwan...","[0009590, 0009588, 0100008, 0003593, 0003593, ..."
225,9931336,Se han identificado mutaciones del gen parchea...,Mutations of the human Patched gene ( PTCH ) h...,"[carcinoma basocelular., esporádicos, carcinom...","[0002671, 0003745, 0002671, 0002885, 0002671, ..."
226,9949213,Los elementos de repetición de copia baja se a...,"Transcribed, low-copy repeat elements are asso...","[anomalías del desarrollo, letalidad juvenil.]","[0001263, 0004149]"


In [None]:
# Save to file.
# translate_english_spanish returns {} when a request fails, which becomes an
# all-NaN row in translation_df. The earlier dropna() call only displayed a
# cleaned copy, so drop those rows here to keep them out of the CSV.
translation_df.dropna().to_csv('GSC+_es_and_en.csv')