## Carga de librerías

In [167]:
import pandas as pd
import gdown
from joblib import dump
import warnings
warnings.filterwarnings('ignore')
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report, f1_score

## Descarga del corpus

In [2]:
DATASET_URL = "https://drive.google.com/drive/folders/17rMvLszfo-DQoIvzG1CusIQNwhnLKpB_"
gdown.download_folder(DATASET_URL, quiet=False)

Retrieving folder list


Retrieving folder 1a1tYlGlQmadf7zsYz3PPnTrplSHjM8lU subtask_1
Retrieving folder 12Fe7jpVeenEZGckFFQQA7-CVgPMBMIK3 en
Processing file 1dQxym7dPsw38VStCYPqsKGByDuq06XPY test.tsv
Processing file 1syrlebhgbvTg9tCZ-S2MOpwhFBNtGXXD train.tsv
Retrieving folder 1qQoUs57x_G1O-lP_OMPuuLlD_1TF-WoI es
Processing file 1DL0MZB8St7yAmsh70Y3sH8NQaoSzdZYs test.tsv
Processing file 1r6hAZp3mPldfhhKHrIMgQ33HiwzvB5O3 train.tsv
Retrieving folder 1m4yAowbTXz_IBvNRHOuDGQqZnza298Qd subtask_2
Retrieving folder 1_nNywlj-RYMW1tfkFVoEDLJLcVWNX9a6 en
Processing file 1-MEoYsUqdub_zTt8O6-9fGQtdSQloGHA test.tsv
Processing file 1nLCyHGDs8PiS15ZkwtCQ0_GscFhvR5RB train.tsv
Retrieving folder 1dwAWhHaWVaoiptD-JR0rL98XHJ04gbLP es
Processing file 1efLhja6Lr5B3ALu_X19ULpzYVj-8TN3Q test.tsv
Processing file 1F5mhR6tIRZCHhCwvzSEwXiEytNrpc6W9 train.tsv
Building directory structure completed


Retrieving folder list completed
Building directory structure
Downloading...
From: https://drive.google.com/uc?id=1dQxym7dPsw38VStCYPqsKGByDuq06XPY
To: c:\Users\jonatan.pace\Documents\mbda-autex\AuTexTificationDataset\subtask_1\en\test.tsv
100%|██████████| 7.92M/7.92M [00:01<00:00, 6.27MB/s]
Downloading...
From: https://drive.google.com/uc?id=1syrlebhgbvTg9tCZ-S2MOpwhFBNtGXXD
To: c:\Users\jonatan.pace\Documents\mbda-autex\AuTexTificationDataset\subtask_1\en\train.tsv
100%|██████████| 13.1M/13.1M [00:02<00:00, 4.86MB/s]
Downloading...
From: https://drive.google.com/uc?id=1DL0MZB8St7yAmsh70Y3sH8NQaoSzdZYs
To: c:\Users\jonatan.pace\Documents\mbda-autex\AuTexTificationDataset\subtask_1\es\test.tsv
100%|██████████| 7.80M/7.80M [00:01<00:00, 4.17MB/s]
Downloading...
From: https://drive.google.com/uc?id=1r6hAZp3mPldfhhKHrIMgQ33HiwzvB5O3
To: c:\Users\jonatan.pace\Documents\mbda-autex\AuTexTificationDataset\subtask_1\es\train.tsv
100%|██████████| 12.8M/12.8M [00:02<00:00, 5.27MB/s]
Downloading.

['c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_1\\en\\test.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_1\\en\\train.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_1\\es\\test.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_1\\es\\train.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_2\\en\\test.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_2\\en\\train.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_2\\es\\test.tsv',
 'c:\\Users\\jonatan.pace\\Documents\\mbda-autex\\AuTexTificationDataset\\subtask_2\\es\\train.tsv']

## Carga del corpus

In [3]:
# Definimos la tarea 1 en el lenguaje español
subtask = "subtask_1"
language = "es"

In [4]:
# Cargamos el corpus en dataframes de pandas
df_train = pd.read_csv(
    f"AuTexTificationDataset/{subtask}/{language}/train.tsv", delimiter="\t"
)
df_test = pd.read_csv(
    f"AuTexTificationDataset/{subtask}/{language}/test.tsv", delimiter="\t"
)

In [5]:
# Vemos los primeros registros del dataset de entrenamiento
df_train.head()

Unnamed: 0,id,prompt,text,label,model,domain
0,5464,NO-PROMPT,Entrada en vigor. La presente Directiva entrar...,human,NO-MODEL,legal
1,30129,"Estos podrían ser preguntas, categorías de inf...",Preguntas: 1. ¿Cuáles son los principales argu...,generated,F,wiki
2,19553,-¿Desea algo? -Póngame una caja,¿Desea algo? Póngame una caja de madera. ¿Qué ...,generated,E,tweets
3,13005,NO-PROMPT,"@victor28088 1665 Tweets no originales, que as...",human,NO-MODEL,tweets
4,16919,NO-PROMPT,De pequeño Dios me dio a elegir entre tener un...,human,NO-MODEL,tweets


In [6]:
df_test.head()

Unnamed: 0,id,text,domain
0,17414,Buscábamos tranquilidad y la encontramos. Me t...,reviews
1,16938,"Nos sorprendió la cena, si vas con media pensi...",reviews
2,17379,Servicio atento y magnificas vistas al rio.,reviews
3,5391,La Oficina Nacional de Estadísticas de China d...,news
4,17310,Pero no puedes tener a una sola persona sirvie...,reviews


## Separación del dataset train en entrenamiento y validación

In [9]:
X_train, X_val, y_train, y_val = train_test_split(
    df_train["text"], 
    df_train["label"], 
    stratify = df_train["label"], 
    random_state = 42, 
    test_size = 0.3 )

In [10]:
X_train.head()

24284    Había una vez una tortuguita que fue a su prim...
21740    También puedes comprar tarjetas de regalo de A...
18482    Para la emisión de la declaración de inversion...
9942     Si el cangrejo está muerto, comenzará a descom...
12645    #SiMeEncontraraCon Una ZikITa ReGÁAeTtoÑera &l...
Name: text, dtype: object

In [11]:
y_train.head()

24284        human
21740    generated
18482    generated
9942         human
12645        human
Name: label, dtype: object

In [13]:
# Valdidamos si las clases están valanceadas en la partición que usaremos para entrenar
y_train.value_counts()

generated    11392
human        11051
Name: label, dtype: int64

In [11]:
X_val.head()

18613    Artículo 1. Se autoriza el uso como aditivo en...
11869    Aquí es donde entra en juego el no caer en pán...
8698     1. El presente Reglamento entrará en vigor a l...
21587    Ya que el crecimiento de las bacterias en el c...
5521     El estado de agotamiento del contingente se co...
Name: text, dtype: object

In [20]:
type(X_val)

pandas.core.series.Series

In [14]:
y_val.value_counts()

generated    4883
human        4736
Name: label, dtype: int64

## Vectorización de textos

In [58]:
# Iniciamos vectorizadores aplicando algunas variantes

vectorizer_cv_word11 = CountVectorizer(analyzer="word", ngram_range=(1, 1), max_features=5000)
vectorizer_cv_char12 = CountVectorizer(analyzer="char", ngram_range=(1, 2), max_features=5000)
vectorizer_cv_char23 = CountVectorizer(analyzer="char", ngram_range=(1, 3), max_features=5000)
vectorizer_tf_char12 = TfidfVectorizer(analyzer="char", ngram_range=(1, 2), max_features=5000)
vectorizer_tf_char23 = TfidfVectorizer(analyzer="char", ngram_range=(2, 3), max_features=5000)


# Vectorizamos el dataset de entrenamiento

vect_train_cv_word11 = vectorizer_cv_word11.fit_transform(X_train)
vect_train_cv_char12 = vectorizer_cv_char12.fit_transform(X_train)
vect_train_cv_char23 = vectorizer_cv_char23.fit_transform(X_train)
vect_train_tf_char12 = vectorizer_tf_char12.fit_transform(X_train)
vect_train_tf_char23 = vectorizer_tf_char23.fit_transform(X_train)

# Vectorizamos el dataset de validación

vect_val_cv_word11 = vectorizer_cv_word11.transform(X_val)
vect_val_cv_char12 = vectorizer_cv_char12.transform(X_val)
vect_val_cv_char23 = vectorizer_cv_char23.transform(X_val)
vect_val_tf_char12 = vectorizer_tf_char12.transform(X_val)
vect_val_tf_char23 = vectorizer_tf_char23.transform(X_val)

# Entrenamiento de modelos

In [51]:
# Función para imprimir las métricas de performance de los modelos

def compute_scores(y_preds):
    # Compute the macro-F1 score
    substask = "subtask_1"
    mf1 = f1_score(y_true=y_val, y_pred=y_preds, average="macro")
    
    print(f'f1-macro: {mf1}')
    print()
    # Compute the confusion matrix
    labels = ["generated", "human"] if substask == "subtask_1" else ["A", "B", "C", "D", "E", "F"]
    conf_matrix = confusion_matrix(y_true=y_val, y_pred=y_preds, labels=labels)

    print(conf_matrix)

    # Compute a classification report
    clf_report = classification_report(y_true=y_val, y_pred=y_preds)

    print(clf_report)

## Regresión Lineal

In [52]:
# Modelo usando CountVectorizer con 1 n-grams de palabras
# -------------------------------------------------------

# Iniciamos el modelo
lr_cv_word11 = LogisticRegression()

# Entrenamos el modelo
lr_cv_word11.fit(vect_train_cv_word11, y_train)

# Obtenemos las predicciones 
lr_cv_word11_preds = lr_cv_word11.predict(vect_val_cv_word11)

# Evaluación y métricas del modelo

compute_scores(lr_cv_word11_preds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


f1-macro: 0.7849544298360964

[[3852 1031]
 [1037 3699]]
              precision    recall  f1-score   support

   generated       0.79      0.79      0.79      4883
       human       0.78      0.78      0.78      4736

    accuracy                           0.79      9619
   macro avg       0.78      0.78      0.78      9619
weighted avg       0.79      0.79      0.79      9619



In [53]:
# Modelo usando CountVectorizer con 1 a 2 n-grams de caracteres
# -------------------------------------------------------------

# Iniciamos el modelo
lr_cv_char12 = LogisticRegression()

# Entrenamos el modelo
lr_cv_char12.fit(vect_train_cv_char12, y_train)

# Obtenemos las predicciones 
lr_cv_char12_preds = lr_cv_char12.predict(vect_val_cv_char12)

# Evaluación y métricas del modelo

compute_scores(lr_cv_char12_preds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


f1-macro: 0.7285969011153804

[[3579 1304]
 [1306 3430]]
              precision    recall  f1-score   support

   generated       0.73      0.73      0.73      4883
       human       0.72      0.72      0.72      4736

    accuracy                           0.73      9619
   macro avg       0.73      0.73      0.73      9619
weighted avg       0.73      0.73      0.73      9619



In [55]:
# Modelo usando CountVectorizer con 2 a 3 n-grams de caracteres
# -------------------------------------------------------------

# Iniciamos el modelo
lr_cv_char23 = LogisticRegression()

# Entrenamos el modelo
lr_cv_char23.fit(vect_train_cv_char23, y_train)

# Obtenemos las predicciones 
lr_cv_char23_preds = lr_cv_char23.predict(vect_val_cv_char23)

# Evaluación y métricas del modelo

compute_scores(lr_cv_char23_preds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


f1-macro: 0.7889627261088625

[[3900  983]
 [1046 3690]]
              precision    recall  f1-score   support

   generated       0.79      0.80      0.79      4883
       human       0.79      0.78      0.78      4736

    accuracy                           0.79      9619
   macro avg       0.79      0.79      0.79      9619
weighted avg       0.79      0.79      0.79      9619



In [59]:
# Modelo usando TfidfVectorizer con 1 a 2 n-grams de caracteres
# -------------------------------------------------------------

# Iniciamos el modelo
lr_tf_char12 = LogisticRegression()

# Entrenamos el modelo
lr_tf_char12.fit(vect_train_tf_char12, y_train)

# Obtenemos las predicciones 
lr_tf_char12_preds = lr_tf_char12.predict(vect_val_tf_char12)

# Evaluación y métricas del modelo

compute_scores(lr_tf_char12_preds)

f1-macro: 0.7276612635736723

[[3763 1120]
 [1492 3244]]
              precision    recall  f1-score   support

   generated       0.72      0.77      0.74      4883
       human       0.74      0.68      0.71      4736

    accuracy                           0.73      9619
   macro avg       0.73      0.73      0.73      9619
weighted avg       0.73      0.73      0.73      9619



In [60]:
# Modelo usando TfidfVectorizer con 2 a 3 n-grams de caracteres
# -------------------------------------------------------------

# Iniciamos el modelo
lr_tf_char23 = LogisticRegression()

# Entrenamos el modelo
lr_tf_char23.fit(vect_train_tf_char23, y_train)

# Obtenemos las predicciones 
lr_tf_char23_preds = lr_tf_char23.predict(vect_val_tf_char23)

# Evaluación y métricas del modelo

compute_scores(lr_tf_char23_preds)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


f1-macro: 0.7948631680058522

[[3985  898]
 [1073 3663]]
              precision    recall  f1-score   support

   generated       0.79      0.82      0.80      4883
       human       0.80      0.77      0.79      4736

    accuracy                           0.80      9619
   macro avg       0.80      0.79      0.79      9619
weighted avg       0.80      0.80      0.79      9619



## Support Vector Machine

In [62]:
# Modelo usando TfidfVectorizer con 2 a 3 n-grams de caracteres
# -------------------------------------------------------------

# Iniciamos el modelo
svm_tf_char23 = svm.SVC()

# Entrenamos el modelo
svm_tf_char23.fit(vect_train_tf_char23, y_train)

# Obtenemos las predicciones 
svm_tf_char23_preds = svm_tf_char23.predict(vect_val_tf_char23)

# Evaluación y métricas del modelo

compute_scores(svm_tf_char23_preds)

f1-macro: 0.8324128982490834

[[4125  758]
 [ 853 3883]]
              precision    recall  f1-score   support

   generated       0.83      0.84      0.84      4883
       human       0.84      0.82      0.83      4736

    accuracy                           0.83      9619
   macro avg       0.83      0.83      0.83      9619
weighted avg       0.83      0.83      0.83      9619



## Fine-Tuning

In [68]:
# Iniciamos el modelo
lr_tf_char23_tunned = LogisticRegression()

# Definimos los parámetros para la búsqueda en cuadrícula
parameters = {
    'C': [0.1, 1.0, 10.0],
    'penalty': ['l1', 'l2']
}

# Creamos el objeto GridSearchCV
grid_search = GridSearchCV(lr_tf_char23, parameters, cv=5)

# Realizamos la búsqueda en cuadrícula y entrenamos el modelo
grid_search.fit(vect_train_tf_char23, y_train)

# Obtenemos el mejor modelo encontrado por la búsqueda en cuadrícula
best_model = grid_search.best_estimator_


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [69]:
best_model

# Entrenamiento del modelo final con todo el corpus disponible

## Vectorizado del corpus completo con el mejor método encontrado durante el entrenamiento

In [106]:
# Convertimos el corpus de entrenamiento y test en un pandas series

X_train_all = df_train[['text']].squeeze()
y_train_all = df_train[['label']].squeeze()

X_test = df_test[['text']].squeeze()

# Vectorizamos el corpus completo de entrenamiento (datos etiquetados) y el de test (datos sin etiquetar)

vect_train_all_tf_char23 = vectorizer_tf_char23.fit_transform(X_train_all)
vect_test_tf_char23 = vectorizer_tf_char23.transform(X_test)


In [158]:
# Modelo de regresión logística usando TfidfVectorizer con 2 a 3 n-grams de caracteres
# ---------------------------------------------------------------------------------

# Iniciamos el modelo
lr_tf_char23_tunned = LogisticRegression(C=10)

# Entrenamos el modelo
lr_tf_char23_tunned.fit(vect_train_all_tf_char23, y_train_all)

# Obtenemos las predicciones 
lr_tf_char23_preds = lr_tf_char23_tunned.predict(vect_test_tf_char23)

# Obtenemos las predicciones con sus probabilidades
lr_tf_char23_preds_proba = lr_tf_char23_tunned.predict_proba(vect_test_tf_char23)

In [159]:
lr_tf_char23_preds

array(['generated', 'human', 'generated', ..., 'generated', 'generated',
       'generated'], dtype=object)

In [160]:
lr_tf_char23_preds_proba

array([[0.74329888, 0.25670112],
       [0.13604077, 0.86395923],
       [0.54804559, 0.45195441],
       ...,
       [0.87518561, 0.12481439],
       [0.87795661, 0.12204339],
       [0.76129209, 0.23870791]])

In [178]:
# Predicciones con el modelo de support vector machine entrenado solo con parte del corpus de training y sin optimizar hiperparámetros

svm_tf_char23_preds = svm_tf_char23.predict(vect_test_tf_char23)


In [163]:
# Añadimos las columnas de predicciones al dataset de test

df_test['label_lr_generated_proba'] = lr_tf_char23_preds_proba[:,0]
df_test['label_lr_human_proba'] = lr_tf_char23_preds_proba[:,1]

In [179]:
# Añadimos la columna de etiquetas

df_test['label_lr_tunned_tr50'] = np.where(df_test['label_lr_generated_proba'] > .5, 'generated', 'human')
df_test['label_lr_tunned_tr80'] = np.where(df_test['label_lr_generated_proba'] > .8, 'generated', 'human')
df_test['label_svm_tr50'] = svm_tf_char23_preds
df_test


Unnamed: 0,id,text,domain,label_lr_generated_proba,label_lr_human_proba,label_lr_tunned_tr50,label_lr_tunned_tr80,label_svm_tr50
0,17414,Buscábamos tranquilidad y la encontramos. Me t...,reviews,0.743299,0.256701,generated,human,human
1,16938,"Nos sorprendió la cena, si vas con media pensi...",reviews,0.136041,0.863959,human,human,human
2,17379,Servicio atento y magnificas vistas al rio.,reviews,0.548046,0.451954,generated,human,generated
3,5391,La Oficina Nacional de Estadísticas de China d...,news,0.580462,0.419538,generated,human,human
4,17310,Pero no puedes tener a una sola persona sirvie...,reviews,0.085232,0.914768,human,human,human
...,...,...,...,...,...,...,...,...
20124,11284,Pero no fue un problema para mí en absoluto! E...,reviews,0.963334,0.036666,generated,generated,generated
20125,11964,"Para mí, no hay nada más importante en la vida...",reviews,0.983017,0.016983,generated,generated,generated
20126,5390,El sindicato Futbolistas Argentinos Agremiados...,news,0.875186,0.124814,generated,generated,human
20127,860,Incluso ahora se siente joven. Fidel Castro es...,news,0.877957,0.122043,generated,generated,human


In [190]:
df_test.label_lr_tunned_tr50.value_counts()

generated    15942
human         4187
Name: label_lr_tunned_tr50, dtype: int64

In [191]:
df_test.label_lr_tunned_tr80.value_counts()

generated    10573
human         9556
Name: label_lr_tunned_tr80, dtype: int64

In [192]:
df_test.label_svm_tr50.value_counts()

human        14592
generated     5537
Name: label_svm_tr50, dtype: int64

## Guardamos el fichero test añadiendo la columna con las predicciones

In [188]:
jonmarcas_lrTfChar23TunnedTr50 = df_test.rename(columns={'label_lr_tunned_tr50': 'label'})[['id', 'label']]
jonmarcas_lrTfChar23TunnedTr80 = df_test.rename(columns={'label_lr_tunned_tr80': 'label'})[['id', 'label']]
jonmarcas_svmTfChar23baseTr50 = df_test.rename(columns={'label_svm_tr50': 'label'})[['id', 'label']]

jonmarcas_lrTfChar23TunnedTr50.to_csv('jonmarcas_lrTfChar23TunnedTr50.csv', sep='\t', index=False)
jonmarcas_lrTfChar23TunnedTr80.to_csv('jonmarcas_lrTfChar23TunnedTr80.csv', sep='\t', index=False)
jonmarcas_svmTfChar23baseTr50.to_csv('jonmarcas_svmTfChar23baseTr50.csv', sep='\t', index=False)

In [189]:
df_test

Unnamed: 0,id,text,domain,label_lr_generated_proba,label_lr_human_proba,label_lr_tunned_tr50,label_lr_tunned_tr80,label_svm_tr50
0,17414,Buscábamos tranquilidad y la encontramos. Me t...,reviews,0.743299,0.256701,generated,human,human
1,16938,"Nos sorprendió la cena, si vas con media pensi...",reviews,0.136041,0.863959,human,human,human
2,17379,Servicio atento y magnificas vistas al rio.,reviews,0.548046,0.451954,generated,human,generated
3,5391,La Oficina Nacional de Estadísticas de China d...,news,0.580462,0.419538,generated,human,human
4,17310,Pero no puedes tener a una sola persona sirvie...,reviews,0.085232,0.914768,human,human,human
...,...,...,...,...,...,...,...,...
20124,11284,Pero no fue un problema para mí en absoluto! E...,reviews,0.963334,0.036666,generated,generated,generated
20125,11964,"Para mí, no hay nada más importante en la vida...",reviews,0.983017,0.016983,generated,generated,generated
20126,5390,El sindicato Futbolistas Argentinos Agremiados...,news,0.875186,0.124814,generated,generated,human
20127,860,Incluso ahora se siente joven. Fidel Castro es...,news,0.877957,0.122043,generated,generated,human
