In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from ast import literal_eval

In [2]:
df_arg = pd.read_csv("./veracity_arg_predicted.csv")
df = pd.read_csv("./veracity_predicted.csv")

In [3]:
def split_list(row):
    return pd.Series(row)

In [4]:
def convert_list(df,feature):
  df[feature] = df[feature].apply(literal_eval)
  df[[f'{feature}_false', f'{feature}_true']] = df[feature].apply(split_list)
  return df

In [5]:
def parse_df(df):
  df = convert_list(df,'sentiment_probabilities')
  df = convert_list(df,'veracity_probabilities')
  return df


In [6]:
df_arg = parse_df(df_arg)
df = parse_df(df)

In [8]:
df_total = pd.concat([df,df_arg])

## Modelado

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [10]:
def calculate_metrics(target,predicted):
  accuracy = accuracy_score(target,predicted)
  precision = precision_score(target,predicted)
  recall = recall_score(target,predicted)
  f1 = f1_score(target,predicted)

  return accuracy,precision,recall,f1

In [11]:
print(df.dtypes)


clasificacion                      int64
texto                             object
sentiment_label                    int64
sentiment_probabilities           object
veracity_label                     int64
veracity_probabilities            object
sentiment_probabilities_false    float64
sentiment_probabilities_true     float64
veracity_probabilities_false     float64
veracity_probabilities_true      float64
dtype: object


In [12]:
def split_dataset(df):
  X = df.drop(columns=['texto', 'clasificacion','sentiment_probabilities','veracity_probabilities'])
  y = df['clasificacion']

  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
  return X_train,y_train,X_val,y_val,X_test,y_test


In [13]:
def join_dataset(df1,df2):
    return pd.concat([df1,df2])

In [14]:
X_train,y_train,X_val,y_val,X_test,y_test = split_dataset(df)
X_train_arg,y_train_arg,X_val_arg,y_val_arg,X_test_arg,y_test_arg = split_dataset(df_arg)

In [16]:
X_train_total = join_dataset(X_train,X_train_arg)
y_train_total = join_dataset(y_train,y_train_arg)

X_val_total = join_dataset(X_val,X_val_arg)
y_val_total = join_dataset(y_val,y_val_arg)

X_test_total = join_dataset(X_test,X_test_arg)
y_test_total = join_dataset(y_test,y_test_arg)

In [17]:
X_train_total

Unnamed: 0,sentiment_label,veracity_label,sentiment_probabilities_false,sentiment_probabilities_true,veracity_probabilities_false,veracity_probabilities_true
46350,0,1,0.999944,0.000056,0.275400,0.724600
37474,0,0,0.999953,0.000047,0.998939,0.001061
86551,0,1,0.999901,0.000099,0.279638,0.720362
76595,1,1,0.003416,0.996584,0.003494,0.996506
21035,0,0,0.998761,0.001239,0.998995,0.001005
...,...,...,...,...,...,...
264,0,0,0.999947,0.000053,0.916958,0.083042
1739,0,0,0.999877,0.000123,0.842591,0.157409
88,0,0,0.999362,0.000638,0.688785,0.311215
484,1,1,0.012661,0.987339,0.474193,0.525807


In [18]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [19]:
history = model.fit(X_train_total, y_train_total, epochs=10, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)



In [21]:
accuracy,precision,recall,f1 = calculate_metrics(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.8343378400390943
Precision: 0.8018460800767202
Recall: 0.9461103253182461
F1-score: 0.8680249156501427


In [22]:
best_accuracy_model = None
best_f1_model = None
best_accuracy = 0
best_f1= 0

for num_layers in [1, 2, 3]:
    for num_neurons in [32, 64, 128]:
        print(f"Model - num layers: {num_layers}, num_neurons: {num_neurons}")
        model = Sequential()
        for _ in range(num_layers):
            model.add(Dense(num_neurons, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))

        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

        y_pred_prob = model.predict(X_test)
        y_pred = (y_pred_prob > 0.5).astype(int)

        accuracy,precision,recall,f1 = calculate_metrics(y_test, y_pred)

        print("Accuracy:", accuracy)
        print("Precision:", precision)
        print("Recall:", recall)
        print("F1-score:", f1)

        if accuracy > best_accuracy:
          best_accuracy_model =f"Model - num layers: {num_layers}, num_neurons: {num_neurons}"
          best_accuracy = accuracy

        if f1 > best_f1:
          best_f1_model = f"Model - num layers: {num_layers}, num_neurons: {num_neurons}"
          best_f1 = f1
        print("----------------------------------")

Model - num layers: 1, num_neurons: 32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.822405929304447
Precision: 0.824991691591891
Recall: 0.8777934936350777
F1-score: 0.8505739249614527
----------------------------------
Model - num layers: 1, num_neurons: 64
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.828758755497638
Precision: 0.8094822752476482
Recall: 0.9188826025459689
F1-score: 0.8607200821436853
----------------------------------
Model - num layers: 1, num_neurons: 128
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.8323016777976869
Precision: 0.8053625837903717
Recall: 0.9346534653465347
F1-score: 0.8652045826513912
----------------------------------
Model - num layers: 2, num_neurons: 32
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/

In [23]:
print(f"Best Accuracy Model: {best_accuracy_model} - Best accuracy: {best_accuracy}")
print(f"Best F1 Model: {best_f1_model} - Best F1-Score: {best_f1}")

Best Accuracy Model: Model - num layers: 2, num_neurons: 128 - Best accuracy: 0.8374328066460336
Best F1 Model: Model - num layers: 2, num_neurons: 128 - Best F1-Score: 0.8718788112202324


In [31]:
num_neurons = 128
num_layers= 2
epochs = 10

In [32]:
model = Sequential()
for _ in range(num_layers):
    model.add(Dense(num_neurons, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=epochs, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [33]:
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

accuracy,precision,recall,f1 = calculate_metrics(y_test, y_pred)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.8373513601563772
Precision: 0.7980611045828437
Recall: 0.9606082036775107
F1-score: 0.8718228498074455
