In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from transformers import pipeline
import tweepy
from flair.embeddings import TransformerDocumentEmbeddings
from flair.data import Sentence
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import cross_validate
import nltk
from utils import numerical_df
from baselines import xgboost_baseline, majority_class_baseline, random_class_baseline, ibm_baseline
from sklearn.metrics import f1_score as f1, precision_score as ps, recall_score as rs
from sklearn.model_selection import KFold

### Hyperparameters

In [2]:

# Load the annotated sample and pass it through the project's `numerical_df` helper.
df = numerical_df(pd.read_csv('full-sample-v4.csv'))

# Keep only rows that carry tweet text: a row survives when its `tweet` value is
# neither missing (NaN/None) nor the empty string. Row order and index are untouched.
has_text = df.tweet.notnull() & df.tweet.ne('')
df = df[has_text]

In [3]:
# Score the majority-class baseline (imported from the project's `baselines` module)
# on the cleaned frame. The cell output is a per-task table of F1 / precision / recall
# for the argumentative, claim, evidence and procon tasks.
# NOTE(review): presumably this always predicts the most frequent label per task —
# confirm against baselines.py.
majority_class_baseline(df)

Unnamed: 0,Task,F1,Precision,Recall
0,argumentative,0.8099999999999999,0.8099999999999999,0.8099999999999999
1,claim,0.64,0.64,0.64
2,evidence,0.6749999999999999,0.6749999999999999,0.6749999999999999
3,procon,0.625,0.625,0.625


In [4]:
# Score the random-class baseline (imported from the project's `baselines` module)
# on the cleaned frame. The cell output is a per-task table of F1 / precision / recall.
# NOTE(review): no random seed is set anywhere in the visible cells, so these numbers
# may not reproduce exactly on a re-run — confirm whether baselines.py seeds internally.
random_class_baseline(df)

Unnamed: 0,Task,F1,Precision,Recall
0,argumentative,0.68,0.68,0.68
1,claim,0.555,0.555,0.555
2,evidence,0.54,0.54,0.54
3,procon,0.425,0.425,0.425


In [5]:
# Run the XGBoost baseline (imported from the project's `baselines` module).
# Per the recorded log below, it loads a language model, generates embeddings, and then,
# for each of the four tasks, fits a grid search of 104 candidates x 3 folds (312 fits)
# on 10 parallel workers. The grid searches alone took roughly 3 minutes in the recorded
# run, so this is the slow cell of the notebook.
# The cell output is a per-task table of F1 / precision / recall.
xgboost_baseline(df)

Loading language model
Generating the embeddings
Generating results for argumentative
Fitting 3 folds for each of 104 candidates, totalling 312 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    2.4s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   19.6s
[Parallel(n_jobs=10)]: Done 312 out of 312 | elapsed:   40.6s finished


Generating results for claim
Fitting 3 folds for each of 104 candidates, totalling 312 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    1.3s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   23.4s
[Parallel(n_jobs=10)]: Done 312 out of 312 | elapsed:   53.2s finished


Generating results for evidence
Fitting 3 folds for each of 104 candidates, totalling 312 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    1.3s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   23.2s
[Parallel(n_jobs=10)]: Done 312 out of 312 | elapsed:   52.6s finished


Generating results for procon
Fitting 3 folds for each of 104 candidates, totalling 312 fits


[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    1.3s
[Parallel(n_jobs=10)]: Done 180 tasks      | elapsed:   14.6s
[Parallel(n_jobs=10)]: Done 312 out of 312 | elapsed:   29.4s finished


Unnamed: 0,Tasks,F1,Precision,Recall
0,argumentative,0.886563,0.818055,0.969118
1,claim,0.383641,0.505476,0.348214
2,evidence,0.323961,0.538571,0.259524
3,procon,0.698182,0.637143,0.78


In [6]:
# Run the IBM baseline (imported from the project's `baselines` module) for all four tasks.
# The progress bars below show one client per task: ArgumentQualityClient,
# ClaimDetectionClient, EvidenceDetectionClient and ProConClient. The first three process
# 200 items each, while procon processes only 80.
# NOTE(review): presumably these clients call a remote service and need network access /
# credentials — confirm in baselines.py before re-running.
ibm_baseline(df, ['argumentative', 'claim', 'evidence', 'procon'])

Gathering results for argumentative


ArgumentQualityClient: 100%|██████████| 200/200 [00:06<00:00, 28.79it/s]


Gathering results for claim


ClaimDetectionClient: 100%|██████████| 200/200 [00:30<00:00,  6.64it/s]


Gathering results for evidence


EvidenceDetectionClient: 100%|██████████| 200/200 [00:30<00:00,  6.64it/s]


Gathering results for procon


ProConClient: 100%|██████████| 80/80 [00:04<00:00, 16.30it/s]


Unnamed: 0,Tasks,F1,Precision,Recall
0,argumentative,0.741573,0.942857,0.611111
1,claim,0.42735,0.555556,0.347222
2,evidence,0.109589,0.5,0.061538
3,procon,0.8,0.8,0.8


In [7]:
# Re-run the IBM baseline for the procon task only.
# NOTE(review): the task is passed here as a bare string, whereas the four-task call
# earlier in this notebook passes a list. The recorded output shows the string form is
# accepted, but confirm that `ibm_baseline` officially supports both.
# NOTE(review): procon was already scored by that four-task call and the recorded scores
# are identical (0.8 / 0.8 / 0.8), so this cell repeats the same ProConClient pass.
ibm_baseline(df, 'procon')

Gathering results for procon


ProConClient: 100%|██████████| 80/80 [00:04<00:00, 17.20it/s]


Unnamed: 0,Tasks,F1,Precision,Recall
0,procon,0.8,0.8,0.8
