In [7]:
from tqdm.auto import tqdm
import numpy as np
import pandas as pd

from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer


In [2]:
train_df = pd.read_csv('../data/review_references_title_abstracts_sample_train.csv')
test_df = pd.read_csv('../data/review_references_title_abstracts_sample_test.csv')

In [3]:
TARGET_COL_NAME = 'review'

In [4]:
tfidf_transformer = TfidfVectorizer(
    ngram_range=(1, 2),
    analyzer='word',
    lowercase=True,
    max_features=50000,
    stop_words='english'
)

In [5]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=17)

In [12]:
reference_features = [
         'references_count',
       'references_contradicted_total', 
    'references_contradicted_avg',
       'references_mentioned_total',
    'references_mentioned_total_avg',
       'references_supported_total', 
    'references_supported_avg',
       'in_text_citations_to_references_total',
       'in_text_citations_to_references_total_avg',
       'citations_to_references_total', 'citations_to_references_total_avg',
       'contradiction_percentage_avg', 'mentioning_percentage_avg',
      'supporting_percentage_avg',
    'contradiction_to_supporting_ratio_avg',
        'contradiction_to_supporting_contradiction_ratio_avg',
        'supporting_to_supporting_contradiction_ratio_avg'
]
INPUT_COL_NAMES =  [
        'title_abstract',

] + reference_features


logreg = LogisticRegression(
    C=1,
    random_state=42,
    solver='sag',
    n_jobs=-1,
    max_iter=500
)

column_transformer = ColumnTransformer(
    [('tfidf1', tfidf_transformer, 'title_abstract'),
     ('scaler', StandardScaler(), reference_features)]
)

model = Pipeline([
    ('column_transform', column_transformer),
    ('logit', logreg)
])


acc_scores, prec_scores, cv_f1_scores, recall_scores = [], [], [], []
skf_split_generator = skf.split(X=train_df[INPUT_COL_NAMES], y=train_df[TARGET_COL_NAME])

for fold_id, (train_idx, val_idx) in tqdm(enumerate(skf_split_generator)):
    curr_train_df = train_df.iloc[train_idx]
    curr_val_df = train_df.iloc[val_idx]
    
    model.fit(X=curr_train_df[INPUT_COL_NAMES], y=curr_train_df[TARGET_COL_NAME])
    
    # making predictions for the current validation set
    curr_preds = model.predict(X=curr_val_df[INPUT_COL_NAMES])
    curr_f1 = f1_score(y_true=curr_val_df[TARGET_COL_NAME], y_pred=curr_preds)
    curr_acc = accuracy_score(y_true=curr_val_df[TARGET_COL_NAME], y_pred=curr_preds)
    curr_prec = precision_score(y_true=curr_val_df[TARGET_COL_NAME], y_pred=curr_preds)
    curr_recall = recall_score(y_true=curr_val_df[TARGET_COL_NAME], y_pred=curr_preds)
    cv_f1_scores.append(curr_f1)
    acc_scores.append(curr_acc)
    prec_scores.append(curr_prec)
    recall_scores.append(curr_recall)
    print(f"F1-score for fold {fold_id} is {curr_f1:.3}. Accuracy is {curr_acc:.3}. Precision is {curr_prec:.3}. Recall is {curr_recall:.3}.")

print(f'Average cross-validation F1-score is {np.mean(cv_f1_scores):.3} +/- {np.std(cv_f1_scores):.3}.')
print(f'Average cross-validation ACC is {np.mean(acc_scores):.3} +/- {np.std(acc_scores):.3}.')
print(f'Average cross-validation Prec is {np.mean(prec_scores):.3} +/- {np.std(prec_scores):.3}.')
print(f'Average cross-validation Recall is {np.mean(recall_scores):.3} +/- {np.std(recall_scores):.3}.')

0it [00:00, ?it/s]



F1-score for fold 0 is 0.446. Accuracy is 0.929. Precision is 0.829. Recall is 0.305.




F1-score for fold 1 is 0.452. Accuracy is 0.929. Precision is 0.804. Recall is 0.314.




F1-score for fold 2 is 0.45. Accuracy is 0.929. Precision is 0.831. Recall is 0.308.




F1-score for fold 3 is 0.394. Accuracy is 0.924. Precision is 0.782. Recall is 0.264.




F1-score for fold 4 is 0.441. Accuracy is 0.929. Precision is 0.835. Recall is 0.299.
Average cross-validation F1-score is 0.436 +/- 0.0214.
Average cross-validation ACC is 0.928 +/- 0.00193.
Average cross-validation Prec is 0.816 +/- 0.0202.
Average cross-validation Recall is 0.298 +/- 0.0179.


In [13]:
curr_preds = model.predict(X=test_df[INPUT_COL_NAMES])
curr_f1 = f1_score(y_true=test_df[TARGET_COL_NAME], y_pred=curr_preds)
curr_acc = accuracy_score(y_true=test_df[TARGET_COL_NAME], y_pred=curr_preds)
curr_prec = precision_score(y_true=test_df[TARGET_COL_NAME], y_pred=curr_preds)
curr_recall = recall_score(y_true=test_df[TARGET_COL_NAME], y_pred=curr_preds)
print(f"F1-score for is {curr_f1:.3}. Accuracy is {curr_acc:.3}. Precision is {curr_prec:.3}. Recall is {curr_recall:.3}.")

F1-score for is 0.45. Accuracy is 0.929. Precision is 0.827. Recall is 0.309.
