In [1]:
import pandas as pd
import json
import numpy as np

np.random.seed(42)

# Preprocess

In [2]:
with open('dpr-bm25_query_results.json', 'r') as f:
    bm25_json = json.load(f)

with open('dpr-lmd_query_results.json', 'r') as f:
    lmd_json = json.load(f)

with open('dpr-classic_query_results.json', 'r') as f:
    classic_json = json.load(f)

with open('question-syifa-test.json', 'r') as f:
    gold_json = json.load(f)

In [3]:
posrel_map = dict()
negrel_map = dict()

for row in gold_json:
    qid = row['question']['id']
    posrel_map[qid] = []
    negrel_map[qid] = []
    for pos_ctx in row['ctxs']:
        posrel_map[qid].append(pos_ctx['id'])
    for neg_ctx in row['neg_ctxs']:
        negrel_map[qid].append(neg_ctx['id'])

In [4]:
bm25_df = pd.DataFrame()
for row in bm25_json:
    qid = row['question']['id']
    for ctx in row['ctxs']:
        if ctx['id'] in posrel_map[qid]:
            bm25_df = bm25_df.append({
                'sparse_score': ctx['bm25_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 1
            }, ignore_index=True)
        elif ctx['id'] in negrel_map[qid]:
            bm25_df = bm25_df.append({
                'sparse_score': ctx['bm25_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 0
            }, ignore_index=True)


In [5]:
lmd_df = pd.DataFrame()
for row in lmd_json:
    qid = row['question']['id']
    for ctx in row['ctxs']:
        if ctx['id'] in posrel_map[qid]:
            lmd_df = lmd_df.append({
                'sparse_score': ctx['lmd_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 1
            }, ignore_index=True)
        elif ctx['id'] in negrel_map[qid]:
            lmd_df = lmd_df.append({
                'sparse_score': ctx['lmd_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 0
            }, ignore_index=True)


In [6]:
classic_df = pd.DataFrame()
for row in classic_json:
    qid = row['question']['id']
    for ctx in row['ctxs']:
        if ctx['id'] in posrel_map[qid]:
            classic_df = classic_df.append({
                'sparse_score': ctx['classic_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 1
            }, ignore_index=True)
        elif ctx['id'] in negrel_map[qid]:
            classic_df = classic_df.append({
                'sparse_score': ctx['classic_score'],
                'dense_score': ctx['dpr_score'],
                'rel': 0
            }, ignore_index=True)


# Data Analysis

In [7]:
print(len(bm25_df[bm25_df['rel']==1])/len(bm25_df[bm25_df['rel']==0]))

2.8421052631578947


In [8]:
print(len(lmd_df[lmd_df['rel']==1])/len(lmd_df[lmd_df['rel']==0]))

1.7278481012658229


In [9]:
print(len(classic_df[classic_df['rel']==1])/len(classic_df[classic_df['rel']==0]))

2.5242718446601944


# Search Best Params

In [10]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(shuffle=True, random_state=42)

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

clf = LogisticRegression(fit_intercept=False, random_state=42)

## BM25

In [12]:
X = bm25_df.drop('rel', axis=1)
y = bm25_df['rel'].astype(bool)
folds = 5
alpha_bm25 = 0.0
beta_bm25 = 0.0
f1 = 0.0
precision = 0.0
recall = 0.0
for train_index, test_index in skf.split(X, y):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]
    clf.fit(X_train.values, y_train.values)
    alpha_bm25 += clf.coef_[0][0]
    beta_bm25 += clf.coef_[0][1]
    y_pred = clf.predict(X_test.values)
    f1 += f1_score(y_test, y_pred)
    precision += precision_score(y_test, y_pred)
    recall += recall_score(y_test, y_pred)
alpha_bm25/=folds
beta_bm25/=folds
f1/=folds
precision/=folds
recall/=folds
print(f"F1 score: {f1}")
print(f"Precision score: {precision}")
print(f"Recall score: {recall}")
print(f"Best alpha:{alpha_bm25}")
print(f"Best beta: {beta_bm25}")

F1 score: 0.8503929769886345
Precision score: 0.7397335423197492
Recall score: 1.0
Best alpha:1.2025570627685844
Best beta: 0.9407036390334909


## LMD

In [13]:
X = lmd_df.drop('rel', axis=1)
y = lmd_df['rel'].astype(bool)
folds = 5
alpha_lmd = 0.0
beta_lmd = 0.0
f1 = 0.0
precision = 0.0
recall = 0.0
for train_index, test_index in skf.split(X, y):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]
    clf.fit(X_train.values, y_train.values)
    alpha_lmd += clf.coef_[0][0]
    beta_lmd += clf.coef_[0][1]
    y_pred = clf.predict(X_test.values)
    f1 += f1_score(y_test, y_pred)
    precision += precision_score(y_test, y_pred)
    recall += recall_score(y_test, y_pred)
alpha_lmd/=folds
beta_lmd/=folds
f1/=folds
precision/=folds
recall/=folds
print(f"F1 score: {f1}")
print(f"Precision score: {precision}")
print(f"Recall score: {recall}")
print(f"Best alpha:{alpha_lmd}")
print(f"Best beta: {beta_lmd}")

F1 score: 0.7755577436248698
Precision score: 0.633413525795242
Recall score: 1.0
Best alpha:0.77809029740458
Best beta: 0.5507165531926917


## Classic

In [14]:
classic_df = classic_df.dropna(axis=0)

In [15]:
X = classic_df.drop('rel', axis=1)
y = classic_df['rel'].astype(bool)
folds = 5
alpha_classic = 0.0
beta_classic = 0.0
f1 = 0.0
precision = 0.0
recall = 0.0
for train_index, test_index in skf.split(X, y):
    X_train, y_train = X.iloc[train_index], y.iloc[train_index]
    X_test, y_test = X.iloc[test_index], y.iloc[test_index]
    clf.fit(X_train.values, y_train.values)
    alpha_classic += clf.coef_[0][0]
    beta_classic += clf.coef_[0][1]
    y_pred = clf.predict(X_test.values)
    f1 += f1_score(y_test, y_pred)
    precision += precision_score(y_test, y_pred)
    recall += recall_score(y_test, y_pred)
alpha_classic/=folds
beta_classic/=folds
f1/=folds
precision/=folds
recall/=folds
print(f"F1 score: {f1}")
print(f"Best alpha:{alpha_classic}")
print(f"Best beta: {beta_classic}")
print(f"Precision score: {precision}")
print(f"Recall score: {recall}")

F1 score: 0.8381808566895822
Best alpha:0.9685231942879037
Best beta: 1.6053453674871936
Precision score: 0.7214397496087637
Recall score: 1.0
