In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")
# Feature matrices (X) and labels (y), keyed by split name.
X = {split: np.load(f"X_{split}.npy") for split in ("train", "dev")}
y = {split: np.load(f"y_{split}.npy") for split in ("train", "dev")}

In [2]:
# (n_examples, n_features) of the train split followed by the dev split.
tuple(X[split].shape for split in ("train", "dev"))

((9632, 300), (1071, 300))

In [3]:
# Sum of the labels over both splits divided by the total number of rows.
n_positive = np.sum(y["train"]) + np.sum(y["dev"])
n_examples = X["train"].shape[0] + X["dev"].shape[0]
n_positive / n_examples

0.1117443707371765

In [4]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier: L2-penalised logistic regression. scikit-learn's `C` is
# the inverse of the regularisation strength, hence C = 1 / l2.
l2 = 10
model = LogisticRegression(penalty='l2', C=1/l2)
model.fit(X["train"], y["train"])

# Positive-class probability of every dev example, as an (n_dev, 1) column.
pred = model.predict_proba(X["dev"])[:, 1].reshape(-1, 1)

# Mean accuracy on the dev split (displayed as the cell output).
model.score(X["dev"], y["dev"])

0.8860877684407096

In [5]:
# Persist the baseline dev-set probabilities.
np.save(file="old_predictions.npy", arr=pred)

In [6]:
import sklearn.metrics as mc
# F1 when labels are obtained by rounding the probabilities; kept in a variable
# because only the last expression of a cell is displayed.
f1_rounded = mc.f1_score(y["dev"], np.round(pred))

# change threshold
threshold = 0.25
decisions = (pred >= threshold).astype(int)

# Column view of the dev labels, sized from the data rather than a literal.
y_dev_col = np.reshape(y["dev"], (-1, 1))
print(mc.f1_score(y_dev_col, decisions))
print(mc.classification_report(y_dev_col, decisions))

0.3964757709251101
              precision    recall  f1-score   support

           0       0.92      0.93      0.93       953
           1       0.41      0.38      0.40       118

    accuracy                           0.87      1071
   macro avg       0.67      0.66      0.66      1071
weighted avg       0.87      0.87      0.87      1071



In [16]:
# ROC curve of the baseline probabilities on the dev split, then the area under it.
fpr, tpr, thresholds = mc.roc_curve(y_true=y["dev"], y_score=pred)
mc.auc(x=fpr, y=tpr)

0.8137994202073737

In [17]:
import pandas as pd
import sklearn.metrics as metrics

In [18]:
# Fitted parameters as a single row vector: [coefficients..., intercept].
w = np.concatenate((model.coef_, model.intercept_[None, :]), axis=1)

n_train = X["train"].shape[0]
n_dev = X["dev"].shape[0]

# Append a constant-1 column so the intercept is treated like any other weight
# when taking gradients.
F_train = np.concatenate([X["train"], np.ones((n_train, 1))], axis=1)
F_dev = np.concatenate([X["dev"], np.ones((n_dev, 1))], axis=1)

# Residuals: predicted positive-class probability minus the label.
error_train = model.predict_proba(X["train"])[:, 1] - y["train"]
error_dev = model.predict_proba(X["dev"])[:, 1] - y["dev"]

# Per-example log-loss gradient with respect to [coef, intercept]:
# residual * feature row (one row per example).
gradient_train = F_train * error_train[:, None]
gradient_dev = F_dev * error_dev[:, None]

# Mean training gradient; the divisor is the actual number of training rows.
g = np.sum(gradient_train, axis=0) / n_train
gradient_train.shape, gradient_dev.shape

((9632, 301), (1071, 301))

In [19]:
# Hessian used for the influence computation:
#     (F^T diag(p * (1 - p)) F + l2 * I) / n_train
# The diagonal weighting is applied by scaling the rows of F_train, which avoids
# materialising an (n_train, n_train) dense matrix.
# NOTE(review): the l2 term is added to every diagonal entry, including the
# intercept column -- confirm this matches how the fitted model penalises
# its intercept.
probs = model.predict_proba(X["train"])[:, 1]
n_train = X["train"].shape[0]
curvature = probs * (1 - probs)
hessian = ((F_train * curvature[:, None]).T @ F_train + l2 * np.eye(F_train.shape[1])) / n_train
inverse_hessian = np.linalg.inv(hessian)

In [20]:
# Weight carried by a single training example.
eps = 1 / X["train"].shape[0]

# Parameter shift attributed to each training example: one column per example,
# obtained by applying -eps * H^{-1} to that example's gradient.
delta_k = np.matmul(-eps * inverse_hessian, gradient_train.T)

# Gradient of each dev probability with respect to the parameters:
# p * (1 - p) times the (intercept-augmented) feature row.
grad_f = np.multiply(F_dev, pred * (1 - pred))

# Entry [i, j]: first-order change in dev probability i under parameter shift j.
delta_pred = np.matmul(grad_f, delta_k)
delta_pred.shape

(1071, 9632)

In [21]:
def Remove(k, scores, test_idx):
    """Drop the k training examples selected by ``scores[test_idx]``.

    If the baseline probability ``pred[test_idx]`` is above 0.25 the k
    highest-scoring training examples are dropped, otherwise the k
    lowest-scoring ones.

    Parameters
    ----------
    k : int
        Number of training examples to drop. ``k == 0`` drops nothing.
    scores : array indexable as ``scores[test_idx]``
        One row of per-training-example scores for each test point.
    test_idx : int
        Index of the test (dev) point.

    Returns
    -------
    X_k, y_k : ndarray
        Copies of ``X["train"]`` / ``y["train"]`` without the selected rows.
    prediction : float
        Negated sum of the selected scores.
    """
    ranked = scores[test_idx].argsort()

    if pred[test_idx] > 0.25:
        # Slice from an explicit start index: ``ranked[-k:]`` would select
        # *every* example when k == 0.
        top_k_index = ranked[max(len(ranked) - k, 0):]
    else:
        top_k_index = ranked[:k]

    X_k = np.delete(X["train"], top_k_index, axis=0)
    y_k = np.delete(y["train"], top_k_index, axis=0)

    prediction = -np.sum(scores[test_idx][top_k_index])

    return X_k, y_k, prediction

In [22]:
def new_train(k, dev_index, scores):
    """Retrain without the k selected training examples and re-score one dev point.

    Parameters
    ----------
    k : int
        Number of training examples to remove (chosen by ``Remove``).
    dev_index : int
        Index of the dev point to re-evaluate.
    scores : array
        Score matrix forwarded to ``Remove``.

    Returns
    -------
    change : float
        New minus old positive-class probability of the dev point.
    flip : ndarray of bool, shape (1,)
        True when the 0.25-thresholded decision is the SAME under the old and
        the retrained model.
        NOTE(review): the name suggests the opposite polarity; it is kept
        as-is so existing callers are unaffected -- confirm intent.
    prediction : float
        Change estimated by ``Remove`` from the scores.
    new : float
        Positive-class probability under the retrained model.

    All four values are None when the reduced training set contains a single
    class, since the classifier cannot be fitted then.
    """
    X_k, y_k, prediction = Remove(k, scores, dev_index)

    n_positive = np.sum(y_k)
    if n_positive == 0 or n_positive == y_k.shape[0]:
        return None, None, None, None

    # Refit with the same regularisation strength as the baseline model.
    model_k = LogisticRegression(penalty='l2', C=model.C)
    model_k.fit(X_k, y_k)

    # predict_proba expects a 2-D input, so turn the single point into one row.
    test_point = np.reshape(X["dev"][dev_index], (1, -1))
    old = model.predict_proba(test_point)[0][1]
    new = model_k.predict_proba(test_point)[0][1]

    change = new - old
    # Compare probabilities against the threshold; ``predict`` returns hard
    # 0/1 labels, for which a 0.25 cut-off is meaningless.
    flip = np.array([(old > 0.25) == (new > 0.25)])

    return change, flip, prediction, new

# Find the approximate k using influence functions (IF)

In [23]:
def pred_change(k, scores, test_idx):
    """Return the score-based change estimate for removing k training examples.

    Parameters
    ----------
    k : int
        Number of training examples to remove.
    scores : array
        Score matrix forwarded to ``Remove``.
    test_idx : int
        Index of the test (dev) point.

    Returns
    -------
    The ``prediction`` value computed by ``Remove`` for these arguments.
    """
    _, _, prediction = Remove(k, scores, test_idx)
    return prediction

In [24]:
def approximate_k(test_idx):
    """Smallest k whose estimated change moves a dev prediction across 0.25.

    The training examples are ranked once by ``delta_pred[test_idx]``
    (highest first when the baseline probability is above 0.25, lowest first
    otherwise). The estimated change for removing the first k of them is the
    negated running sum of their scores, so every k in 1 .. n_train - 1 is
    evaluated with a single sort and a cumulative sum.

    Parameters
    ----------
    test_idx : int
        Index of the dev point.

    Returns
    -------
    int or None
        The first k for which ``old + change`` lands on the other side of
        0.25, or None if no such k exists. The estimated change is printed
        when a k is found.
    """
    old = pred[test_idx].item()
    if old == 0.25:
        # Exactly on the boundary: neither crossing condition can be met.
        return None

    above = old > 0.25
    row = delta_pred[test_idx]
    order = row.argsort()
    if above:
        order = order[::-1]  # highest scores first

    # k runs over 1 .. n_train - 1
    max_k = X["train"].shape[0] - 1
    changes = -np.cumsum(row[order][:max_k])
    projected = old + changes

    crossed = projected < 0.25 if above else projected > 0.25
    if not crossed.any():
        return None

    first = int(np.argmax(crossed))  # index of the first True
    print("prediction", changes[first])
    return first + 1

In [25]:
# Loop over all dev points: estimate k from the influence scores, then retrain
# without those k training examples to get the actual new probability.

n_dev = X["dev"].shape[0]

# NaN marks "no k found" as well as rows that have not been processed yet, so
# a missing entry can never be mistaken for a real k or probability.
appro_ks = np.full((n_dev, 1), np.nan)
new_pred = np.full((n_dev, 1), np.nan)

for test_idx in range(n_dev):
    appro_k = approximate_k(test_idx)
    new = None
    if appro_k is not None:
        appro_ks[test_idx] = appro_k
        _, _, _, new = new_train(appro_k, test_idx, delta_pred)
    if new is not None:
        new_pred[test_idx] = new
    print(test_idx, appro_k, pred[test_idx], new)
    print()


np.save("appro_ks.npy", appro_ks)
np.save("new_pred.npy", new_pred)

prediction 0.197407241333675
0 2191 [0.05260323] 0.9510751775888799

prediction -0.20302789052492853
1 24 [0.44794801] 0.2520871896231084

2 None [0.00101895] None

3 None [0.07037846] None

prediction -0.20555427937712567
4 72 [0.45548376] 0.24790828802253426

prediction 0.045537691639109354
5 19 [0.20574632] 0.26783074581244215

prediction -0.01462468213105016
6 3 [0.26273143] 0.2478696576025324

prediction 0.14281452727356933
7 518 [0.10732156] 0.48609391096033705

prediction 0.09205696916809278
8 237 [0.1580026] 0.317757677692976

9 None [0.04240677] None

prediction 0.15273232940036094
10 904 [0.09728552] 0.5709582667191523

prediction 0.1544020645701336
11 907 [0.09565065] 0.6750157915132654

prediction 0.07328645368791312
12 118 [0.17704108] 0.27509530163043494

prediction 0.10878990736715012
13 269 [0.14141457] 0.3341814978304452

prediction 0.16884999432859985
14 665 [0.08121635] 0.5562393136645656

prediction -0.15986787828811605
15 70 [0.4087051] 0.2448681252192257

predicti

KeyboardInterrupt: 