In [1]:
import numpy as np
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Feature matrices and label vectors, keyed by split name.
X = {"train": np.load("X_train.npy"), "dev": np.load("X_dev.npy")}
y = {"train": np.load("y_train.npy"), "dev": np.load("y_dev.npy")}

In [2]:
# Shape of the feature matrix for each split, train first and dev second.
tuple(X[split].shape for split in ("train", "dev"))

((11678, 300), (1298, 300))

In [3]:
# Sum of all labels divided by the number of examples, over train and dev
# combined (the positive-class rate, assuming 0/1 labels -- TODO confirm).
n_positive = y["train"].sum() + y["dev"].sum()
n_examples = y["train"].shape[0] + y["dev"].shape[0]
n_positive / n_examples

0.10057028360049322

In [4]:
# Label sum for each split (train, dev).
y["train"].sum(), y["dev"].sum()

(1181, 124)

In [5]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier: L2-penalised logistic regression fitted on the train split.
model = LogisticRegression(penalty='l2', C=1)
model.fit(X["train"], y["train"])

# Probability of class 1 for every dev point, stored as an (n_dev, 1) column.
# predict_proba is evaluated once; the original ran it a second time only to
# read the length needed for the reshape.
pred = model.predict_proba(X["dev"])[:, 1].reshape(-1, 1)

# Mean accuracy on the dev split.
model.score(X["dev"], y["dev"])

0.9653312788906009

In [6]:
import sklearn.metrics as mc

# Turn the dev probabilities into hard 0/1 labels by rounding, then score F1.
dev_labels_hat = np.round(pred)
mc.f1_score(y["dev"], dev_labels_hat)

0.8017621145374451

In [7]:
# ROC curve of the dev probabilities, then the area under it.
fpr, tpr, thresholds = mc.roc_curve(y_true=y["dev"], y_score=pred)
mc.auc(x=fpr, y=tpr)

0.9905341539814255

In [8]:
# Coefficients and intercept packed into one (1, n_features + 1) row so they
# line up with the bias-augmented feature matrices below.
w = np.concatenate((model.coef_, model.intercept_[None, :]), axis=1)

# Append a constant-one column so the gradient with respect to the intercept
# falls out of the same product as the gradient with respect to the weights.
F_train = np.concatenate([X["train"], np.ones((X["train"].shape[0], 1))], axis=1)
F_dev = np.concatenate([X["dev"], np.ones((X["dev"].shape[0], 1))], axis=1)

# Residual (predicted probability of class 1 minus label) for each example.
error_train = model.predict_proba(X["train"])[:, 1] - y["train"]
error_dev = model.predict_proba(X["dev"])[:, 1] - y["dev"]

# Row i is the residual-weighted feature vector of example i.
gradient_train = F_train * error_train[:, None]
gradient_dev = F_dev * error_dev[:, None]

# Mean training gradient. The original divided the summed gradient by a
# hard-coded 6920, which does not match the number of rows in X["train"];
# np.mean always normalises by the actual row count.
g = np.mean(gradient_train, axis=0)
gradient_train.shape, gradient_dev.shape

((11678, 301), (1298, 301))

In [9]:
# Build the (n_features + 1) x (n_features + 1) matrix
#     F^T diag(p * (1 - p)) F / n  +  I / n
# and invert it.
probs = model.predict_proba(X["train"])[:, 1]
n_train = X["train"].shape[0]

# Scale the columns of F_train.T by p * (1 - p) through broadcasting instead of
# multiplying by np.diag(...): same product, without allocating a dense
# n_train x n_train matrix.
curvature = probs * (1 - probs)
data_term = (F_train.T * curvature) @ F_train / n_train

# NOTE(review): the identity term also covers the intercept entry (last
# row/column) -- confirm this matches how the fitted model penalises its
# intercept.
hessian = data_term + 1 * np.eye(F_train.shape[1]) / n_train
inverse_hessian = np.linalg.inv(hessian)

In [10]:
# Weight given to a single training example: one over the training-set size.
eps = 1 / X["train"].shape[0]

# Column i is -eps * H^{-1} applied to the gradient row of training example i.
delta_k = (-eps * inverse_hessian) @ gradient_train.T

# Each bias-augmented dev row scaled by p * (1 - p) for that dev point.
sigmoid_slope = pred * (1 - pred)
grad_f = F_dev * sigmoid_slope

# delta_pred[d, i] pairs dev point d with training example i.
delta_pred = grad_f @ delta_k
delta_pred.shape

(1298, 11678)

In [11]:
def Remove(k, scores, test_idx):
    """Drop the k training examples selected by ``scores[test_idx]``.

    The row ``scores[test_idx]`` is sorted in ascending order. When
    ``pred[test_idx]`` is above 0.5 the k highest-scoring training examples
    are selected, otherwise the k lowest-scoring ones.

    Args:
        k: number of training examples to remove.
        scores: 2-D array indexed as ``scores[test_idx][train_idx]``.
        test_idx: index of the dev point under study.

    Returns:
        ``(X_k, y_k, prediction)``: copies of ``X["train"]`` and ``y["train"]``
        without the selected rows, and the negated sum of the selected scores.
    """
    row = scores[test_idx]
    ranking = np.argsort(row)

    # Which end of the ranking to take depends on the current prediction.
    predicted_positive = pred[test_idx] > 0.5
    top_k_index = ranking[-k:] if predicted_positive else ranking[:k]

    X_k = np.delete(X["train"], top_k_index, axis=0)
    y_k = np.delete(y["train"], top_k_index, axis=0)

    prediction = -np.sum(row[top_k_index])
    return X_k, y_k, prediction

In [12]:
def new_train(k, dev_index, scores):
    """Retrain without the k examples picked by ``Remove`` and compare predictions.

    Args:
        k: number of training examples to remove.
        dev_index: index of the dev point whose prediction is tracked.
        scores: score matrix forwarded to ``Remove``.

    Returns:
        ``(change, flip, prediction, new)`` where
        * ``change`` is the retrained probability of class 1 minus the original one,
        * ``flip`` is the comparison ``model.predict(...) == model_k.predict(...)``,
          so it is True when the predicted label did NOT change,
        * ``prediction`` is the third value returned by ``Remove``,
        * ``new`` is the retrained probability of class 1.
        All four are None when the remaining labels sum to 0 or to their count,
        in which case no model is fitted.
    """
    X_k, y_k, prediction = Remove(k, scores, dev_index)

    # Skip the refit when the label sum shows no mix of values left.
    label_total = np.sum(y_k)
    if label_total == 0 or label_total == y_k.shape[0]:
        return None, None, None, None

    # Fit the model again on the reduced training set.
    model_k = LogisticRegression(penalty='l2', C=1)
    model_k.fit(X_k, y_k)

    # Predict the probability for the dev point with both models.
    test_point = np.reshape(X["dev"][dev_index], (1, -1))
    old = model.predict_proba(test_point)[0][1]
    new = model_k.predict_proba(test_point)[0][1]

    change = -(old - new)
    flip = model.predict(test_point) == model_k.predict(test_point)

    return change, flip, prediction, new

# Find approximate k using influence functions (IF)

In [15]:
def pred_change(k, scores, test_idx):
    """Return the third value produced by ``Remove(k, scores, test_idx)``.

    The original body ignored ``scores`` (it always passed the global
    ``delta_pred``) and fell off the end without returning anything; the
    caller's score matrix is now honoured and the value is returned.

    Args:
        k: number of training examples to remove.
        scores: score matrix forwarded to ``Remove``.
        test_idx: index of the dev point under study.

    Returns:
        The ``prediction`` element of the tuple returned by ``Remove``.
    """
    _, _, prediction = Remove(k, scores, test_idx)
    return prediction

In [16]:
def approximate_k(test_idx):
    """Smallest k whose summed scores push ``pred[test_idx]`` across 0.5.

    Uses the same selection rule as ``Remove`` on ``delta_pred[test_idx]``:
    highest scores first when the current prediction is above 0.5, lowest
    scores first otherwise. The predicted change for a given k is the negated
    sum of the first k selected scores.

    The original called ``Remove`` once per k, which re-sorted the row and
    copied the whole training matrix every time. Here the row is sorted once
    and a running sum gives the change for every k in one pass. The running
    sum adds terms in a different order than ``np.sum`` over a slice, so the
    result can differ only when a sum lands within floating-point rounding of
    the 0.5 threshold. The old ``change == None`` branch is dropped because
    ``Remove`` never returns None for that value.

    Args:
        test_idx: index of the dev point under study.

    Returns:
        The first k in ``1 .. X["train"].shape[0] - 1`` for which the predicted
        probability crosses 0.5, or None if no such k exists (including when
        the current prediction is exactly 0.5).
    """
    old = pred[test_idx].item()
    if old == 0.5:
        # Neither strict comparison below can ever hold.
        return None

    row = delta_pred[test_idx]
    order = np.argsort(row)
    if old > 0.5:
        order = order[::-1]  # highest scores first

    max_k = X["train"].shape[0] - 1
    predicted = old - np.cumsum(row[order][:max_k])

    crossed = predicted < 0.5 if old > 0.5 else predicted > 0.5
    hits = np.flatnonzero(crossed)
    if hits.size == 0:
        return None
    return int(hits[0]) + 1

In [17]:
# Loop over all dev points: estimate k for each one, then retrain without those
# k training examples and record the new probability.
n_dev = X["dev"].shape[0]  # was hard-coded as 1298

appro_ks = np.zeros((n_dev, 1))
new_pred = np.zeros((n_dev, 1))

for test_idx in range(n_dev):
    appro_k = approximate_k(test_idx)
    if appro_k is None:
        new = None
    else:
        _, _, _, new = new_train(appro_k, test_idx, delta_pred)

    # Missing values are stored as NaN in the float arrays; the original got
    # the same result implicitly by assigning None.
    appro_ks[test_idx] = np.nan if appro_k is None else appro_k
    new_pred[test_idx] = np.nan if new is None else new

    print(test_idx, appro_k, new)
    print()


np.save("appro_ks.npy", appro_ks)
np.save("new_predictions.npy", new_pred)
np.save("old_predictions.npy", pred)

0 None None

1 None None

2 None None

3 None None



KeyboardInterrupt: 

# exact_k

In [None]:
def exact_k(test_idx):
    """Smallest k for which retraining changes the predicted label.

    Calls ``new_train(k, test_idx, delta_pred)`` for increasing k and looks at
    its second return value, which is True while the original and retrained
    models predict the same label.

    The upper bound now comes from ``X["train"]`` instead of a hard-coded
    11678, and the None / False checks no longer rely on ``==`` comparisons
    against a numpy array.

    Args:
        test_idx: index of the dev point under study.

    Returns:
        The first k at which the predicted label differs; None if ``new_train``
        declines to retrain (returns None) or no k below the training-set size
        changes the label.
    """
    for k in range(1, X["train"].shape[0]):
        _, flip, _, _ = new_train(k, test_idx, delta_pred)
        if flip is None:
            return None
        # flip holds "labels are equal"; a falsy value means the label changed.
        if not np.all(flip):
            return k
    return None

In [None]:
"""
# Loop over all dev points:
import warnings
warnings.filterwarnings("ignore")

exact_ks = []
exact_ks = np.zeros((1298, 1))
for test_idx in range(X["dev"].shape[0]):
    k =exact_k(test_idx)
    print(test_idx, k)
    exact_ks[test_idx]= k

exact_ks=np.array(exact_ks)
np.save("exact_ks_IP.npy", exact_ks)

"""

# Difference between the actual change and the predicted change as k increases

In [None]:
"""
row = len(list(range(0, X["dev"].shape[0])))
col = len(list(range(1, np.sum(y["train"]))))
changes = np.zeros((row, col))
prediction = np.zeros((row, col))

import warnings
warnings.filterwarnings("ignore")
for i in range(0, X["dev"].shape[0]):
    print(i)
    for j in range(1, np.sum(y["train"]):
        changes[i, j-1], _, prediction[i, j-1] = new_train(j, i, delta_pred)
        
   
np.save("pred_change_app.npy", prediction)
np.save("changes.npy", changes)

difference = prediction - changes
np.save("difference_k_appk.npy", difference)
"""