In [2]:
!pip install scikit-learn



In [113]:
import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [130]:
models = [0, 1, 3, 5, 6]
results = {}
THRESHOLD = 0.5

eval_labels = np.load('drive/MyDrive/eval_labels.npy')
eval_outputs = np.load('drive/MyDrive/eval_outputs.npy')[:, models]
test_outputs = np.load('drive/MyDrive/test_outputs.npy')[:, models]

print(eval_labels.shape, eval_outputs.shape, test_outputs.shape)

(125000,) (125000, 5) (10000, 5)


In [131]:
trainX, testX, trainY, testY = train_test_split(eval_outputs, eval_labels, test_size=0.5)
print(trainX.shape, testX.shape)

(62500, 5) (62500, 5)


In [132]:
predictions = np.where(np.mean(eval_outputs, axis=1) >= THRESHOLD, 1, 0)
print(f"averaging accuracy: {accuracy_score(predictions, eval_labels):.3%}")

results["avg"] = np.mean(test_outputs, axis=1)
results["avg"] = np.where(results["avg"] >= THRESHOLD, 1, -1)

averaging accuracy: 92.126%


In [133]:
names = ["linear", "logistic", "ridge"]
models = [LinearRegression(), LogisticRegression(), Ridge()]

for name, model in zip(names, models):
  model.fit(trainX, trainY)
  print(f"\n[{name}]\nModel Importance Weights:\n{model.coef_}")

  predictions = model.predict(testX)
  predictions = np.where(predictions >= THRESHOLD, 1, 0)

  print(f"Accuracy: {accuracy_score(predictions, testY):.3%}")

  results[name] = model.predict(test_outputs)
  results[name] = np.where(results[name] >= THRESHOLD, 1, -1)
  diff = np.sum(results["avg"] != results[name])
  print(f"Predictions Changed: {diff}")


[linear]
Model Importance Weights:
[0.14142618 0.20179898 0.3031399  0.11619852 0.18587813]
Accuracy: 92.274%
Predictions Changed: 57

[logistic]
Model Importance Weights:
[[0.91610326 1.22722565 1.85691913 0.98165256 1.36900319]]
Accuracy: 92.294%
Predictions Changed: 37

[ridge]
Model Importance Weights:
[0.14185993 0.2014356  0.30289885 0.1158291  0.18640898]
Accuracy: 92.274%
Predictions Changed: 57


In [134]:
def save_outputs(outputs: np.ndarray, file_name: str):
    """
    Save the model outputs to a file.
    :param outputs: Model outputs, np.ndarry with shape (10'000,) and values in {-1, 1}
    """
    indices = np.arange(1, outputs.shape[0] + 1)
    combined = np.column_stack((indices, outputs))

    np.savetxt(
        f'{file_name}.csv', combined, delimiter=',', fmt='%d', header="Id,Prediction", comments='')

In [135]:
for name in names:
  save_outputs(results[name], name)