In [10]:
import numpy as np
from sklearn.metrics import f1_score, classification_report

def search(rf_prob, vgg_prob, true):
    """Grid-search the VGG weight of a two-model score blend and report on it.

    For every weight ``w`` in 0.00, 0.01, ..., 1.00 the blended scores
    ``w * vgg_prob + (1 - w) * rf_prob`` are converted to class predictions
    with a row-wise argmax and scored with macro-averaged F1 against
    ``true``. The first weight that reaches the highest score is printed,
    then used to print a classification report.

    Parameters
    ----------
    rf_prob, vgg_prob : array-like
        Per-class scores from the two models. Both must have the same shape.
        Presumably (n_samples, 3) probability arrays, since the report
        hard-codes three class names -- confirm against the saved files.
    true : array-like
        Ground-truth labels handed to the sklearn metrics.

    Returns
    -------
    float
        The selected VGG weight, so callers can reuse it instead of copying
        the printed value by hand. (When the call is the last expression of
        a notebook cell, the weight is also shown as the cell output.)

    Raises
    ------
    ValueError
        If ``rf_prob`` and ``vgg_prob`` do not have identical shapes; without
        this check broadcasting could silently blend misaligned arrays.
    """
    rf_prob = np.asarray(rf_prob)
    vgg_prob = np.asarray(vgg_prob)
    if rf_prob.shape != vgg_prob.shape:
        raise ValueError(
            f"rf_prob and vgg_prob must have the same shape, "
            f"got {rf_prob.shape} and {vgg_prob.shape}"
        )

    # Do not reuse the names of built-ins (`max`) for the running optimum.
    best_score = 0
    best_i = 0

    # linspace pins the grid to exactly 101 points ending at 1.0; a float-step
    # arange with a padded stop depends on rounding to get its length right.
    for i in np.linspace(0, 1, 101):
        ensemble_prob = i * vgg_prob + (1 - i) * rf_prob
        ensemble_pred = np.argmax(ensemble_prob, axis=1)
        score = f1_score(true, ensemble_pred, average='macro')

        # Strict comparison: on ties the smallest weight wins.
        if score > best_score:
            best_score = score
            best_i = i

    print(f"best i: {best_i}")

    ensemble_prob = best_i * vgg_prob + (1 - best_i) * rf_prob
    ensemble_pred = np.argmax(ensemble_prob, axis=1)

    print(classification_report(true, ensemble_pred, digits=6, target_names=['QSO', 'STAR', 'GALAXY']))

    return float(best_i)
    


In [17]:
def eval(rf_prob, vgg_prob, true, best_i):
    """Print the classification report of a fixed-weight two-model blend.

    The scores are combined as ``best_i * vgg_prob + (1 - best_i) * rf_prob``,
    the predicted class is the row-wise argmax, and the resulting report
    (6 digits, classes named QSO / STAR / GALAXY) is printed. Nothing is
    returned.

    NOTE(review): this name shadows the built-in ``eval``; it is kept because
    later cells call it under this name.
    """
    class_names = ['QSO', 'STAR', 'GALAXY']
    rf_weight = 1 - best_i

    blended = best_i * vgg_prob + rf_weight * rf_prob
    predicted = np.argmax(blended, axis=1)

    report = classification_report(true, predicted, digits=6, target_names=class_names)
    print(report)

In [4]:
################## VALIDATION #################################

# Performance pairs, written as (URF run, VGG run): 3,2 - 1,3 - 2,1

# Labels used as the `true` argument of search().
true = np.load('preds/true_nwval.npy')

# Saved outputs of the three URF runs (used as `rf_prob`).
urf1, urf2, urf3 = map(np.load, (
    'preds/URF_1_nwval.npy',
    'preds/URF_2_nwval.npy',
    'preds/URF_3_nwval.npy',
))

# Saved outputs of the three VGG runs (used as `vgg_prob`).
vgg1, vgg2, vgg3 = map(np.load, (
    'preds/VGG_1_nwval.npy',
    'preds/VGG_2_nwval.npy',
    'preds/VGG_3_nwval.npy',
))



In [11]:
# Tune the blend weight for the pair (URF run 3, VGG run 2) on the currently loaded arrays.
search(urf3, vgg2, true)

best i: 0.09
              precision    recall  f1-score   support

         QSO   0.786408  0.880435  0.830769       276
        STAR   0.931389  0.901993  0.916456       602
      GALAXY   0.916350  0.870036  0.892593       277

    accuracy                       0.889177      1155
   macro avg   0.878049  0.884155  0.879939      1155
weighted avg   0.893138  0.889177  0.890257      1155



In [12]:
# Tune the blend weight for the pair (URF run 1, VGG run 3) on the currently loaded arrays.
search(urf1, vgg3, true)

best i: 0.14
              precision    recall  f1-score   support

         QSO   0.773163  0.876812  0.821732       276
        STAR   0.929553  0.898671  0.913851       602
      GALAXY   0.919231  0.862816  0.890130       277

    accuracy                       0.884848      1155
   macro avg   0.873982  0.879433  0.875238      1155
weighted avg   0.889706  0.884848  0.886149      1155



In [13]:
# Tune the blend weight for the pair (URF run 2, VGG run 1) on the currently loaded arrays.
search(urf2, vgg1, true)

best i: 0.4
              precision    recall  f1-score   support

         QSO   0.782178  0.858696  0.818653       276
        STAR   0.920068  0.898671  0.909244       602
      GALAXY   0.905303  0.862816  0.883549       277

    accuracy                       0.880519      1155
   macro avg   0.869183  0.873394  0.870482      1155
weighted avg   0.883577  0.880519  0.881434      1155



In [14]:
################## TEST #################################

# Performance pairs, written as (URF run, VGG run): 3,2 - 1,3 - 2,1

# NOTE: these assignments rebind the same names that held the 'nwval' arrays,
# so the cells below only make sense when run after this one.
true = np.load('preds/true_nwtest.npy')

# Saved outputs of the three URF runs (used as `rf_prob`).
urf1, urf2, urf3 = map(np.load, (
    'preds/URF_1_nwtest.npy',
    'preds/URF_2_nwtest.npy',
    'preds/URF_3_nwtest.npy',
))

# Saved outputs of the three VGG runs (used as `vgg_prob`).
vgg1, vgg2, vgg3 = map(np.load, (
    'preds/VGG_1_nwtest.npy',
    'preds/VGG_2_nwtest.npy',
    'preds/VGG_3_nwtest.npy',
))

In [23]:
# 0.09 is the "best i" printed by search(urf3, vgg2, true) on the 'nwval' arrays, copied by hand.
eval(urf3, vgg2, true, 0.09)

              precision    recall  f1-score   support

         QSO   0.757225  0.864686  0.807396       303
        STAR   0.917976  0.844302  0.879599       623
      GALAXY   0.859206  0.881481  0.870201       270

    accuracy                       0.857860      1196
   macro avg   0.844802  0.863490  0.852399      1196
weighted avg   0.863983  0.857860  0.859185      1196



In [24]:
# 0.14 is the "best i" printed by search(urf1, vgg3, true) on the 'nwval' arrays, copied by hand.
eval(urf1, vgg3, true, 0.14)

              precision    recall  f1-score   support

         QSO   0.755682  0.877888  0.812214       303
        STAR   0.929825  0.850722  0.888516       623
      GALAXY   0.879562  0.892593  0.886029       270

    accuracy                       0.867057      1196
   macro avg   0.855023  0.873734  0.862253      1196
weighted avg   0.874360  0.867057  0.868624      1196



In [25]:
# 0.4 is the "best i" printed by search(urf2, vgg1, true) on the 'nwval' arrays, copied by hand.
eval(urf2, vgg1, true, 0.4)

              precision    recall  f1-score   support

         QSO   0.776812  0.884488  0.827160       303
        STAR   0.928322  0.852327  0.888703       623
      GALAXY   0.867384  0.896296  0.881603       270

    accuracy                       0.870401      1196
   macro avg   0.857506  0.877704  0.865822      1196
weighted avg   0.876180  0.870401  0.871509      1196



In [26]:
############### VALIDATION METRICS ################

# Each row is [accuracy, macro-avg F1], copied from the three search() reports above.
r1 = [0.889177, 0.879939]
r2 = [0.884848, 0.875238]
r3 = [0.880519, 0.870482]

# Scale to percent, then aggregate column-wise over the three rows.
nw = 100 * np.array((r1, r2, r3))

nw_mean = nw.mean(axis=0)
nw_std = nw.std(axis=0)  # NumPy's default ddof=0 (population std)

In [28]:
# Show the column-wise std (in percent) of the most recently computed metrics table.
nw_std

array([0.35346137, 0.38608258])

TEST SET

In [29]:
################## WITHOUT WISE METRICS ####################

# Three hand-entered result rows; these numbers are not produced in this notebook.
# Presumably [accuracy, macro-avg F1] like the other metric cells -- TODO confirm.
r1 = [0.834448, 0.832050]
r2 = [0.833612, 0.831975]
r3 = [0.832776, 0.830717]

# Scale to percent, then aggregate column-wise over the three rows.
nw = 100 * np.array((r1, r2, r3))

nw_mean = nw.mean(axis=0)
nw_std = nw.std(axis=0)  # NumPy's default ddof=0 (population std)

In [32]:
################## UNIFIED RF METRICS ####################

# Three hand-entered result rows; these numbers are not produced in this notebook.
# Presumably [accuracy, macro-avg F1] like the other metric cells -- TODO confirm.
r1 = [0.858696, 0.853476]
r2 = [0.859532, 0.853283]
r3 = [0.857860, 0.851644]

# Scale to percent, then aggregate column-wise over the three rows.
nw = 100 * np.array((r1, r2, r3))

nw_mean = nw.mean(axis=0)
nw_std = nw.std(axis=0)  # NumPy's default ddof=0 (population std)

In [35]:
################## ENSEMBLE METRICS ####################

# Each row is [accuracy, macro-avg F1], copied from the three eval() reports above.
r1 = [0.857860, 0.852399]
r2 = [0.867057, 0.862253]
r3 = [0.870401, 0.865822]

# Scale to percent, then aggregate column-wise over the three rows.
nw = 100 * np.array((r1, r2, r3))

nw_mean = nw.mean(axis=0)
nw_std = nw.std(axis=0)  # NumPy's default ddof=0 (population std)

In [37]:
# Show the column-wise std (in percent) of the most recently computed metrics table.
nw_std

array([0.53024504, 0.56766188])