In [1]:
# from MuyGPyS import config
# config.update("muygpys_jax_enabled", False)

import numpy as np
import pandas as pd
import random
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from MuyGPyS.examples.classify import do_classify
from MuyGPyS.gp.deformation import F2, Isotropy, l2
from MuyGPyS.gp.hyperparameter import Parameter, Parameter as ScalarParam
from MuyGPyS.gp.kernels import RBF, Matern
from MuyGPyS.gp.noise import HomoscedasticNoise
from MuyGPyS.optimize import Bayes_optimize, L_BFGS_B_optimize
from MuyGPyS.optimize.loss import LossFn, cross_entropy_fn, looph_fn



No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)


In [None]:

# data_path = ['norm_11.csv','norm_1.csv', 'norm_21.csv', 'raw_image_data.csv']
data_path = [ 'raw_image_data.csv']
# get rid of "../data/data-norm/"
norm_data_names = data_path
norm_data_names

In [2]:
def generate_onehot_value(values):
    onehot = []
    for val in values:
        if val == 0:
            onehot.append([1., -1.])
        elif val == 1:
            onehot.append([-1., 1.])
    return onehot

In [None]:
nn_kwargs_exact = {"nn_method": "exact", "algorithm": "ball_tree"}

nn_kwargs_hnsw = {"nn_method": "hnsw"}

k_kwargs_rbf ={
            "kernel": RBF(
                 deformation=Isotropy(
                     metric=F2,
                 length_scale=Parameter(1.0, (1e-2, 1e2)),
                 ),
            ),
            "noise": HomoscedasticNoise(1e-5),
            }
k_kwargs_mattern= { "kernel": Matern(
             smoothness=ScalarParam(0.5),
             deformation=Isotropy(
                 metric=F2,
                 length_scale=Parameter(1.0, (1e-2, 1e2)),
             ),
         ),
         "noise": HomoscedasticNoise(1e-5),
         }

### Vary Test Sizes
Using the top performing normalization, we vary test sizes to see the effect

In [3]:

nn_kwargs_hnsw = {"nn_method": "hnsw"}

k_kwargs_mattern= { "kernel": Matern(
             smoothness=ScalarParam(0.858571),
             deformation=Isotropy(
                 metric=F2,
                 length_scale=Parameter(.756327, (1e-2, 1e2)),
             ),
         ),
         "noise": HomoscedasticNoise(0.244898),
         }

In [4]:
import pickle

test_sizes = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6,0.7,0.8]
# read in data
path = 'norm_21.csv'
path1 = '../data/data-norm/max-only/' + path
data = pd.read_csv(path1,na_values='-')
data.fillna(0,inplace=True)
data_label = ''.join(path.split('.')[:2])
truth_labels = data.iloc[:, 0].values
image_data = data.iloc[:, 1:].values

accuracy = {i:[] for i in test_sizes}
i = 0   # run one at a time since MUYGPS won't left me run multiple at once
for size in test_sizes[i]:
    X_train, X_test, y_train, y_test = train_test_split(image_data, truth_labels, test_size=size, random_state=42)

    print("=============== ", data_label, " ===============")
    print('Training data:', len(y_train[y_train==0]), 'single stars and', len(y_train[y_train==1]), 'blended stars')
    print('Testing data:', len(y_test[y_test==0]), 'single stars and', len(y_test[y_test==1]), 'blended stars')

    onehot_train, onehot_test = generate_onehot_value(y_train), generate_onehot_value(y_test)

    train = {'input': X_train, 'output': onehot_train, 'lookup': y_train}
    test = {'input': X_test, 'output': onehot_test, 'lookup': y_test}

    print("Running Classifier on", data_label)
    #Switch verbose to True for more output


    muygps, nbrs_lookup, surrogate_predictions = do_classify(
                                test_features=np.array(test['input']), 
                                train_features=np.array(train['input']), 
                                train_labels=np.array(train['output']), 
                                nn_count=35,
                                batch_count=20,
                                loss_fn=looph_fn,
                                opt_fn=L_BFGS_B_optimize,
                                k_kwargs=k_kwargs_mattern,
                                nn_kwargs=nn_kwargs_hnsw,
                                verbose=False)
    predicted_labels = np.argmax(surrogate_predictions, axis=1)
    accur = np.around((np.sum(predicted_labels == np.argmax(test["output"], axis=1))/len(predicted_labels))*100, 3)
    accuracy[size].append(accur)
    print("Total accuracy for", size, ":", accur)

    # check if accuracy file exists, if not save it as a new file and if it does exist, append to it
    try:
        with open('./vary-test-size/muygps-accuracy.pkl', 'rb') as f:
            acc = pickle.load(f)
            acc.update(accuracy)
        with open('./vary-test-size/muygps-accuracy.pkl', 'wb') as f:
            pickle.dump(acc, f)
    except:
        with open('./vary-test-size/muygps-accuracy.pkl', 'wb') as f:
            pickle.dump(accuracy, f)


Training data: 3021 single stars and 2429 blended stars
Testing data: 12088 single stars and 9714 blended stars
Running Classifier on norm_21csv


  np.sqrt(
  + np.log(variances)


: 