In [1]:
################################
# General Imports
################################
import csv, math, io, os, os.path, sys, random, time, json, gc, glob, re
from datetime import datetime
import joblib
from joblib import Parallel, delayed, dump, load

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb

################################
# Scientific Imports
################################
import scipy
from scipy.signal import butter,filtfilt

################################
# SKLearn Imports
################################
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

################################
# Suppress Warnings
################################
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

################################
# Initialisers
################################
default_rc_params = (16,9)
plt.rcParams["figure.figsize"] = default_rc_params
sb.set()

xNaNs = np.load("X_NAN_LIST.npy")
xTime = np.load("X_TIME_LIST.npy")

masterX = np.load("True_NOO_fluxes.npy")
masterY = np.load("True_NOO_isplanetlist.npy")

X_arr = np.load("NOO_TRANSFORMED_DATA.npy")

In [2]:
################################
# Functions
################################

def Every_Nth_Value_EACH(y,nth=40):
    return (y[::nth])

################################

def Every_Nth_Value(masterX,nth=40):
    
    #print("Step 4: Subsample (every nth val)")
    
    biglen = len(masterX)
    oldlen = len(masterX[0])
    newlen = len(masterX[0][::nth])
    #print(f"Old = {oldlen}; new = {newlen}")
    
    tmp = np.zeros((biglen,newlen))
    
    for n,X in enumerate(masterX):
        tmp[n] = Every_Nth_Value_EACH(X)
    
    return tmp

################################

def GetMetrics(X_arr, Y_arr, param_grid):

    algorithm = GaussianNB()
    
    # Make the transformers
    enth = FunctionTransformer(Every_Nth_Value)
    
    X_arr = enth.transform(X_arr)
    
    # Perform data manipulation
    X_train, X_test, y_train, y_test = train_test_split(X_arr, Y_arr, random_state=42)
    
    # Do gridsearch for svc params
    grid = GridSearchCV(algorithm, param_grid, return_train_score=True, n_jobs=3) # 4 programs running at once, 3 jobs = 12 CPUs, 3 for current, 1 spare - 16 total
    
    # Fit model
    grid.fit(X_train, y_train)
    
    # Get Model Data
    print("> MODEL")
    print("> > Best parameter (CV score=%0.3f):" % grid.best_score_)
    model = grid.best_estimator_
    print(model)
    

################################

def GetJSONFile(f):
    with open(f) as jf:
        jsonfile = json.load(jf)
        algoname = list(jsonfile.keys())[0]
        print(f"FILE = {f}; ALGORITHM = {algoname}")
    return(jsonfile, algoname)

################################

def OutputText(file):

    # Load and Parse the JSON file       
    jsondata, algoname = GetJSONFile(file)

    # Get the Keys
    keylist = []
    for keys in jsondata:
        keylist.append(jsondata[keys])

    columnList = list(keylist[0][0].keys())

    jsondatalist = []
    for i, x in enumerate(keylist):
        jsondatalist.append(list(list(x)[0].values()))

    #print(jsondatalist[0])

    # Convert TP,TF, etc into ints, not strings
    for row in jsondatalist:
        for i in range(3, len(row)-1):
            #print(row[i])
            try:
                row[i] = int(row[i])
            except ValueError:
                continue
                print("Row is a time not an int")
    
    return (jsondatalist)

################################

def GetRankList(df):

    rank = df['rank_test_score']
    rank = re.sub("\n", "", rank)
    rank = re.sub("  ", " ", rank)
    rank = re.sub(r"[\[\]]", r"", rank)
    rank = rank.split(" ")[1:]
    
    rank = [int(x) for x in rank]
    
    rank = np.array(rank)
    
    return (rank)
    
################################

def WriteJSON(data):
    
    data = {}
    data[targetname] = []
    data[targetname].append({
        'dateran': tStart.replace(microsecond=0),
        'tstart': tStart,
        'tfinish': tFin,
        'tdelta': tDelta,
        'TN' : TN,
        'FP' : FP,
        'FN' : FN,
        'TP' : TP,
        'Accuracy' : acc,
        'Precision' : pre,
        'Recall' : rec,
        'CV' : stats
    })

    # File saving stuff
    fname = targetname+".json"
    targetdest = "./NEW_RESULTS/"

    print("Saving {}".format(fname))

    # Write all the info to a file
    with open(targetdest+fname, "w") as f:
        #f.write(stats)
        json.dump(data, f, indent=4, default=str)
    
################################

In [3]:
path_to_json = "./NEW_RESULTS/"
json_files = [path_to_json + pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]
json_files.sort()

In [4]:
json_files

['./NEW_RESULTS/sklearn-KNN.json',
 './NEW_RESULTS/sklearn-NB-gauss.json',
 './NEW_RESULTS/sklearn-RNDTREE.json',
 './NEW_RESULTS/sklearn-SVM.json',
 './NEW_RESULTS/sktime-CBOSS.json',
 './NEW_RESULTS/sktime-CIF.json',
 './NEW_RESULTS/sktime-DrCIF.json',
 './NEW_RESULTS/sktime-IndBOSS.json',
 './NEW_RESULTS/sktime-IndividualTDE.json',
 './NEW_RESULTS/sktime-KNTS-V2.json',
 './NEW_RESULTS/sktime-KNTS.json',
 './NEW_RESULTS/sktime-MUSE.json',
 './NEW_RESULTS/sktime-RISE.json',
 './NEW_RESULTS/sktime-STSF.json',
 './NEW_RESULTS/sktime-WEASEL.json']

In [5]:
def GetOptimalParameters():
    
    DATA = {}
    
    # Step 1: Load all JSON files
    path_to_json = "./NEW_RESULTS/"
    dest = "OPTIMAL/"
    json_files = [path_to_json + pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json') and pos_json.startswith('sk')]
    
    # Step 2: Loop for each JSON file
    for file in json_files:
        
        # Step 2a: Get file name and algorithm name
        jf, algo = GetJSONFile(file)
        #print(f"Algorithm: {algo}")
        DATA[algo] = []
        
        # Step 2b: Generates Stats Dictionary
        STATS = jf[algo][0]['CV']
        
        # Step 2c: Get the array of ranks
        rank = GetRankList(STATS)
        
        # Step 2d: Check if all parameters were equal
        if len(set(rank)) == 1:
            DATA[algo].append("######## ALL SAME ########")
        
        # Step 2e: If not all same, find best params
        else:
            BEST_PARAMS = np.where(rank==1)[0]
            for i in BEST_PARAMS:
                DATA[algo].append(STATS['params'][i])
                
    # Step 3: Write JSON File
    with open(path_to_json+dest+"optimal_params.json", "w") as f:
        #f.write(stats)
        json.dump(DATA, f, indent=4, default=str)
        
    return (DATA)

In [6]:
GetOptimalParameters()

FILE = ./NEW_RESULTS/sktime-CBOSS.json; ALGORITHM = sktime-CBOSS
FILE = ./NEW_RESULTS/sktime-RISE.json; ALGORITHM = sktime-RISE
FILE = ./NEW_RESULTS/sklearn-KNN.json; ALGORITHM = sklearn-KNN
FILE = ./NEW_RESULTS/sktime-IndividualTDE.json; ALGORITHM = sktime-IndividualTDE
FILE = ./NEW_RESULTS/sklearn-NB-gauss.json; ALGORITHM = sklearn-NB-gauss
FILE = ./NEW_RESULTS/sktime-IndBOSS.json; ALGORITHM = sktime-IndBOSS
FILE = ./NEW_RESULTS/sktime-WEASEL.json; ALGORITHM = sktime-WEASEL
FILE = ./NEW_RESULTS/sktime-CIF.json; ALGORITHM = sktime-CIF
FILE = ./NEW_RESULTS/sktime-KNTS-V2.json; ALGORITHM = sktime-KNTS-V2
FILE = ./NEW_RESULTS/sklearn-RNDTREE.json; ALGORITHM = sklearn-RNDTREE
FILE = ./NEW_RESULTS/sktime-STSF.json; ALGORITHM = sktime-STSF
FILE = ./NEW_RESULTS/sktime-DrCIF.json; ALGORITHM = sktime-DrCIF
FILE = ./NEW_RESULTS/sktime-KNTS.json; ALGORITHM = sktime-KNTS
FILE = ./NEW_RESULTS/sklearn-SVM.json; ALGORITHM = sklearn-SVM
FILE = ./NEW_RESULTS/sktime-MUSE.json; ALGORITHM = sktime-MUSE


{'sktime-CBOSS': [{'max_ensemble_size': 50, 'n_parameter_samples': 450}],
 'sktime-RISE': [{'min_interval': 64, 'n_estimators': 250}],
 'sklearn-KNN': [{'knn__algorithm': 'ball_tree',
   'knn__n_neighbors': 10,
   'knn__weights': 'uniform'},
  {'knn__algorithm': 'kd_tree',
   'knn__n_neighbors': 10,
   'knn__weights': 'uniform'},
  {'knn__algorithm': 'brute',
   'knn__n_neighbors': 10,
   'knn__weights': 'uniform'}],
 'sktime-IndividualTDE': [{'alphabet_size': 5, 'window_size': 100}],
 'sklearn-NB-gauss': ['######## ALL SAME ########'],
 'sktime-IndBOSS': [{'alphabet_size': 5, 'window_size': 10}],
 'sktime-WEASEL': [{'anova': True, 'window_inc': 2}],
 'sktime-CIF': [{'base_estimator': 'DTC', 'n_estimators': 150}],
 'sktime-KNTS-V2': [{'n_neighbors': 25, 'weights': 'uniform'},
  {'n_neighbors': 25, 'weights': 'distance'}],
 'sklearn-RNDTREE': [{'rnd__n_estimators': 100}, {'rnd__n_estimators': 1000}],
 'sktime-STSF': [{'n_estimators': 250}],
 'sktime-DrCIF': [{'base_estimator': 'DTC', 'n

# FUCKING YES I DID IT