In [None]:
#!/usr/bin/env python3

################################
# Scientific imports
################################
import matplotlib.pyplot as plt
import numpy as np

from astroquery.mast import Observations
from astroquery.mast import Catalogs

from astropy.table import Table
from astropy.table import QTable
from astropy.io import fits
from astropy import units as u
from astropy.timeseries import BoxLeastSquares
from astropy.timeseries import TimeSeries
from astropy.stats import sigma_clipped_stats
from astropy.timeseries import aggregate_downsample

import sktime as skt

from sktime.datatypes._panel._convert import (
    from_2d_array_to_nested,
    from_nested_to_2d_array,
    is_nested_dataframe,
)

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

################################
# General imports
################################
import csv, math, io, os, os.path, sys, random, time, json, gc
from datetime import datetime
import pandas as pd
import seaborn as sb
from collections import Counter
import joblib
from joblib import Parallel, delayed, dump, load

################################
# Suppress Warnings
################################
import warnings

# Ignore every UserWarning and RuntimeWarning for the rest of the session.
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", RuntimeWarning)

################################
# Initialisers
################################
# Default figure size for all plots, then seaborn's default theme.
plt.rcParams.update({"figure.figsize": (16, 5)})
sb.set()

# Load the Data files
#fitsarr = np.load("fitslist.npy")

In [1]:
%reset -f

import numpy as np
from joblib import Parallel, delayed, dump, load
from sklearn.model_selection import train_test_split
from sktime.datatypes._panel._convert import (
    from_2d_array_to_nested,
    from_nested_to_2d_array,
    is_nested_dataframe,
)

# Number of leading samples to use; passed to MakeModelsNEW in the timed cell further down.
rows=30

In [None]:
#X = np.load("None_Or_One_Exoplanet.npy")
#X = np.array([item[1:-1] for item in np.load("None_Or_One_Exoplanet_NORMALISED.npy")])

#y = np.load("one_or_none_isplanetlist.npy")[:rows]
#X_nested = from_2d_array_to_nested(np.array([item[1:-1] for item in np.load("None_Or_One_Exoplanet_NORMALISED.npy")])[:rows, :])
#print(f"X_nested is a nested DataFrame: {is_nested_dataframe(X_nested)}")
#print(f"The cell contains a {type(X_nested.iloc[0,0])}.")
#print(f"The nested DataFrame has shape {X_nested.shape}")

In [None]:
# Build the inputs in this cell so it runs on a fresh kernel: `X_nested` and `y`
# were otherwise only defined in commented-out code, giving a NameError here.
# Each row of the normalised array has its first and last element dropped, and
# only the first `rows` samples are kept.
y = np.load("one_or_none_isplanetlist.npy")[:rows]
X_nested = from_2d_array_to_nested(
    np.array([item[1:-1] for item in np.load("None_Or_One_Exoplanet_NORMALISED.npy")])[:rows, :]
)
X_train, X_test, y_train, y_test = train_test_split(X_nested, y)
#X_train, X_test, y_train, y_test = np.load("X_y_test_train_split.npy",allow_pickle=True)

In [None]:
#
# Persist the training part of the split so that later model fits can be run
# on exactly the same data. Only X_train and y_train are written here; numpy
# appends the ".npy" extension to the given names.
#
np.save(file="X_train", arr=X_train)
np.save(file="y_train", arr=y_train)

In [None]:
#
# Reload the saved training arrays to check that they can be read back.
# NOTE(review): allow_pickle=True unpickles the file contents, which can run
# arbitrary code; only load files produced by this notebook.
#
import numpy as np
#X_train, X_test, y_train, y_test = np.load("X_y_test_train_split.npy",allow_pickle=True)
X_train = np.load(file="X_train.npy", allow_pickle=True)
y_train = np.load(file="y_train.npy", allow_pickle=True)

In [None]:
#classifier = TimeSeriesForestClassifier()
#classifier.fit(X_train, y_train)
#y_pred = classifier.predict(X_test)

#print(f"Accuracy: {accuracy_score(y_test, y_pred)}\nPrecision: {precision_score(y_test, y_pred)}\nRecall: {recall_score(y_test, y_pred)}")

In [2]:
from sktime.classification.dictionary_based import IndividualTDE

# Classifier classes (not instances); the fitting helpers instantiate them
# with no arguments.
list_of_classifiers = [IndividualTDE]

#MUSE().fit(X_train, y_train)

In [None]:
def myfunc(classifierType, rows=-1):
    """Fit one classifier on the exoplanet data and save the fitted model.

    Loads ``None_Or_One_Exoplanet_NORMALISED.npy`` (dropping the first and
    last element of every row) and ``one_or_none_isplanetlist.npy`` from the
    working directory, optionally keeps only the first ``rows`` samples,
    splits them with ``train_test_split``, fits ``classifierType()`` on the
    training part and dumps it with joblib under ``./sktime_models/``.

    Parameters
    ----------
    classifierType : type
        Classifier class. It is instantiated with no arguments and must
        provide ``fit(X, y)``.
    rows : int or None, default -1
        Number of leading samples to use. ``None``, zero or a negative value
        means the full dataset (saved with the ``FULL`` suffix).

    Returns
    -------
    None
        The fitted model is written to disk, not returned.
    """
    # Local import: the notebook's `%reset -f` cell clears module-level names.
    import os

    use_subset = rows is not None and rows > 0

    X = np.array([item[1:-1] for item in np.load("None_Or_One_Exoplanet_NORMALISED.npy")])
    y = np.load("one_or_none_isplanetlist.npy")

    if use_subset:
        X = X[:rows, :]
        y = y[:rows]

    print("Splitting into Test and Train sets...")
    X_nested = from_2d_array_to_nested(X)
    # Only the training part is needed here; the test part is discarded.
    X_train, _, y_train, _ = train_test_split(X_nested, y)

    # Selecting Classifier Type
    c = classifierType()
    cname = classifierType.__name__

    # Create the output directory before fitting, so a missing folder cannot
    # make the save fail after a long fit.
    os.makedirs('./sktime_models', exist_ok=True)

    # Fitting Classifier
    print(f"Fitting the {cname} model...")
    c.fit(X_train, y_train)

    # Saving Model
    print(f"Saving {cname} model...")
    if use_subset:
        dump(c, f'./sktime_models/{cname}_fitted_{rows}.joblib')
    else:
        dump(c, f'./sktime_models/{cname}_fitted_FULL.joblib')

    print("Done!")

In [3]:
def MakeModelsNEW(classifierType, rows=None):
    """Fit one classifier on the exoplanet data and save the fitted model.

    Loads ``one_or_none_isplanetlist.npy`` and
    ``None_Or_One_Exoplanet_NORMALISED.npy`` (dropping the first and last
    element of every row) from the working directory, keeps the first
    ``rows`` samples, splits them with ``train_test_split``, fits
    ``classifierType()`` on the training part and dumps it with joblib to
    ``./sktime_models/_<ClassName>_fitted_<rows|FULL>.joblib``.

    Parameters
    ----------
    classifierType : type
        Classifier class. It is instantiated with no arguments and must
        provide ``fit(X, y)``.
    rows : int or None, default None
        Number of leading samples to use. ``None``, zero or a negative value
        means the full dataset (saved with the ``FULL`` suffix).

    Returns
    -------
    None
        The fitted model is written to disk, not returned.
    """
    # Local import: the notebook's `%reset -f` cell clears module-level names.
    import os

    # Normalise `rows` up front. Comparing None with 0 raises TypeError, and a
    # negative value would slice off trailing samples while being saved as "FULL".
    if rows is not None and rows <= 0:
        rows = None

    print("Loading the Test and Train Datasets...")

    y = np.load("one_or_none_isplanetlist.npy")[:rows]
    X = np.array([item[1:-1] for item in np.load("None_Or_One_Exoplanet_NORMALISED.npy")])
    X_nested = from_2d_array_to_nested(X[:rows, :])
    # Only the training part is needed here; the test part is discarded.
    X_train, _, y_train, _ = train_test_split(X_nested, y)

    # Selecting Classifier Type
    c = classifierType()
    cname = classifierType.__name__

    # Create the output directory before fitting, so a missing folder cannot
    # make the save fail after a long fit.
    os.makedirs('./sktime_models', exist_ok=True)

    # Fitting Classifier
    print(f"Fitting the {cname} model...")
    c.fit(X_train, y_train)

    # Saving Model
    print(f"Saving {cname} model...")
    if rows is not None:
        dump(c, f'./sktime_models/_{cname}_fitted_{rows}.joblib')
    else:
        dump(c, f'./sktime_models/_{cname}_fitted_FULL.joblib')
        

In [None]:
# Reset this process's CPU affinity with `taskset` (mask 0xff = CPUs 0-7) so
# that parallel workers are not pinned to a single core after importing numpy.
# See: https://stackoverflow.com/questions/15639779/why-does-multiprocessing-use-only-a-single-core-after-i-import-numpy
# NOTE(review): `taskset` is a Linux utility; this cell does nothing useful elsewhere.
import os  # imported here because the `%reset -f` cell clears the earlier `import os`

os.system("taskset -p 0xff %d" % os.getpid())

In [None]:
%%time
# NOTE(review): list_of_classifiers is defined with a single entry in this
# notebook, so index 1 raises IndexError - confirm which classifier was intended.
myfunc(list_of_classifiers[1], 10)

In [None]:
%%time
# NOTE(review): list_of_classifiers is defined with a single entry in this
# notebook, so index 1 raises IndexError - confirm which classifier was intended.
myfunc(list_of_classifiers[1], 100)

In [None]:
%%time
# NOTE(review): small_list_of_classifiers is not defined in any visible cell of
# this notebook; define it (or use list_of_classifiers) before running.
Parallel(n_jobs=8)(delayed(myfunc)(c, 10) for c in small_list_of_classifiers)

In [None]:
%%time
# NOTE(review): small_list_of_classifiers is not defined in any visible cell of
# this notebook; define it (or use list_of_classifiers) before running.
Parallel(n_jobs=8)(delayed(myfunc)(c, 100) for c in small_list_of_classifiers)

In [4]:
%%time
# Fit and save a model for the first entry of list_of_classifiers on the first `rows` samples.
# NOTE(review): the recorded output names MUSE although the list now holds
# IndividualTDE - presumably output from an earlier run; re-run to refresh.
MakeModelsNEW(list_of_classifiers[0], rows)

Loading the Test and Train Datasets...
Fitting the MUSE model...
Saving MUSE model...
CPU times: user 5min 46s, sys: 771 ms, total: 5min 47s
Wall time: 5min 47s


In [None]:
%%time
# Time fitting and saving the first classifier on the first 20 samples.
myfunc(list_of_classifiers[0], 20)

In [None]:
%%time
# Time fitting and saving the first classifier on the first 40 samples.
myfunc(list_of_classifiers[0], 40)

In [None]:
# Fitting on the full dataset should take about 18 hours to run

In [None]:
%%time
# Fit every classifier in list_of_classifiers on the first 40 samples,
# using up to 3 joblib workers.
Parallel(n_jobs=3)(delayed(myfunc)(c, 40) for c in list_of_classifiers)

In [None]:
%%time
# Same run with up to 3 workers, but asking joblib to prefer a thread-based
# backend, for timing comparison.
Parallel(n_jobs=3, prefer="threads")(delayed(myfunc)(c, 40) for c in list_of_classifiers)

In [None]:
%%time
# Same thread-preferring run with n_jobs=-1 (joblib: use all available CPUs).
Parallel(n_jobs=-1, prefer="threads")(delayed(myfunc)(c, 40) for c in list_of_classifiers)