In [1]:
## Import Packages
from __future__ import print_function

import numpy as np
import pandas as pd
from itertools import product

#Astro Software
import astropy.units as units
from astropy.coordinates import SkyCoord
from astropy.io import fits

#Plotting Packages
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib import rcParams

import seaborn as sns

from PIL import Image

from yt.config import ytcfg
import yt
import yt.units as u

#Scattering NN
import torch
import torch.nn.functional as F
from torch import optim
from kymatio.torch import Scattering2D
device = "cpu"

#Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.decomposition import PCA, FastICA

import skimage
from skimage import filters

from scipy.optimize import curve_fit
from scipy import linalg
from scipy import stats
from scipy.signal import general_gaussian

#I/O
import h5py
import pickle
import glob
import copy
import time

#Plotting Style
%matplotlib inline
# Dark figure theme first, then the notebook-wide rcParams overrides on top of it.
plt.style.use('dark_background')
for _rc_key, _rc_value in (
    ('text.usetex', False),
    ('axes.titlesize', 20),
    ('xtick.labelsize', 16),
    ('ytick.labelsize', 16),
    ('legend.fontsize', 12),
    ('axes.labelsize', 20),
    ('font.family', 'sans-serif'),
):
    # Same item assignments as before, applied in the same order.
    rcParams[_rc_key] = _rc_value

#Threading
# Call the setter. The previous `torch.set_num_threads=2` only rebound the
# attribute to the integer 2 (shadowing the function) and never changed the
# number of threads torch uses.
torch.set_num_threads(2)
from multiprocessing import Pool

import ntpath
def path_leaf(path):
    """Return the last component of ``path`` with its final extension removed.

    The path is split with ``ntpath.split``, so both ``/`` and ``\\`` are
    treated as separators.  A path that ends in a separator has an empty last
    component and therefore yields ``''``.

    Parameters
    ----------
    path : str
        File path.

    Returns
    -------
    str
        The tail of ``path`` without its last ``.ext`` suffix.
    """
    _head, tail = ntpath.split(path)
    # Use ntpath.splitext: the previous os.path.splitext call referenced `os`,
    # which is not imported in the visible import cell and raised NameError.
    return ntpath.splitext(tail)[0]

def hd5_open(file_name,name):
    """Read one dataset from an HDF5 file and return its full contents.

    Parameters
    ----------
    file_name : str
        Path of the HDF5 file; opened read-only with ``swmr=True``.
    name : str
        Key of the dataset inside the file (e.g. ``'main/data'``).

    Returns
    -------
    The result of ``f[name][:]``, i.e. the whole dataset read into memory.
    """
    # The context manager closes the file even when the lookup or the read
    # raises; the previous explicit f.close() was skipped on any exception.
    with h5py.File(file_name, 'r', swmr=True) as f:
        return f[name][:]

from matplotlib.colors import LinearSegmentedColormap
# Segment data for a blue -> black -> red colormap.  Every anchor is a
# (x, y_left, y_right) triple: blue starts at 1 and reaches 0 at x=0.5, red
# stays 0 up to x=0.5 and reaches 1 at x=1, green is 0 throughout.
cdict1 = dict(
    red=((0.0, 0.0, 0.0), (0.5, 0.0, 0.0), (1.0, 1.0, 1.0)),
    green=((0.0, 0.0, 0.0), (1.0, 0.0, 0.0)),
    blue=((0.0, 0.0, 1.0), (0.5, 0.0, 0.0), (1.0, 0.0, 0.0)),
)
# N=5000 lookup-table entries.
blue_red1 = LinearSegmentedColormap(name='BlueRed1', segmentdata=cdict1, N=5000)

from sklearn.preprocessing import StandardScaler

  self[key]


In [3]:
# Labels for the train and test splits.
mnist_train_y = hd5_open(file_name='../scratch_AKS/data/mnist_train_y.h5', name='main/data')
mnist_test_y = hd5_open(file_name='../scratch_AKS/data/mnist_test_y.h5', name='main/data')

# Coefficient files, one train/test pair per file suffix.
mnist_DHC_out = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out.h5', name='main/data')
mnist_DHC_out_test = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_test.h5', name='main/data')

# The rot7pi_8 pair is loaded but is not part of the stacks built below.
mnist_DHC_out_rot7pi = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rot7pi_8.h5', name='main/data')
mnist_DHC_out_rot7pi_test = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rot7pi_8_test.h5', name='main/data')

mnist_DHC_out_rotpi_3 = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rotpi_3.h5', name='main/data')
mnist_DHC_out_rotpi_3_test = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rotpi_3_test.h5', name='main/data')

mnist_DHC_out_rot2pi_3 = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rot2pi_3.h5', name='main/data')
mnist_DHC_out_rot2pi_3_test = hd5_open(file_name='../scratch_AKS/data/mnist_DHC_out_rot2pi_3_test.h5', name='main/data')

# Stack the base, rotpi_3 and rot2pi_3 sets row-wise ...
mnist_DHC_out_stack = np.vstack([
    mnist_DHC_out,
    mnist_DHC_out_rotpi_3,
    mnist_DHC_out_rot2pi_3,
])
mnist_DHC_out_stack_test = np.vstack([
    mnist_DHC_out_test,
    mnist_DHC_out_rotpi_3_test,
    mnist_DHC_out_rot2pi_3_test,
])

# ... and repeat the labels three times so they line up with the stacked rows.
mnist_DHC_out_stack_train_y = np.hstack([mnist_train_y] * 3)
mnist_DHC_out_stack_test_y = np.hstack([mnist_test_y] * 3)

In [2]:
def prec_LDA(lda,X_test,y_test):
    """Return the micro-averaged precision of a fitted classifier on a test set.

    Parameters
    ----------
    lda : fitted estimator
        Any object exposing ``predict(X)``.
    X_test : array-like
        Features passed to ``lda.predict``.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    float
        ``precision_score(y_test, y_pred, average='micro')``.
    """
    # The earlier version also built a confusion matrix here and discarded it;
    # that unused computation (bound to a local named `cm`) has been removed.
    y_pred = lda.predict(X_test)
    return precision_score(y_test, y_pred, average='micro')

def DHC_iso(wst,J,L):
    """Sum a 2-D coefficient matrix over its ``l`` index, row by row.

    Column layout read from ``wst`` (shape ``(nk, Nd)``):

    * ``[0, 2)``        -- copied through unchanged.
    * ``[2, J*L + 2)``  -- first block, addressed as ``l*J + j``.
    * ``J*L + 2``       -- single column, copied through unchanged.
    * ``[J*L + 3, Nd)`` -- second block, reshaped to ``(J*L + 1, J*L + 1)``;
      both axes are addressed as ``l*J + j`` and index ``J*L`` is an extra
      slot on each axis.

    NOTE(review): ``j`` / ``l`` presumably are the scale and orientation
    indices of the transform that produced ``wst`` -- confirm with its author.

    Returns
    -------
    ndarray of shape ``(nk, 2 + J + 1 + J*J*L + J + J + 1)`` holding, in order:
    the two leading columns; the first block summed over ``l``; column
    ``J*L + 2``; the second block summed into bins of ``(l1 - l2) mod L``
    (flattened over ``j1, j2, bin``); the extra-slot column and the
    extra-slot row, each summed over ``l``; and the corner entry
    ``[J*L, J*L]``.
    """
    n_rows, _ = np.shape(wst)
    n_jl = J * L
    first_block = wst[:, 2:n_jl + 2]
    second_block = np.reshape(wst[:, n_jl + 3:], (n_rows, n_jl + 1, n_jl + 1))

    first_iso = np.zeros((n_rows, J))
    second_iso = np.zeros((n_rows, J, J, L))
    extra_col_iso = np.zeros((n_rows, J))
    extra_row_iso = np.zeros((n_rows, J))

    # Entries l*J .. l*J + J - 1 are the J values of j for a fixed l, so each
    # slice below handles all j at once while l is still visited in increasing
    # order (the per-element summation order is unchanged).
    for l1 in range(L):
        j_block_1 = slice(l1 * J, (l1 + 1) * J)
        first_iso += first_block[:, j_block_1]
        extra_col_iso += second_block[:, j_block_1, n_jl]
        extra_row_iso += second_block[:, n_jl, j_block_1]
        for l2 in range(L):
            j_block_2 = slice(l2 * J, (l2 + 1) * J)
            delta_l = np.mod(l1 - l2, L)
            second_iso[:, :, :, delta_l] += second_block[:, j_block_1, j_block_2]

    pieces = (
        wst[:, 0:2],
        first_iso,
        wst[:, n_jl + 2].reshape(n_rows, 1),
        second_iso.reshape(n_rows, J * J * L),
        extra_col_iso,
        extra_row_iso,
        second_block[:, n_jl, n_jl].reshape(n_rows, 1),
    )
    return np.hstack(pieces)

In [15]:
# LDA fit on the DHC_iso-reduced stacked training set (J=6, L=8).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaAug = LDA(n_components=N - 1)
ldaAug.fit(
    X=DHC_iso(wst=mnist_DHC_out_stack, J=6, L=8),
    y=mnist_DHC_out_stack_train_y,
)

LinearDiscriminantAnalysis(n_components=9)

In [16]:
# LDA fit on the DHC_iso-reduced mnist_DHC_out training set (J=6, L=8).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaNR = LDA(n_components=N - 1)
ldaNR.fit(
    X=DHC_iso(wst=mnist_DHC_out, J=6, L=8),
    y=mnist_train_y,
)

LinearDiscriminantAnalysis(n_components=9)

In [6]:
# Train / test coefficient files from the 2021_03_08 "RR_wd2" run.
mnist_DHC_outR = hd5_open(
    file_name='../from_cannon/2021_03_08/mnist_DHC_train_RR_wd2.h5',
    name='main/data',
)
mnist_DHC_outR_test = hd5_open(
    file_name='../from_cannon/2021_03_08/mnist_DHC_test_RR_wd2.h5',
    name='main/data',
)

In [17]:
# LDA fit on the DHC_iso-reduced mnist_DHC_outR training set (J=6, L=8).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaR = LDA(n_components=N - 1)
ldaR.fit(
    X=DHC_iso(wst=mnist_DHC_outR, J=6, L=8),
    y=mnist_train_y,
)

LinearDiscriminantAnalysis(n_components=9)

In [18]:
# ldaNR scored on the DHC_iso-reduced mnist_DHC_out_test set.
prec_LDA(
    lda=ldaNR,
    X_test=DHC_iso(wst=mnist_DHC_out_test, J=6, L=8),
    y_test=mnist_test_y,
)

0.935

In [19]:
# ldaR scored on the DHC_iso-reduced mnist_DHC_outR_test set.
prec_LDA(
    lda=ldaR,
    X_test=DHC_iso(wst=mnist_DHC_outR_test, J=6, L=8),
    y_test=mnist_test_y,
)

0.9213

In [20]:
# ldaNR (fit on mnist_DHC_out) scored on the DHC_iso-reduced mnist_DHC_outR_test set.
prec_LDA(
    lda=ldaNR,
    X_test=DHC_iso(wst=mnist_DHC_outR_test, J=6, L=8),
    y_test=mnist_test_y,
)

0.873

In [21]:
# ldaAug (fit on the stacked set) scored on the DHC_iso-reduced mnist_DHC_out_test set.
prec_LDA(
    lda=ldaAug,
    X_test=DHC_iso(wst=mnist_DHC_out_test, J=6, L=8),
    y_test=mnist_test_y,
)

0.9195

In [22]:
# ldaAug (fit on the stacked set) scored on the DHC_iso-reduced mnist_DHC_outR_test set.
prec_LDA(
    lda=ldaAug,
    X_test=DHC_iso(wst=mnist_DHC_outR_test, J=6, L=8),
    y_test=mnist_test_y,
)

0.9207

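Next, repeat the same comparisons for the "REG" variants: LDA fit directly on the full coefficient vectors, without the `DHC_iso` reduction.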

In [23]:
# LDA fit on the stacked training coefficients as loaded (no DHC_iso reduction).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaAugREG = LDA(n_components=N - 1)
ldaAugREG.fit(X=mnist_DHC_out_stack, y=mnist_DHC_out_stack_train_y)

LinearDiscriminantAnalysis(n_components=9)

In [24]:
# LDA fit on mnist_DHC_out as loaded (no DHC_iso reduction).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaNRREG = LDA(n_components=N - 1)
ldaNRREG.fit(X=mnist_DHC_out, y=mnist_train_y)

LinearDiscriminantAnalysis(n_components=9)

In [25]:
# LDA fit on mnist_DHC_outR as loaded (no DHC_iso reduction).
# N is presumably the number of classes; LDA is asked for N-1 components.
N = 10
ldaRREG = LDA(n_components=N - 1)
ldaRREG.fit(X=mnist_DHC_outR, y=mnist_train_y)

LinearDiscriminantAnalysis(n_components=9)

In [26]:
# ldaNRREG scored on mnist_DHC_out_test (no DHC_iso reduction).
prec_LDA(lda=ldaNRREG, X_test=mnist_DHC_out_test, y_test=mnist_test_y)

0.9623

In [27]:
# ldaRREG scored on mnist_DHC_outR_test (no DHC_iso reduction).
prec_LDA(lda=ldaRREG, X_test=mnist_DHC_outR_test, y_test=mnist_test_y)

0.922

In [28]:
# ldaNRREG (fit on mnist_DHC_out) scored on mnist_DHC_outR_test (no DHC_iso reduction).
prec_LDA(lda=ldaNRREG, X_test=mnist_DHC_outR_test, y_test=mnist_test_y)

0.4972

In [29]:
# ldaAugREG (fit on the stacked set) scored on mnist_DHC_out_test (no DHC_iso reduction).
prec_LDA(lda=ldaAugREG, X_test=mnist_DHC_out_test, y_test=mnist_test_y)

0.9415

In [30]:
# ldaAugREG (fit on the stacked set) scored on mnist_DHC_outR_test (no DHC_iso reduction).
prec_LDA(lda=ldaAugREG, X_test=mnist_DHC_outR_test, y_test=mnist_test_y)

0.8851

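Now loop over every test file in `test_list` and collect the precision for each, so that every comparison gets a mean and a standard deviation (error bar).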

In [38]:
# Test-set files: the un-suffixed file first, then the numbered files _0 .. _6.
# The list is built from a single stem so the directory and base name are
# written once instead of eight times; the resulting paths are unchanged.
RR_TEST_STEM = '../from_cannon/2021_03_08/mnist_DHC_test_RR_wd2'
N_NUMBERED_RR_FILES = 7
test_list = [RR_TEST_STEM + '.h5'] + [
    '{}_{}.h5'.format(RR_TEST_STEM, idx) for idx in range(N_NUMBERED_RR_FILES)
]

In [39]:
# Precision of ldaR on the DHC_iso-reduced (J=6, L=8) data of each file in test_list.
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_R_R = [
    prec_LDA(ldaR, DHC_iso(hd5_open(test_path, 'main/data'), 6, 8), mnist_test_y)
    for test_path in test_list
]

In [40]:
# Mean and standard deviation of the per-file precisions in prec_R_R.
# NOTE(review): np.std defaults to ddof=0 (population std); confirm this is the
# intended error bar for the 8 files in test_list.
np.mean(prec_R_R),np.std(prec_R_R)

(0.921025, 0.0004630064794362928)

In [41]:
# Display the individual per-file precisions behind the mean/std reported for prec_R_R.
prec_R_R

[0.9213, 0.9206, 0.9201, 0.9215, 0.9216, 0.9209, 0.921, 0.9212]

In [46]:
# Precision of ldaNR on the DHC_iso-reduced (J=6, L=8) data of each file in test_list.
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_NR_R = [
    prec_LDA(ldaNR, DHC_iso(hd5_open(test_path, 'main/data'), 6, 8), mnist_test_y)
    for test_path in test_list
]

In [47]:
# Mean, standard deviation and raw per-file precisions for prec_NR_R.
# NOTE(review): np.std defaults to ddof=0 (population std); confirm this is the
# intended error bar for the 8 files in test_list.
np.mean(prec_NR_R),np.std(prec_NR_R),prec_NR_R

(0.8766375,
 0.0023743091100360265,
 [0.873, 0.8734, 0.8782, 0.8806, 0.8769, 0.8773, 0.8757, 0.878])

In [48]:
# Precision of ldaAug on the DHC_iso-reduced (J=6, L=8) data of each file in test_list.
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_NRAug_R = [
    prec_LDA(ldaAug, DHC_iso(hd5_open(test_path, 'main/data'), 6, 8), mnist_test_y)
    for test_path in test_list
]

In [49]:
# Mean, standard deviation and raw per-file precisions for prec_NRAug_R.
# NOTE(review): np.std defaults to ddof=0 (population std); confirm this is the
# intended error bar for the 8 files in test_list.
np.mean(prec_NRAug_R),np.std(prec_NRAug_R),prec_NRAug_R

(0.921175,
 0.0008073877630977533,
 [0.9207, 0.9209, 0.9207, 0.9225, 0.9226, 0.9207, 0.9204, 0.9209])

In [51]:
# Precision of ldaRREG on the coefficients of each file in test_list, used as
# loaded (no DHC_iso reduction).
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_R_R_REG = [
    prec_LDA(ldaRREG, hd5_open(test_path, 'main/data'), mnist_test_y)
    for test_path in test_list
]
# Mean, standard deviation (np.std default, ddof=0) and the raw values.
np.mean(prec_R_R_REG),np.std(prec_R_R_REG),prec_R_R_REG

(0.9232875,
 0.0009649319924222641,
 [0.922, 0.9225, 0.923, 0.9238, 0.9244, 0.9234, 0.9223, 0.9249])

In [52]:
# Precision of ldaNRREG on the coefficients of each file in test_list, used as
# loaded (no DHC_iso reduction).
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_NR_R_REG = [
    prec_LDA(ldaNRREG, hd5_open(test_path, 'main/data'), mnist_test_y)
    for test_path in test_list
]
# Mean, standard deviation (np.std default, ddof=0) and the raw values.
np.mean(prec_NR_R_REG),np.std(prec_NR_R_REG),prec_NR_R_REG

(0.5018875,
 0.005049118116067407,
 [0.4972, 0.4956, 0.5048, 0.499, 0.5068, 0.4962, 0.5073, 0.5082])

In [53]:
# Precision of ldaAugREG on the coefficients of each file in test_list, used as
# loaded (no DHC_iso reduction).
# Each file is read into a comprehension-local value, so the module-level
# mnist_DHC_outR_test array loaded in an earlier cell is no longer overwritten
# and no loop variable leaks into the notebook namespace.
prec_AUG_R_REG = [
    prec_LDA(ldaAugREG, hd5_open(test_path, 'main/data'), mnist_test_y)
    for test_path in test_list
]
# Mean, standard deviation (np.std default, ddof=0) and the raw values.
np.mean(prec_AUG_R_REG),np.std(prec_AUG_R_REG),prec_AUG_R_REG

(0.884325,
 0.0016551057368035316,
 [0.8851, 0.8823, 0.8827, 0.8845, 0.883, 0.886, 0.8874, 0.8836])