### Fitting OCCAM clusters.

This notebook uses PCA to find the directions in the autoencoder's latent space along which open clusters vary.

**TO DO**

- Incorporate the PCA transforms into the `LatentVector` framework somehow.

- Include tools for studying the explained variance in a chosen direction of hyperspace.

- Include tools for plotting cluster breakdowns along random directions.

In [1]:
import astropy
from astropy.io import fits
import numpy as np
import matplotlib.pyplot as plt

import inspect

from apoNN.src.occam import Occam

import apogee.tools.read as apread
import apogee.tools.path as apogee_path
from apogee.tools import bitmask
import collections


from apoNN.src.datasets import ApogeeDataset
from apoNN.src.utils import generate_loss_with_masking
from apoNN.src.utils import dump
from apoNN.src.utils import load
from apoNN.src.vectors import *

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn
from sklearn.decomposition import PCA


# Pick the compute device: the first CUDA GPU when one is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Switch the apogee path tools to data release 16.
apogee_path.change_dr(16)

## Loading datasets


We load all of the datasets used below: the OCCAM sample and the `behaved_restricted` sample.

In [2]:
# Load the saved "occam" bundle and pull out the two entries used below:
# the allStar table and the per-star cluster labels.
occam = load("occam")
allStar_occam, occam_cluster_idxs = occam["allStar"], occam["cluster_idxs"]

# Wrap the OCCAM allStar table in an ApogeeDataset with the requested outputs.
dataset_occam = ApogeeDataset(
    allStar_occam,
    outputs=["aspcap", "mask", "physical", "idx"],
)

In [3]:
# Saved allStar table for the "behaved restricted" sample.
allStar_behaved_restricted = load("allStar_behaved_restricted")

# Matching dataset, built from the file named "behaved_restricted" with the
# same outputs as the OCCAM dataset.
dataset_behaved_restricted = ApogeeDataset(
    filename="behaved_restricted",
    outputs=["aspcap", "mask", "physical", "idx"],
)

In [None]:
# Columns of the allStar table collected into the label vector `y`.
considered_parameters = ["Teff","logg","Fe_H","O_FE","C_FE","Na_FE","Mg_FE","Si_FE","S_FE","Al_FE","P_FE","Ni_FE"]

# Number of stars kept in `y`. It has to equal the number of stars encoded
# into z_behaved_restricted (2000 in this notebook) so that latents and labels
# pair up row by row; change both together.
n_data = 2000

# This cell previously read from `cut_allStar` and `n_data`, neither of which
# is defined anywhere in the notebook, so it could not run and `y` was never
# created. It now uses the allStar table loaded above.
# NOTE(review): assumes allStar_behaved_restricted is the table behind
# dataset_behaved_restricted and shares its row order — confirm.
# Result has one row per star and one column per entry of considered_parameters.
y = Vector(np.array([allStar_behaved_restricted[param] for param in considered_parameters])[:,:n_data].T)

In [4]:
# Load the trained autoencoder checkpoint.
# map_location=device remaps the stored tensors onto the device chosen in the
# setup cell, so a checkpoint saved from a GPU session also loads on a
# CPU-only machine instead of raising a CUDA deserialization error.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source. The absolute cluster path also ties this notebook to one
# machine; consider a configurable data directory.
autoencoder = torch.load(
    "/share/splinter/ddm/taggingProject/apogeeFactory/outputs/guild/z10/ae_3600.p",
    map_location=device,
)


### Creating vectors using the dataset

In [63]:
# Latent vectors for the behaved-restricted sample, produced with the autoencoder.
# The third argument is 2000 here; pass len(dataset_behaved_restricted) instead
# to use the whole dataset (presumably it is the number of dataset entries to
# encode — TODO confirm against apoNN.src.vectors).
z_behaved_restricted = LatentVector(
    dataset_behaved_restricted,
    autoencoder,
    2000,
)

In [64]:
# Latent vectors for the OCCAM sample, built over the full dataset together
# with the per-star cluster labels.
z_occam = OccamLatentVector(
    dataset_occam,
    autoencoder,
    occam_cluster_idxs,
    len(dataset_occam),
)

In [65]:
# Spot-check one entry of occam_cluster_idxs (the 25th from the end); the
# recorded output shows the entries are cluster names.
occam_cluster_idxs[-25]

'NGC 752'

In [66]:
# Display the cluster_centered array of the OCCAM latent vectors.
# NOTE(review): presumably each star's latent with its cluster mean subtracted
# — confirm in apoNN.src.vectors.
z_occam.cluster_centered

array([[ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [-0.28926706,  0.01641631, -0.10192025, ...,  0.09440404,
        -0.0907461 ,  0.09809157],
       [-0.19106913,  0.01425755,  0.01998723, ...,  0.03749567,
        -0.06772506,  0.14812911],
       ...,
       [-0.04308999,  0.05342221,  0.07310522, ...,  0.02542442,
        -0.12589973, -0.03005296],
       [-0.18691492,  0.04816008,  0.10789359, ...,  0.03099191,
        -0.21405321,  0.05582041],
       [-0.04694963, -0.0819484 , -0.14627397, ...,  0.00577688,
         0.15814644,  0.10526949]])

### Setting up the PCA

In [71]:
# Stage 1: 10-component PCA with whitening enabled.
whitening_pca = PCA(whiten=True, n_components=10)

# Stage 2: 9-component PCA without whitening; it is fitted on vectors that
# have already been passed through whitening_pca.
cluster_pca = PCA(n_components=9)

In [72]:
# Fit the whitening PCA on the first 1000 rows of the centered
# behaved-restricted latents.
whitening_pca.fit(z_behaved_restricted.centered[:1000])

PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,
    svd_solver='auto', tol=0.0, whiten=True)

In [73]:
# Fraction of the total variance captured by each whitening component.
whitening_pca.explained_variance_ratio_

array([0.38824588, 0.22965056, 0.13258937, 0.11214939, 0.06029538,
       0.04241241, 0.02002985, 0.00836793, 0.00337172, 0.00288749],
      dtype=float32)

In [76]:
# Fit cluster_pca on rows 1000-1099 of the centered behaved-restricted latents
# (rows not used to fit whitening_pca), after whitening them.
cluster_pca.fit(whitening_pca.transform(z_behaved_restricted.centered[1000:1100]))

PCA(copy=True, iterated_power='auto', n_components=9, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [57]:
# Refit cluster_pca on the whitened, cluster-centered OCCAM latents; this
# replaces any earlier fit held by the same object.
# NOTE(review): this cell's execution count (57) is lower than that of the
# cells around it (76, 77), so in the saved session the behaved-restricted fit
# ran last and the explained-variance output shown below belongs to it. On a
# top-to-bottom run this fit would be the one left in place. Consider using a
# separate PCA object for each sample.
cluster_pca.fit(whitening_pca.transform(z_occam.cluster_centered))

PCA(copy=True, iterated_power='auto', n_components=9, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [77]:
# Fraction of the total variance captured by each component of whichever
# cluster_pca fit ran most recently.
cluster_pca.explained_variance_ratio_

array([0.16421404, 0.15969291, 0.144708  , 0.12585893, 0.09265564,
       0.08291412, 0.07127303, 0.06373993, 0.05022834], dtype=float32)

### Linear transformation

In [78]:
# Build a LinearTransformation from the behaved-restricted latents and the
# label vector y (presumably a fitted linear map between the two — confirm in
# apoNN.src.vectors).
# NOTE(review): y is only assigned in the parameter cell near the top of the
# notebook, which is shown as In [None], i.e. it was not executed in the saved
# session; that is why the recorded output below is a NameError.
w = LinearTransformation(z_behaved_restricted,y)

NameError: name 'y' is not defined