### Weakly-supervised learning

The idea of this notebook is to train a neural network to reconstruct spectra while also using weakly supervised learning to ensure that a fraction of the latent representation is preserved as well as possible for stars from the same cluster.

In [1]:
import apogee.tools.read as apread
import matplotlib.pyplot as plt 
import apogee.tools.path as apogee_path
from apogee.tools import bitmask

import random
import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch.nn as nn

from apoNN.src.datasets import ApogeeDataset,AspcapDataset
from apoNN.src.utils import get_mask_elem,dump,load,generate_loss_with_masking

import apoNN.src.vectors as vector


from tagging.src.networks import ConditioningAutoencoder,Embedding_Decoder,Feedforward,ParallelDecoder,Autoencoder,AutoencoderwLinear

# Run on the first CUDA GPU when one is usable, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Switch the `apogee` package to data release 16 (presumably SDSS DR16 — confirm).
apogee_path.change_dr(16)





[(['TEFF', 'LOGG', 'LOG10VDOP', 'METALS', 'C', 'N', 'O Mg Si S Ca Ti'], ['C', 'N', 'O', 'Na', 'Mg', 'Al', 'Si', 'S', 'K', 'Ca', 'Ti', 'V', 'Mn', 'Fe', 'Ni'], ['[C/M]', '[N/M]', '[O/M]', '[Na/H]', '[Mg/M]', '[Al/H]', '[Si/M]', '[S/M]', '[K/H]', '[Ca/M]', '[Ti/M]', '[V/H]', '[Mn/H]', '[Fe/H]', '[Ni/H]'], [0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1])]


### Hyperparameters

We collect all of our hyperparameters here so that they are easy to adjust later.

In [2]:
# --- data ---
dataset_name = "aspcap_training_clean"  # file name handed to AspcapDataset
recenter = True                         # forwarded to AspcapDataset(recenter=...)
n_bins = 8575                           # width of the encoder input / decoder output

# --- latent space ---
n_z = 7        # total number of latent dimensions
n_shared = 5   # leading latent dimensions passed to the cluster loss (z[:, :n_shared])

# --- optimisation ---
lr = 5e-5
n_batch = 64
activation = nn.LeakyReLU()

# --- architecture: the decoder mirrors the encoder layer widths ---
encoder_architecture = [n_bins, 512, 256, 128, n_z]
decoder_architecture = encoder_architecture[::-1]

### Setting up the data


We need two datasets: one containing the full dataset and another containing the OCCAM dataset.

In [3]:
# Full training set, selected by the `dataset_name` / `recenter` hyperparameters.
dataset = AspcapDataset(filename=dataset_name, recenter=recenter)

# OCCAM cluster spectra. Both datasets are fed through the same autoencoder, so
# this one takes the same `recenter` flag as the training set instead of a
# hard-coded True that would silently diverge if the hyperparameter is changed.
# NOTE(review): only this dataset passes tensor_type explicitly — confirm that
# AspcapDataset's default tensor type is what the training set should use.
dataset_occam = AspcapDataset(
    filename="aspcap_occam",
    recenter=recenter,
    tensor_type=torch.FloatTensor,
)

In [4]:
# Mini-batch iterator over the full training set: reshuffled on every pass, and
# the trailing incomplete batch is dropped so each batch holds exactly n_batch items.
loader = DataLoader(dataset, batch_size=n_batch, shuffle=True, drop_last=True)

In [5]:
# Load the stored "occam" object through the project's `load` helper.
occam = load("occam")
# Cluster identifiers stored under "cluster_idxs" — presumably one entry per
# row of dataset_occam; TODO confirm against how the file was dumped.
occam_cluster_idxs = occam["cluster_idxs"]
# Registry built from those identifiers. Later cells iterate it with .items()
# and use each value to index dataset_occam, i.e. it groups dataset rows by cluster.
registry = vector.OccamLatentVector.make_registry(occam_cluster_idxs)

### Creating the neural network

In [6]:
# Build the two MLP halves (encoder first, then decoder) from their layer-width
# lists, sharing the same activation, and move both to the compute device.
encoder, decoder = (
    Feedforward(layer_widths, activation=activation).to(device)
    for layer_widths in (encoder_architecture, decoder_architecture)
)
#autoencoder = Autoencoder(encoder,decoder,n_bins=n_bins,intermediate_activation=activation).to(device)
#optimizer_autoencoder = torch.optim.Adam(autoencoder.parameters(), lr=lr)



In [7]:
# Activation-free stack mapping the n_z-dimensional latent to itself through
# layers of constant width n_z (four widths in the list).
linear_bottleneck = Feedforward([n_z] * 4, activation=None).to(device)

In [8]:
# Assemble the full model (encoder -> linear bottleneck -> decoder) and bind it
# to a name: the training loop and the inspection cells below all refer to
# `autoencoder`, which was previously never assigned.
autoencoder = AutoencoderwLinear(
    encoder,
    decoder,
    linear_bottleneck,
    n_bins=n_bins,
    intermediate_activation=activation,
).to(device)

# Optimizer over every parameter of the assembled model; the training loop
# calls zero_grad()/step() on `optimizer_autoencoder`.
optimizer_autoencoder = torch.optim.Adam(autoencoder.parameters(), lr=lr)

# Bare last expression so the cell still displays the module tree.
autoencoder

AutoencoderwLinear(
  (encoder): Feedforward(
    (fc): Sequential(
      (0): Linear(in_features=8575, out_features=512, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=512, out_features=256, bias=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Linear(in_features=256, out_features=128, bias=True)
      (5): LeakyReLU(negative_slope=0.01)
      (6): Linear(in_features=128, out_features=7, bias=True)
    )
  )
  (decoder): Feedforward(
    (fc): Sequential(
      (0): Linear(in_features=7, out_features=128, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
      (2): Linear(in_features=128, out_features=256, bias=True)
      (3): LeakyReLU(negative_slope=0.01)
      (4): Linear(in_features=256, out_features=512, bias=True)
      (5): LeakyReLU(negative_slope=0.01)
      (6): Linear(in_features=512, out_features=8575, bias=True)
    )
  )
  (linear): Feedforward(
    (fc): Sequential(
      (0): Linear(in_features=7, out_features=7, bias=T

In [None]:
# Sanity-check the bottleneck on a latent-shaped batch. `z` is only created
# inside the training loop further down, so referencing it here raised a
# NameError on a fresh kernel; probe with a random (n_batch, n_z) batch instead.
with torch.no_grad():  # inspection only, no graph needed
    bottleneck_probe = linear_bottleneck(torch.randn(n_batch, n_z, device=device))
bottleneck_probe

In [None]:
# Display the model's module tree (a bare last expression renders the object's repr).
autoencoder

In [None]:
# Base criterion: mean absolute error between prediction and target.
loss = nn.L1Loss()
# Wrap the criterion with the project's masking helper. The training loop calls
# the result as masked_loss(prediction, target, mask); how the mask is applied
# is defined in apoNN.src.utils — presumably only unmasked pixels contribute, confirm.
masked_loss = generate_loss_with_masking(loss)


### Training Loop

In [None]:
# Draw one registry entry uniformly at random; the key is discarded and the
# value is kept as the indices used to select rows from dataset_occam.
_,occam_idxs = random.choice(list(registry.items()))
occam_idxs

In [None]:
# Index the OCCAM dataset with the sampled indices; the training loop unpacks
# the result into four items (x, x_raw, x_err, idx).
dataset_occam[occam_idxs]

In [None]:
def loss_std_sample(z_occam,z_full,eps=0.0000001):
    """Penalty on the latent codes of a single-cluster sample.

    Returns the mean absolute value over every element of ``z_occam``.

    Args:
        z_occam: latent codes of the cluster sample (tensor).
        z_full: latent codes of the regular batch. Currently unused.
        eps: small constant. Currently unused.

    Returns:
        Scalar tensor.

    Note:
        ``z_full`` and ``eps`` belong to a disabled formulation that compared
        per-dimension spreads:
        ``mean(std(z_occam, dim=0) / (std(z_full, dim=0) + eps))``.
        They are kept in the signature so callers do not need to change.
    """
    return z_occam.abs().mean()

In [None]:
# Joint training. Every step combines
#   * err_pred: masked reconstruction error on a random batch of the full set
#   * err_std : loss_std_sample penalty on the first n_shared latent dimensions
#               of the members of one randomly drawn cluster.
n_epochs = 30000
training_loss = []  # one entry per epoch: reconstruction error of that epoch's last batch

# The loop never modifies the registry, so list its entries once rather than
# rebuilding the list on every batch.
cluster_entries = list(registry.items())

for i in range(n_epochs):
    for j, (x, x_raw, x_err, idx) in enumerate(loader):
        optimizer_autoencoder.zero_grad()
        x = x.to(device)

        # occam sample: encode the spectra of one randomly chosen cluster.
        # (The regular batch `x` used to be encoded here by mistake, so the
        # cluster sample never reached the loss.)
        _, occam_idxs = random.choice(cluster_entries)
        x_occam, x_occam_raw, x_occam_err, occam_idx = dataset_occam[occam_idxs]
        _, z_occam = autoencoder(x_occam.to(device))
        z_occam_shared = z_occam[:, :n_shared]

        # regular sample: reconstruct the batch and keep its latent code.
        x_pred, z = autoencoder(x)
        z_shared = z[:, :n_shared]

        # Flag pixels whose reported error is below the dataset threshold and
        # keep the mask on the same device as the tensors it is applied to.
        mask_spec = (x_err < dataset.err_threshold).to(device)
        err_pred = masked_loss(x_pred, x, mask_spec)
        err_std = loss_std_sample(z_occam_shared, z_shared)

        # Each term enters the total once (err_std used to be added twice).
        err_tot = err_pred + err_std
        err_tot.backward()
        optimizer_autoencoder.step()
        print(f"err:{err_tot},err_pred:{err_pred},err_std:{err_std}")
    training_loss.append(err_pred.item())
                 

In [None]:
# Debug check: gradient stored on the encoder's second linear layer (fc[2])
# by the most recent backward() call; prints None if no backward pass has run.
print(autoencoder.encoder.fc[2].weight.grad)

In [None]:
# Shape of the latent code from the last training step; `z` only exists after
# the training loop above has executed at least one batch.
z.shape