# Tutorial

In [1]:
cd /tf/astromer

/tf/astromer


In [2]:
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import os

from core.masking import get_padding_mask
from core.astromer import get_ASTROMER, train
from core.data  import (create_dataset, 
                        pretraining_records)

# This tutorial is meant to run in eager mode. For the optimized code use train.py.
# Otherwise, comment out the next line and restart the kernel before the finetuning.
# NOTE(review): the call below passes False, which asks TensorFlow NOT to run
# tf.function-decorated code eagerly -- this looks at odds with the "eager mode"
# remark above; confirm which value is intended.
tf.config.run_functions_eagerly(False)

# Enable IPython's autoreload extension (mode 2) so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Alternative configuration for the MACHO layout (kept for reference):
#   source   = './data/raw_data/MACHO/'                 # lightcurves folder
#   metadata = 'data/raw_data/MACHO/MACHO_dataset.dat'  # metadata file
#   target   = 'data/records/macho2/'                   # where records will be stored

# Active configuration used by the cells below.
target = './Data/Records/'                          # output folder for the generated records
metadata = '../Data/metadata_train_big_filter.dat'  # metadata file (loaded with pandas below)
source = '../Data/LCs/'                             # folder containing the light-curve files

### Metadata frame sample

In [4]:
# Read the metadata table and keep only the rows whose `N` column is greater than 100.
meta = pd.read_csv(metadata).loc[lambda frame: frame['N'] > 100]

In [5]:
# Draw one random entry from the `Path` column and show it joined onto `source`.
random_entry = meta['Path'].sample(n=1)
os.path.join(source, random_entry.values[0])

'../Data/LCs/F_101.20912.4727.dat'

### Lightcurve frame sample

In [6]:
# Load one randomly chosen light-curve file and preview its first rows.
# (A new random draw is made here, so this is not necessarily the file shown above.)
lc_file = meta['Path'].sample(n=1).values[0]
lc_df = pd.read_csv(os.path.join(source, lc_file))
lc_df.head()

Unnamed: 0,observation_date,red magnitude,red error
0,49064.7566,-3.794,0.22
1,49066.7239,-4.166,0.216
2,49075.7506,-3.846,0.304
3,49076.7996,-3.834,0.441
4,49078.7579,-4.028,0.233


### From .csv to .record 

In [None]:
# Convert the light curves listed in `meta` (read from `source`) into record
# files written under `target`.
create_dataset(
    meta,
    source,
    target,
    max_lcs_per_record=20000,
    n_jobs=7,
    subsets_frac=(0.8, 0.2),
)

  0%|          | 0/1 [00:00<?, ?it/s]

35.96000361442566
34.914400577545166
34.448344469070435
35.3814971446991
34.95212936401367
34.70088481903076


### Loading Data

In [None]:
# Settings handed to the record loaders in the following cells.
max_obs = 50     # forwarded as the `max_obs=` keyword
batch_size = 10  # forwarded as the second positional argument

In [None]:
from core.data import pretraining_records

# Build the training and validation datasets from the 'Train' and 'Val'
# sub-folders of `target`, using the same batch size and `max_obs` for both.
train_dir = os.path.join(target, 'Train')
valid_dir = os.path.join(target, 'Val')
train_batches = pretraining_records(train_dir, batch_size, max_obs=max_obs)
valid_batches = pretraining_records(valid_dir, batch_size, max_obs=max_obs)

In [None]:
# Count the individual elements of the training set (iterates the whole dataset once).
sum(1 for _ in train_batches.unbatch())

In [None]:
# Count the individual elements of the validation set (iterates the whole dataset once).
sum(1 for _ in valid_batches.unbatch())

In [None]:
# Count the individual elements of the 'test' split.
# The dataset gets its own name so that `valid_batches` (the 'Val' split built
# earlier) is not silently replaced by the test split.
# NOTE(review): the other splits are read from 'Train' / 'Val' (capitalised);
# confirm that the folder on disk is really named 'test'.
test_batches = pretraining_records(os.path.join(target, 'test'), batch_size, max_obs=max_obs)
sum(1 for _ in test_batches.unbatch())

### Visualizing Records

In [None]:
# Peek at the first training batch only and print the shape of its 'mask' entry.
for batch in train_batches:
    print(batch['mask'].shape)
    break  # one batch is enough for a sanity check

# Classification Input

In [None]:
from core.data import classification_records

# NOTE(review): autoreload is already loaded and set to mode 2 earlier in this
# notebook; these two lines repeat that setup.
%load_ext autoreload
%autoreload 2

In [None]:
# NOTE: this rebinds `target`, which earlier in the notebook pointed at
# './Data/Records/'. From here on it refers to the folder that the
# classification records and 'objects.csv' are read from.
target = 'data/records/macho/' # folder holding the classification records

In [None]:
# Build the classification dataset from the 'train' records and load the
# 'objects.csv' table stored in the same folder.
train_records_dir = os.path.join(target, 'train')
objects_csv = os.path.join(target, 'objects.csv')
dataset = classification_records(train_records_dir, batch_size=10, max_obs=100)
objects = pd.read_csv(objects_csv)

In [None]:
# Plot sample `n` of the first batch, print its mask, and use the matching
# entry of objects['label'] as the figure title.
n = 0
for batch in dataset.take(1):
    plt.plot(batch['input'][n][1:-1])
    print(batch['mask'][n][1:-1])
    label_names = list(objects['label'].values)
    label_idx = batch['label'][n]
    plt.title(f'{label_names[label_idx]}')

In [None]:
# Gather the 'times' entry of each of the first 10 batches.
# (Despite its name, `lens` holds those entries themselves, not their lengths.)
lens = list(map(lambda item: item['times'], dataset.take(10)))