<img src="../logo/AppleCiDEr.png" width="150"/>


<a id='index'></a>
# 000 - data preprocessing


1. [preprocessing a single object](#single)
    - [plot light curve](#LC)
    - [object images](#images)
2. [preprocess dataset](#dataset)

In [None]:
import os
import pandas as pd
import sys ; sys.path.insert(0, '/projects/bcrv/abrown3')

import AppleCider.preprocess.plot_data as plot_data

from IPython.display import display

from AppleCider.preprocess.data_preprocessor import (
    AlertProcessor,
    PhotometryProcessor,
    DataPreprocessor,
    SpectraProcessor
)
import AppleCider.preprocess.plot_data as plot_data

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.gridspec import GridSpec

import pickle
import plotly.graph_objects as go
import plotly.subplots as sp
import seaborn as sns

import torch ;  torch.set_printoptions(precision=10)
from tqdm.auto import tqdm
from numpy import unravel_index
import numpy as np
from matplotlib.patches import Rectangle

In [None]:
## root folder of the ZTF data; handed to the processors below as the data directory
data_dir = '/work/hdd/bcrv/ffontinelenunes/data/AppleCider/data_ztf/'

## table read from cider_BTS.csv; it has (at least) a 'type' column
bts_csv_path = '/projects/bcrv/abrown3/new-csv-pkl/cider_BTS.csv'
cider_BTS = pd.read_csv(bts_csv_path)

## number of rows for each distinct value of 'type'
cider_BTS['type'].value_counts()

<a id='single'></a>


### 1. preprocessing steps: single object

<i>a) alert processor, b) photometry processor</i>

`PhotometryProcessor` deals with aux alerts (saved as `photometry.csv`) and `AlertProcessor` deals with alerts (`alerts.npy`). see `000-query-data.ipynb` for more information.

<br>

<small><i>back to [index](#index)</i></small>

In [None]:
## object used for the single-object walkthrough
obj_id = 'ZTF24aaemydm'

## loading data from folder
## change column names, add object id, object type to make photo_df
## (per the note above: photometry comes from `photometry.csv`, alerts from `alerts.npy`)
photo_df = PhotometryProcessor.process_csv(obj_id, cider_BTS, data_dir)
## returns the alert table together with the image stacks plotted further down
alert_df, images = AlertProcessor.get_process_alerts(obj_id, data_dir)

In [None]:
## preview the first three rows of the photometry table
photo_df[:3]

In [None]:
## preview the first three rows of the alert table
alert_df[:3]

In [None]:
## put both tables in chronological order (ascending 'jd')
photo_df = photo_df.sort_values(by='jd')
alert_df = alert_df.sort_values(by='jd')

## add metadata missing from photometry, using the sorted alert table
## TODO: rename function
photo_df = PhotometryProcessor.add_metadata_to_photometry(photo_df, alert_df)

In [None]:
## preview the first three rows again, now sorted by 'jd' and with the added metadata
photo_df[:3]

<a id='LC'></a>

In [None]:
## plot the light curve of the object from its photometry table
plot_data.plot_photometry_magnitude(photo_df)

In [None]:
def plot_image_pres(image):
    """Show the three channels of one image stack side by side.

    ``image`` is indexed as ``image[:, :, k]`` for ``k`` in 0, 1, 2; the
    panels are titled 'Science Image', 'Reference Image' and 'Difference'
    in that order and drawn with the 'magma' colormap, axes hidden.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    panel_titles = ('Science Image', 'Reference Image', 'Difference')
    fig, axes = plt.subplots(1, len(panel_titles), figsize=(15, 5))
    fig.subplots_adjust(wspace=0.01, hspace=0.1)

    for channel, (ax, title) in enumerate(zip(axes, panel_titles)):
        ax.imshow(image[:, :, channel], cmap='magma')
        ax.set_title(title, fontsize=14)
        ax.axis('off')
    plt.show()

<a id='images'></a>

In [None]:
## number of image stacks loaded for this object
n_images = len(images)
print(n_images)

## how many stacks to display, spread evenly over the selected range
n = 1

## skip the first three stacks when there are enough of them; clamp the start
## so that objects with fewer than four stacks do not raise an IndexError
if n_images == 0:
    selected_index = np.array([], dtype=int)
else:
    first = min(3, n_images - 1)
    selected_index = np.round(np.linspace(first, n_images - 1, n)).astype(int)

print('Selected index:', selected_index)
for i in selected_index:
    print('Image number:', i)
    plot_image_pres(images[i])

In [None]:
## convert magnitude to flux
photo_df = DataPreprocessor.convert_photometry(photo_df)
## NOTE(review): the plotting helper is named "...magnitude" but is called on the
## converted table -- confirm this shows the intended quantity after the conversion
plot_data.plot_photometry_magnitude(photo_df)

In [None]:
## cut down to only early-ish photometry: keep rows whose 'mjd' is at most 10
## NOTE(review): 'mjd' is compared against 10, so it is presumably a time relative
## to some reference epoch rather than an absolute MJD -- confirm
mjd_cap = 10
max_ = min(photo_df['mjd'].max(), mjd_cap)
early_rows = photo_df['mjd'] <= max_
photo_df = photo_df.loc[early_rows]
photo_df

In [None]:
## drop alerts that are newer than the last remaining photometry point
last_photo_jd = photo_df['jd'].max()
alert_df = alert_df[alert_df['jd'] <= last_photo_jd]

## metadata built from the remaining alerts, plus a version without the 'jd' column
metadata_df = DataPreprocessor.preprocess_metadata(alert_df)
metadata_df_norm = metadata_df.drop(columns=['jd'])

## indices from the first valid photometry index up to the end of the metadata table
start_index = PhotometryProcessor.get_first_valid_index(photo_df)
alert_indices = [*range(start_index, len(metadata_df))]
print("valid alert indices: ", alert_indices, '\n')

<i>f) spectra</i>

spectra preprocessing depends on the spectra model being used. 

***
<a id='dataset'></a>
### 2. preprocessing dataset

<small><i>back to [index](#index)</i></small>

In [None]:
# where we want to save the dataset
# (passed as the first argument to TransientDataset in the cell below)
DATA_PATH = '/projects/bcrv/abrown3/cider_BTS'

`TransientDataset`

In [None]:
## build the dataset under DATA_PATH from the raw data in data_dir
## NOTE(review): TransientDataset is not imported in the import cell at the top of
## this notebook -- import it from its defining module before running this cell
dataset = TransientDataset(
    DATA_PATH,
    base_path=data_dir,
    max_mjd=10,
    normalize_light_curve=False,
    include_spectra=False,
    include_flux_err=True,
)