In [3]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
import matplotlib.pyplot as plt
from tqdm import tqdm
from utils import *

In [4]:
# Load the test-set sample CSV into a DataFrame (columns are previewed in the next cell).
df = pd.read_csv('test_set_sample.csv')

In [5]:
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,object_id,mjd,passband,flux,flux_err,detected
0,13,59798.3205,2,-1.299735,1.357315,0
1,13,59798.3281,1,-2.095392,1.148654,0
2,13,59798.3357,3,-0.923794,1.763655,0
3,13,59798.3466,4,-4.009815,2.602911,0
4,13,59798.3576,5,-3.403503,5.367328,0


In [6]:
def zscore(x):
    """Standardise ``x``: subtract its mean, then divide by its standard deviation.

    Relies on the ``mean()`` and ``std()`` methods of ``x`` itself, so the
    flavour of standard deviation is whatever the container defaults to.
    A zero (or undefined) standard deviation is not special-cased.
    """
    centred = x - x.mean()
    return centred / x.std()

In [7]:
# Gap in 'mjd' to the previous row within each (object_id, passband) group.
# Rows without a predecessor (the first of every group) get the sentinel -10.
df['dt'] = df.groupby(['object_id', 'passband'])['mjd'].diff().fillna(-10)

In [8]:
# Log-compressed squared flux-to-error ratio: log(1 + (flux / flux_err)^2).
df['x'] = np.log(1 + (df['flux'] / df['flux_err']) ** 2)

In [9]:
# Standardise 'x' separately inside every (object_id, passband) group.
df['x'] = (
    df.groupby(by=['object_id', 'passband'])['x']
      .transform(zscore)
)

In [10]:
# Raw columns that the derived 'dt' and 'x' features were computed from.
cols_to_drop = [
    'mjd',
    'flux',
    'flux_err',
]

In [11]:
# Remove the raw columns listed in cols_to_drop.
# `columns=` replaces the positional axis argument (`df.drop(cols, 1, ...)`),
# which pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onwards.
df.drop(columns=cols_to_drop, inplace=True)

In [12]:
# Log-compress positive gaps; everything else (e.g. the -10 sentinel) passes through unchanged.
df['dt'] = df['dt'].apply(lambda gap: gap if not gap > 0 else np.log(1 + gap))

In [13]:
# Collapse every (object_id, passband) group into one dict of column -> list of values.
# NOTE: this rebinds `df` from a DataFrame to a Series of dicts, so the cell is
# not safe to re-run without re-running the cells above it.
group_keys = ['object_id', 'passband']
# Select the non-key columns explicitly instead of stripping the keys inside the
# applied function: passing grouping columns to `apply` is deprecated in
# pandas 2.2 and removed in 3.0. The list comprehension keeps the original column
# order, which matters because format_sample stacks the dict values in that order.
feature_cols = [col for col in df.columns if col not in group_keys]
df = df.groupby(group_keys)[feature_cols].apply(
    lambda grp: grp.to_dict(orient='list')
)

In [14]:
# Persist the per-(object_id, passband) dict series for the DataLoader section below.
pd.to_pickle(df, 'test_ts.pkl')

In [15]:
# Preview the grouped result: one dict per (object_id, passband) pair.
df.head()

object_id  passband
13         0           {'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
           1           {'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
           2           {'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
           3           {'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
           4           {'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
dtype: object

### Create DataLoaders

In [16]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [37]:
class TestDataset(torch.utils.data.Dataset):
    """Thin ``Dataset`` wrapper around an indexable collection of samples.

    There are no labels: indexing returns the stored sample as-is.
    """

    def __init__(self, xs):
        # Anything supporting len() and integer indexing works here.
        self.xs = xs

    def __len__(self):
        """Number of samples held."""
        return len(self.xs)

    def __getitem__(self, i):
        """Return the ``i``-th sample unchanged."""
        return self.xs[i]

In [73]:
# Reload the pickled series and pivot the 'passband' index level into columns,
# leaving one row per object_id.
test_xs = pd.read_pickle('test_ts.pkl').unstack(level='passband')

In [74]:
# Preview the unstacked frame: rows are object_id, columns are passbands.
test_xs.head()

passband,0,1,2,3,4,5
object_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
13,"{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
14,"{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
17,"{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
23,"{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
34,"{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","{'detected': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [75]:
# Keep a handle on the row index (object_id) of the unstacked frame.
ix = test_xs.index

In [77]:
# Persist the row index to its own pickle file.
pd.to_pickle(ix, 'test_xs_ix.pkl')

In [40]:
def format_sample(x):
    """Convert one dict of lists into a single stacked tensor.

    Every value of ``x`` is cast to a float32 array and passed through ``T``
    (defined outside this cell; presumably a tensor-conversion helper pulled in
    by ``from utils import *`` -- verify). The converted values are stacked along
    a new leading axis in the dict's iteration order, and size-1 dimensions are
    then squeezed away. The dict keys themselves are not used.
    """
    rows = [T(np.array(values).astype(np.float32)) for values in x.values()]
    return torch.stack(rows).squeeze()

In [41]:
# Replace the dict in every cell of passband columns 0..5 with its stacked tensor.
for pb in tqdm(range(6)):
    test_xs[pb] = test_xs[pb].apply(format_sample)


100%|██████████| 6/6 [00:00<00:00,  6.67it/s]


In [51]:
def test_collate(batch):
    data = [item for item in batch]
    data = np.squeeze(data)
    return data

In [52]:
# Wrap the frame's underlying array: one row (all passband columns) per sample.
test_set = TestDataset(xs=test_xs.values)

In [66]:
# A single batch covers the whole dataset; shuffle=False keeps dataset order.
test_loader = DataLoader(
    dataset=test_set,
    batch_size=len(test_set),
    shuffle=False,
    collate_fn=test_collate,
)

In [67]:
# Pull the first batch; with batch_size=len(test_set) it is also the only one.
a = next(iter(test_loader))

In [68]:
# First entry of the collated batch (its length is checked in the cells below).
data = a[0]

In [69]:
# Drill four levels into the batch to reach a single innermost element.
# NOTE(review): `b` is not used by any later cell visible here.
b=a[0][0][0][0]

In [70]:
# Length of the innermost sequence for the first entry (72 in the recorded output).
len(data[0][0])

72

In [71]:
# Number of entries in `data` (6 in the recorded output, matching the six passband columns).
len(data)

6

### Export

In [72]:
# Serialise the whole DataLoader object to disk.
# NOTE(review): torch.save pickles the object, so loading it back presumably needs
# TestDataset and test_collate to be importable/defined in the loading session -- confirm.
torch.save(test_loader, 'test_loader.pt')


In [None]:
# Load a previously saved loader for inspection.
# NOTE(review): this reads 'train_loader.pt', not the 'test_loader.pt' written just
# above, and binds it to a variable named `test` -- confirm which file is intended.
# Only load pickled files from a trusted source.
test = torch.load('train_loader.pt')

In [None]:
# Pull the first batch from the loaded loader (rebinds `a` from the earlier cell).
a = next(iter(test))

In [None]:
# First entry of that batch (rebinds `data` from the earlier cell).
data = a[0]

In [None]:
# Number of entries in `data`.
len(data)

In [None]:
# Length of the first entry of `data`.
len(data[0])

In [None]:
# Shape of the first element inside the first entry.
data[0][0].shape