In [1]:
import torch
import pandas as pd
import astropy as ap
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import pdb
from scipy.ndimage.filters import maximum_filter1d
import glob
import fitsio as fits
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data_utils
from torch.distributions import Bernoulli, Binomial, Normal, Uniform
from torch.nn.utils import clip_grad_norm_
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
from torch.distributions.normal import Normal
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import math
from utils import pooling, batchify
from config import TRAIN_DATA, CV_DATA

In [2]:
# Number of rows randomly drawn from the combined train + CV table.
n_samples = 100
# Not referenced in the visible cells — presumably a hold-out fraction; TODO confirm.
test_size = 0.1
# Second entry of the window handed to `pooling` — see utils.pooling for exact semantics.
pool = 8

In [3]:
# Load both splits (tab-separated, no header row) and merge them into one table.
train_data = pd.read_csv(TRAIN_DATA, delimiter='\t', header=None)
cv_data = pd.read_csv(CV_DATA, delimiter='\t', header=None)
df = pd.concat([train_data, cv_data], axis=0, ignore_index=True)

# Draw a fixed-size random subset. The seed is pinned so that a fresh
# Restart & Run All reproduces the same subset (and the same saved loader).
sample_seed = 0
# Column 0 is removed before the numeric conversion — presumably the class
# label; TODO confirm against the data files.
df = df.sample(n=n_samples, random_state=sample_seed).drop(columns=0)

# Convert to float32, then apply the project's `pooling` helper with a (1, pool) window.
data = df.values.astype(np.float32)
data = pooling(data, (1, pool))

In [4]:
# Move the pooled array into a float32 torch tensor (same name is reused downstream).
data = torch.tensor(data, dtype=torch.float32)

In [5]:
# Sanity check: one row per sampled series, one column per value left after pooling.
data.shape

torch.Size([100, 128])

In [6]:
# Sequence length: number of values in each row of `data`.
sl = data.shape[1]
sl

128

In [7]:
# Evenly spaced time stamps in [0, 1), one per time step: t[i] = i / sl.
# Built from an integer range so the length is always exactly `sl`; with a
# float step, `np.arange(0, 1, 1/sl)` is not guaranteed to return `sl` elements,
# which would misalign `t` with the series in `make_sample`.
t = torch.arange(sl, dtype=torch.float32) / sl

In [8]:
# Display the time grid for inspection.
t

tensor([0.0000, 0.0078, 0.0156, 0.0234, 0.0312, 0.0391, 0.0469, 0.0547, 0.0625,
        0.0703, 0.0781, 0.0859, 0.0938, 0.1016, 0.1094, 0.1172, 0.1250, 0.1328,
        0.1406, 0.1484, 0.1562, 0.1641, 0.1719, 0.1797, 0.1875, 0.1953, 0.2031,
        0.2109, 0.2188, 0.2266, 0.2344, 0.2422, 0.2500, 0.2578, 0.2656, 0.2734,
        0.2812, 0.2891, 0.2969, 0.3047, 0.3125, 0.3203, 0.3281, 0.3359, 0.3438,
        0.3516, 0.3594, 0.3672, 0.3750, 0.3828, 0.3906, 0.3984, 0.4062, 0.4141,
        0.4219, 0.4297, 0.4375, 0.4453, 0.4531, 0.4609, 0.4688, 0.4766, 0.4844,
        0.4922, 0.5000, 0.5078, 0.5156, 0.5234, 0.5312, 0.5391, 0.5469, 0.5547,
        0.5625, 0.5703, 0.5781, 0.5859, 0.5938, 0.6016, 0.6094, 0.6172, 0.6250,
        0.6328, 0.6406, 0.6484, 0.6562, 0.6641, 0.6719, 0.6797, 0.6875, 0.6953,
        0.7031, 0.7109, 0.7188, 0.7266, 0.7344, 0.7422, 0.7500, 0.7578, 0.7656,
        0.7734, 0.7812, 0.7891, 0.7969, 0.8047, 0.8125, 0.8203, 0.8281, 0.8359,
        0.8438, 0.8516, 0.8594, 0.8672, 

In [10]:
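# NOTE: disabled alternative to `make_sample`, kept for reference only (never executed).
# It would zero out a random subset of points (keep-probability drawn from
# Uniform(0.2, 0.5)) and stack the time grid, the masked series and the full series.
# def subsample(y):
#     p = Uniform(0.2, 0.5)
#     measurements = Binomial(probs=p.sample()).sample(y.size())
#     y_measure = y * measurements
#     y_out = torch.stack((t, y_measure, y))
#     return y_out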

In [9]:
def make_sample(y, p=0.5):
    """Split one series and the shared time grid into a leading and a trailing part.

    The cut index is ``int(p * len(t))``, where ``t`` is the notebook-level
    time grid defined in an earlier cell.

    Args:
        y: Series values (a 1-D tensor in this notebook), expected to line up
            with ``t`` element for element.
        p: Fraction of the time grid that goes into the leading part.

    Returns:
        Tensor with four stacked rows, in order: leading times, trailing
        times, leading values, trailing values.

    Note:
        ``torch.stack`` needs all four pieces to have the same length, so the
        call only succeeds when the cut lands exactly in the middle (e.g. the
        default ``p=0.5`` with an even number of time steps).
    """
    cut = int(p * len(t))
    pieces = (t[:cut], t[cut:], y[:cut], y[cut:])
    return torch.stack(pieces)

In [11]:
# Smoke-test the splitter on the first series.
test=make_sample(data[0])

In [12]:
# Expect four stacked rows, each half the series length (p defaults to 0.5).
test.shape

torch.Size([4, 64])

In [19]:
# Row 3 is the last piece stacked by make_sample: the series values after the cut.
test[3]

tensor([-1.3415, -1.2727, -1.2236, -1.1954, -1.1811, -1.1756, -1.1755, -1.1810,
        -1.1959, -1.2072, -1.1688, -1.0862, -0.9726, -0.8585, -0.7791, -0.7557,
        -0.7572, -0.7892, -0.8345, -0.8300, -0.7623, -0.6746, -0.5934, -0.5339,
        -0.4984, -0.4812, -0.4738, -0.4658, -0.4449, -0.4034, -0.3431, -0.2760,
        -0.2202, -0.1857, -0.1668, -0.1428, -0.0884,  0.0030,  0.1090,  0.1962,
         0.2382,  0.2415,  0.2341,  0.2128,  0.1970,  0.1919,  0.1914,  0.1869,
         0.1810,  0.2014,  0.2499,  0.3234,  0.4083,  0.4899,  0.5588,  0.6124,
         0.6507,  0.6758,  0.6955,  0.7221,  0.7666,  0.8312,  0.9150,  1.0191])

In [20]:
# Turn every series into its stacked four-row sample; tqdm reports progress.
dataset = [make_sample(series) for series in tqdm(data)]

100%|██████████| 100/100 [00:00<00:00, 14277.02it/s]


In [21]:
# Collapse the list of per-series samples into a single tensor, with the series
# index as the leading dimension.
dataset = torch.stack(dataset, dim=0)

In [22]:
# Expect (number of series, 4, values per piece).
dataset.shape

torch.Size([100, 4, 64])

### Dataloaders

In [23]:
from torch.utils.data import DataLoader
from utils import collate_ts, collate_extrap, batchify
from loading import ToyDataset

In [24]:
# Batches of 10 samples, assembled by the project's `collate_extrap`.
# No `shuffle` argument is passed, so the DataLoader default order applies.
train_loader = DataLoader(
    dataset,
    batch_size=10,
    collate_fn=collate_extrap,
)

In [25]:
# Persist the whole DataLoader object to disk.
# NOTE(review): this pickles the loader itself, so loading it back presumably
# needs `utils.collate_extrap` importable under the same path — confirm with
# whatever consumes 'ucr_train_extrap.pt'.
torch.save(train_loader, 'ucr_train_extrap.pt')