In [85]:
import h5py as h
import numpy as np
import matplotlib.pyplot as plt
import torch 
import torch.nn as nn
import numpy as np
import sys, os, time
import optuna

In [86]:
ds = h.File('catalog.h5')

In [87]:
all_halo_props_at_all_z = {}
for halo in ds.keys():
    halo_props_at_all_z = {}
    for idx, z in enumerate(list(ds[halo]['redshift'])):
        if list(ds[halo]['fesc'])[idx] >= 1e-5:
            halo_props_at_all_z[z] = [list(ds[halo]['SFR'])[idx], list(ds[halo]['Mstar'])[idx], list(ds[halo]['mass'])[idx], \
                                          list(ds[halo]['fgas'])[idx], list(ds[halo]['redshift'])[idx], list(ds[halo]['fesc'])[idx]]
    #if len(list(halo_props_at_all_z.keys())) == 0:
    #    continue
    all_halo_props_at_all_z[halo] = halo_props_at_all_z

In [88]:
redshifts40 = []
for halo in ds.keys():
    if len(list(ds[halo]['redshift'])) == 40:
           redshifts40 = list(ds[halo]['redshift'])

In [89]:
all_properties = []
for halo_idx, halo_num in enumerate(list(all_halo_props_at_all_z.keys())):
    halo_props = []
    temp = all_halo_props_at_all_z[halo_num].keys()
    try:
        min_props = all_halo_props_at_all_z[halo_num].get(max(temp))
    except:
        continue
    for redshift in redshifts40:
        if redshift not in temp:
            halo_props.append(min_props)
        else:
            halo_props.append(all_halo_props_at_all_z[halo_num].get(redshift))
    all_properties.append(halo_props)
all_properties_reshape = np.reshape(np.array(all_properties), (151,40,6))

In [90]:
X = all_properties_reshape
y = all_properties_reshape[:,:,5]

In [91]:
X_train = torch.Tensor(X[15:,:,0:5])
y_train = torch.Tensor(y[15:,:])
X_val = torch.Tensor(X[:15, :, 0:5])
y_val = torch.Tensor(y[:15, :])

In [92]:
single_halo_x = X_train[0]
single_halo_y = y_train[0]
print(single_halo_y)

tensor([0.1724, 0.1183, 0.0720, 0.0463, 0.1055, 0.0710, 0.0449, 0.0253, 0.4159,
        0.4159, 0.4159, 0.0146, 0.0164, 0.4159, 0.0073, 0.4159, 0.0184, 0.0343,
        0.0316, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159,
        0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159, 0.4159,
        0.4159, 0.4159, 0.4159, 0.4159])


## Template LSTM

In [93]:
test_size = 10
train_set = single_halo_x[:-test_size]
test_set = single_halo_x[-test_size:]

In [94]:
def input_data(seq,ws):
    out = []
    L = len(seq)
    
    for i in range(L-ws):
        window = seq[i:i+ws]
        label = seq[i+ws:i+ws+1]
        out.append((window,label))
    
    return out

In [95]:
window_size = 10
train_data = input_data(train_set, window_size)
len(train_data)

20

In [96]:
class LSTM(nn.Module):
    
    def __init__(self,input_size = 5, hidden_size = 50, out_size = 1):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size,out_size)
        self.hidden = (torch.zeros(1,1,hidden_size),torch.zeros(1,1,hidden_size))
    
    def forward(self,seq):
        lstm_out, self.hidden = self.lstm(seq.view(len(seq),1,-1), self.hidden)
        pred = self.linear(lstm_out.view(len(seq),-1))
        return pred[-1]

In [97]:
torch.manual_seed(42)
model = LSTM()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [98]:
epochs = 10
future = 1

for i in range(epochs):
    
    for seq, y_train in train_data:
        optimizer.zero_grad()
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                       torch.zeros(1,1,model.hidden_size))
        
        y_pred = model(seq)
        loss = criterion(y_pred, y_train)
        loss.backward()
        optimizer.step()
        
    print(f"Epoch {i} Loss: {loss.item()}")
    
    preds = train_set[-window_size:].tolist() # train_set is single_halo_x[:-test_size]
    print(preds)
    seq = torch.FloatTensor(preds[-window_size:])
    with torch.no_grad():
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                       torch.zeros(1,1,model.hidden_size))
        preds.append(model(seq).item())
    print(preds)
    loss = criterion(torch.tensor(preds[-window_size:]), single_halo_y[-window_size-test_size:-test_size])
    print(f"Performance on test range: {loss}")
    

Epoch 0 Loss: 135735143301120.0
[[0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273], [0.00017505112919025123, 3501.0224609375, 28871420.0, 0.0886339396238327, 17.301610946655273]]
[[0.0001750511291

TypeError: not a sequence

## RNN

In [99]:
window_size = 10

In [106]:
def input_data(seq,ws,labels):
    out = []
    L = 40
    counter = 0
    for i in range(L-ws-1):
        if counter > 130:
            break
        window = seq[counter:counter+10,i:i+ws,:]
        # print(window.shape)
        label = labels[counter:counter+10,i+ws:i+ws+1]
        # print(label.shape)
        counter+=10 
        out.append((window,label))
        
    return out

In [107]:
train_data = input_data(X_train, 20, y_train)
print([len(t) for t in train_data])
print([len(t[0]) for t in train_data])
print([len(t[1]) for t in train_data])
print([(t[0].numpy().shape) for t in train_data])

[2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
[10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 6]
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[(10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (10, 20, 5), (6, 20, 5)]


In [108]:
class LSTMnetwork(nn.Module):
    def __init__(self,input_size= 5,hidden_size=5,output_size=1): # should be input 5 hidden 1 output 1
        super().__init__()
        self.hidden_size = hidden_size
        
        # add an LSTM layer:
        self.lstm = nn.LSTM(input_size,hidden_size, batch_first = True)
        
        # add a fully-connected layer:
        self.linear = nn.Linear(hidden_size,output_size)
        
        # initializing h0 and c0:
        self.hidden = (torch.zeros(1,136,self.hidden_size), # should be 1, 136
                       torch.zeros(1,136,self.hidden_size)) # should be 1, 136

    def forward(self,seq):
        lstm_out, self.hidden = self.lstm(
            seq.view(len(seq),1,-1), self.hidden)
        pred = self.linear(lstm_out.view(len(seq),-1))
        return pred[-1]

In [109]:
torch.manual_seed(42)

# instantiate
model = LSTMnetwork()

# loss
criterion = nn.MSELoss()

#optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model

LSTMnetwork(
  (lstm): LSTM(5, 5, batch_first=True)
  (linear): Linear(in_features=5, out_features=1, bias=True)
)

In [110]:
epochs = 100

import time
start_time = time.time()

for epoch in range(epochs):
    for seq, target in train_data:
        print(seq.shape)
        optimizer.zero_grad()
        model.hidden = (torch.zeros(1,136,model.hidden_size), # should be 1, 136
                        torch.zeros(1,136,model.hidden_size)) # should be 1, 136
        
        y_pred = model(seq)
        
        loss = criterion(y_pred, target)
        loss.backward()
        optimizer.step()
        
    print(f'Epoch: {epoch+1:2} Loss: {loss.item():10.8f}')
    
print(f'\nDuration: {time.time() - start_time:.0f} seconds')

torch.Size([10, 20, 5])


RuntimeError: input.size(-1) must be equal to input_size. Expected 5, got 100

In [111]:
future = 10

preds = y_train[-window_size:].tolist()

model.eval()

for i in range(future):
    seq = torch.FloatTensor(preds[-window_size:])
    with torch.no_grad():
        model.hidden = (torch.zeros(1,1,model.hidden_size),
                        torch.zeros(1,1,model.hidden_size))
        preds.append([t.numpy() for t in model(seq)])
preds[window_size:]

ValueError: expected sequence of length 5 at dim 1 (got 1)

In [None]:
y_train[-window_size:].shape

In [None]:
X_train.shape