In [1]:
import h5py
import torch 
import numpy as np
import torch.nn as nn 
import torch.optim as optim
from sklearn.model_selection import train_test_split

print("Finished importing")

Finished importing


In [14]:
# Data preprocessing - making sure that each of the files has a 0 or 1 corresponding 
# to whether or not we have a bns or a nsbh or not.

bns = h5py.File('comb1_bns.hdf5','r')
nsb = h5py.File('comb5_nsbh.hdf5','r')

eos = 'APR4'
band = 'mAB_R'

bns_eos_band = bns[eos][band] # shape -> (7522, 400), (len(bns_eos_band), len(bns[eos][band][0]))
nsb_eos_band = nsb[eos][band] # shape -> (439, 400)
time_band = bns[eos]['time'] # shape -> (400,)

bns_label = np.zeros(len(bns_eos_band))
nsb_label = np.ones(len(nsb_eos_band))

all_data = np.concatenate([bns_eos_band, nsb_eos_band], axis=0)
all_data_with_time = np.concatenate([all_data, time_band], axis=0)
all_labels = np.concatenate([bns_label, nsb_label], axis=0)

print_structure = True

# Creating the dataset
with h5py.File('light_curves_without_time.hdf5', 'w') as f:
    f.create_dataset('light_curves', data=all_data, compression="gzip", compression_opts=9)
    
    # Dataset for labels
    f.create_dataset('labels', data=all_labels, compression="gzip", compression_opts=9)
    
    # Add metadata
    f.attrs['description'] = "Light curve data for BNS and NSBH mergers, without time"
    f.attrs['bands'] = ["mAB_band"]
    f.attrs['BNS_class'] = 0  # BNS class label
    f.attrs['NSBH_class'] = 1  # NSBH class label

    f.close()

def print_structure(name, obj):
    # Prints the name and type of each item in the HDF5 file
    print(f"{name}: {type(obj)}")
    if isinstance(obj, h5py.Dataset):  # If it's a dataset, print shape and dtype
        print(f"  - Shape: {obj.shape}")
        print(f"  - Data type: {obj.dtype}")
    elif isinstance(obj, h5py.Group):  # If it's a group, list the contents
        print(f"  - Contains: {list(obj.keys())}")

with h5py.File('light_curves_without_time.hdf5', 'r') as f:
    print("File Structure:")
    f.visititems(print_structure)
    
    # Print file-level attributes
    print("\nFile Attributes:")
    for key, value in f.attrs.items():
        print(f"  {key}: {value}")

    f.close()
    
print()
print("Finished pre-processing the data")

ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)

In [13]:
# Data splitting into X and y
bns_time_band = bns[eos]['time']
nsb_time_band = nsb[eos]['time']

eos_band_time_file = 'light_curves_without_time.hdf5'

with h5py.File(eos_band_time_file, 'r') as f:
    light_curves = f['light_curves'][:]  # Shape: (7961, 400)
    labels = f['labels'][:]              # Shape: (7961,)

    f.close()

X_bns = light_curves[0:400]
y_bns = labels[0:400]

X_nsbh = light_curves[7522:7960]
y_nsbh = labels[7522:7960]

X = np.concatenate((X_bns, X_nsbh), axis=0)
y = np.concatenate((y_bns, y_nsbh), axis=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
y_train = y_train.long()

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)
y_test = y_test.long()

print("Finished splitting the data")
print(X_train)

400
400
Finished splitting the data
tensor([[54.1979, 25.9011, 25.0470,  ..., 26.5695, 26.5769, 26.5842],
        [54.3180, 25.9728, 25.1026,  ..., 27.7247, 27.7312, 27.7377],
        [54.1842, 25.8932, 25.0415,  ..., 29.0173, 29.0233, 29.0293],
        ...,
        [41.1781, 23.1423, 22.6846,  ..., 27.9779, 27.9835, 27.9890],
        [54.3616, 25.9992, 25.1233,  ..., 27.8714, 27.8775, 27.8836],
        [41.3506, 23.3130, 22.8497,  ..., 27.4105, 27.4162, 27.4219]])


In [6]:
model = nn.Sequential(
    nn.BatchNorm1d(400),
    nn.Linear(400, 64),
    nn.ReLU(),  
    nn.Linear(64, 8), 
    nn.ReLU(),
    nn.Linear(8, 2)
)

#class_counts = torch.bincount(y)
#total_class_counts = class_counts.sum()
#class_weights = total_class_counts / class_counts

#print(class_counts)

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

print("Finished the model setup")

Finished the model setup


In [7]:
num_epochs = 50
batch_size = 8

for epoch in range(num_epochs):
    # perm = torch.randperm(len(X))
    # X = X[perm]
    # y = y[perm]

    perm = torch.randperm(len(X_train))
    X_train = X_train[perm]
    y_train = y_train[perm]

    epoch_loss = 0
    
    for i in range(0, len(X_train), batch_size):
        X_train_batch = X_train[i:i+batch_size]
        y_train_pred = model(X_train_batch)
        y_train_batch = y_train[i:i+batch_size]
        loss = loss_fn(y_train_pred, y_train_batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        
    scheduler.step(epoch_loss)
    
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(X):.4f}")

Epoch 1/50, Loss: 0.0277
Epoch 2/50, Loss: 0.0110
Epoch 3/50, Loss: 0.0032
Epoch 4/50, Loss: 0.0015
Epoch 5/50, Loss: 0.0022
Epoch 6/50, Loss: 0.0007
Epoch 7/50, Loss: 0.0269
Epoch 8/50, Loss: 0.0224
Epoch 9/50, Loss: 0.0060
Epoch 10/50, Loss: 0.0034
Epoch 11/50, Loss: 0.0007
Epoch 12/50, Loss: 0.0081
Epoch 13/50, Loss: 0.0026
Epoch 14/50, Loss: 0.0004
Epoch 15/50, Loss: 0.0021
Epoch 16/50, Loss: 0.0004
Epoch 17/50, Loss: 0.0006
Epoch 18/50, Loss: 0.0064
Epoch 19/50, Loss: 0.0063
Epoch 20/50, Loss: 0.0071
Epoch 21/50, Loss: 0.0024
Epoch 22/50, Loss: 0.0002
Epoch 23/50, Loss: 0.0000
Epoch 24/50, Loss: 0.0000
Epoch 25/50, Loss: 0.0086
Epoch 26/50, Loss: 0.0068
Epoch 27/50, Loss: 0.0142
Epoch 28/50, Loss: 0.0004
Epoch 29/50, Loss: 0.0001
Epoch 30/50, Loss: 0.0000
Epoch 31/50, Loss: 0.0000
Epoch 32/50, Loss: 0.0053
Epoch 33/50, Loss: 0.0019
Epoch 34/50, Loss: 0.0000
Epoch 35/50, Loss: 0.0001
Epoch 36/50, Loss: 0.0001
Epoch 37/50, Loss: 0.0001
Epoch 38/50, Loss: 0.0088
Epoch 39/50, Loss: 0.

In [8]:
with torch.no_grad():
    y_test_pred = model(X_test)

y_test_pred_classes = torch.argmax(y_test_pred, dim=1)

accuracy = (y_test_pred_classes == y_test).float().mean()
print(f"Accuracy: {accuracy.item()}")

Accuracy: 1.0
