In [1]:

import argparse
import torch
from concurrent.futures import ThreadPoolExecutor
import time
import sys
sys.path.append('./models')
from models import models
from dataset import HARPSDataset
from torch.utils.data import DataLoader
import os
import numpy as np
from astropy.io import fits


In [2]:

device = torch.device('cpu')
model = models.ae1d().to(device)
loss_fn = torch.nn.L1Loss(reduction='mean')
fnames = [fname for fname in os.listdir('removed2/') if fname.endswith('.fits')]
model.eval()

ae1d(
  (conv1): Sequential(
    (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv2): Sequential(
    (0): Conv1d(16, 16, kernel_size=(5,), stride=(2,), padding=(2,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv3): Sequential(
    (0): Conv1d(16, 16, kernel_size=(5,), stride=(2,), padding=(2,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv4): Sequential(
    (0): Conv1d(16, 32, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv5): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv6): Sequential(
    (0): Conv1d(32, 32, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv7): Sequential(
    (0): Conv1d(32, 64, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): 

In [19]:
nan_output = []
nan_input = []
for i, fname in enumerate(fnames):
    if i % 500 == 0:
        print(f"reached {i} files.")
    flux = torch.from_numpy(fits.getdata(os.path.join('removed/', fname), ext=0).astype(np.float32))


    if not torch.isfinite(flux).all():
        nan_input.append(fname)
        
    flux = flux.unsqueeze(0).to(device)
    with torch.inference_mode():
        output = model(flux)
        
        if not torch.isfinite(output).all():
            nan_output.append(fname)


reached 0 files.
reached 500 files.
reached 1000 files.
reached 1500 files.
reached 2000 files.
reached 2500 files.
reached 3000 files.
reached 3500 files.
reached 4000 files.
reached 4500 files.
reached 5000 files.
reached 5500 files.
reached 6000 files.
reached 6500 files.
reached 7000 files.
reached 7500 files.
reached 8000 files.
reached 8500 files.
reached 9000 files.
reached 9500 files.
reached 10000 files.
reached 10500 files.
reached 11000 files.
reached 11500 files.
reached 12000 files.
reached 12500 files.
reached 13000 files.
reached 13500 files.
reached 14000 files.
reached 14500 files.
reached 15000 files.
reached 15500 files.
reached 16000 files.
reached 16500 files.
reached 17000 files.
reached 17500 files.
reached 18000 files.
reached 18500 files.
reached 19000 files.
reached 19500 files.


In [20]:
len(nan_input), len(nan_output)

(115, 115)

In [21]:
nan_input == nan_output

True

In [9]:
nan_output[0], nan_input[0]

('ADP.2014-10-01T10:20:24.770.fits', 'ADP.2014-10-01T10:20:24.770.fits')

In [10]:
flux = torch.from_numpy(fits.getdata('removed/ADP.2014-10-01T10:20:24.770.fits').astype(np.float32))

flux = flux.unsqueeze(0).to(device)
with torch.inference_mode():
    output = model(flux)
    loss = loss_fn(output, flux)

    print(output.isnan().any())
    print(loss.isnan())

tensor(True)
tensor(True)


In [12]:
list(model.children())[0]

Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)

In [21]:
layers = {}
for spectrum in nan_output:
    flux = torch.from_numpy(fits.getdata(f'removed/{spectrum}').astype(np.float32))

    for i, layer in enumerate(list(model.children())):
        flux = layer(flux)
        
        if flux.isnan().any():
            print(i, layer)
            if i not in layers:
                layers[i] = 0
            layers[i] += 1
            break

0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,))
  (1): LeakyReLU(negative_slope=0.1, inplace=True)
)
0 Sequential(
  (0): Conv1d(1, 16, kernel_size=(7,), stride=(2,), pad

In [17]:
with open('nan_output.txt', 'w') as f:
    f.write('\n'.join(nan_output))

In [22]:
layers

{0: 114}

In [3]:
model.load_state_dict(torch.load('models/model_14v3_128d_e116_i954k.pth.tar', map_location=device)['state_dict'],strict=True)

<All keys matched successfully>

In [12]:
flux = torch.from_numpy(fits.getdata('removed/ADP.2014-09-25T15:35:36.897.fits', ext=0).astype(np.float32)).unsqueeze(0)

out = model(flux)
if out.isnan().any():
    print('nan')

if not flux.isfinite().all():
    print('not finite')

if not out.isfinite().all():
    print('not finite')



nan
not finite
not finite


In [9]:
flux.isfinite().all()

tensor(False)

In [6]:
layers = {}

with open('nan_output.txt', 'r') as f:
    nan_output = f.read().split('\n')

for spectrum in nan_output:
    flux = torch.from_numpy(fits.getdata(f'removed/{spectrum}').astype(np.float64))

    for i, layer in enumerate(list(model.children())):
        flux = layer(flux)
        
        if flux.isnan().any():
            print(i, layer)
            if i not in layers:
                layers[i] = 0
            layers[i] += 1
            break

IsADirectoryError: [Errno 21] Is a directory: 'removed/'

In [32]:
layers

{0: 114}

In [None]:
for spectrum in nan_output:
    flux = torch.from_numpy(fits.getdata(f'removed/{spectrum}').astype(np.float32))
    

    out0 = list(model.children())[0](flux)

In [33]:
nan_conv = 0
for spectrum in nan_output:
    flux = torch.from_numpy(fits.getdata(f'removed/{spectrum}').astype(np.float32))
    flux = flux.unsqueeze(0)
    out0 = list(model.children())[0][0](flux)
    
    if out0.isnan().any():
        nan_conv += 1

In [34]:
nan_conv

114

In [36]:
list(model.children())[0][0].weight.shape

torch.Size([16, 1, 7])

In [50]:
"""
Conv1d(1, 16, kernel_size=(7,), stride=(2,), padding=(3,)

def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
    if self.padding_mode != 'zeros':
        return F.conv1d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                        weight, bias, self.stride,
                        _single(0), self.dilation, self.groups)
    return F.conv1d(input, weight, bias, self.stride, self.padding, self.dilation, self.groups)
"""
import torch.nn.functional as F
for i, spectrum in enumerate(nan_output):
    flux = torch.from_numpy(fits.getdata(f'removed/{spectrum}').astype(np.float32))
    flux = flux.unsqueeze(0)


    weights = list(model.children())[0][0].weight
    weights = torch.zeros_like(weights)
    bias = list(model.children())[0][0].bias

    out0 = F.conv1d(flux, weights, bias, stride=(2,), padding=(3,), dilation=1, groups=1)

    print(i, out0.isnan().any())

    

0 tensor(True)
1 tensor(True)
2 tensor(True)
3 tensor(True)
4 tensor(True)
5 tensor(True)
6 tensor(True)
7 tensor(True)
8 tensor(True)
9 tensor(True)
10 tensor(True)
11 tensor(True)
12 tensor(True)
13 tensor(True)
14 tensor(True)
15 tensor(True)
16 tensor(True)
17 tensor(True)
18 tensor(True)
19 tensor(True)
20 tensor(True)
21 tensor(True)
22 tensor(True)
23 tensor(True)
24 tensor(True)
25 tensor(True)
26 tensor(True)
27 tensor(True)
28 tensor(True)
29 tensor(True)
30 tensor(True)
31 tensor(True)
32 tensor(True)
33 tensor(True)
34 tensor(True)
35 tensor(True)
36 tensor(True)
37 tensor(True)
38 tensor(True)
39 tensor(True)
40 tensor(True)
41 tensor(True)
42 tensor(True)
43 tensor(True)
44 tensor(True)
45 tensor(True)
46 tensor(True)
47 tensor(True)
48 tensor(True)
49 tensor(True)
50 tensor(True)
51 tensor(True)
52 tensor(True)
53 tensor(True)
54 tensor(True)
55 tensor(True)
56 tensor(True)
57 tensor(True)
58 tensor(True)
59 tensor(True)
60 tensor(True)
61 tensor(True)
62 tensor(True)
63

In [44]:
out0.shape

torch.Size([1, 16, 163840])

In [49]:
print(np.where(out0.isnan())[:, 0])

TypeError: tuple indices must be integers or slices, not tuple

In [41]:
fname = os.listdir('preprocessed/')[0]
flux = torch.from_numpy(fits.getdata(f'preprocessed/{fname}').astype(np.float32))
flux = flux.unsqueeze(0)

weights = list(model.children())[0][0].weight
bias = list(model.children())[0][0].bias

out0 = F.conv1d(flux, weights, bias, stride=(2,), padding=(3,), dilation=1, groups=1)

print(out0.isnan().any())

tensor(False)


In [9]:
print(np.min([np.min(fits.getdata(f'removed/{fname}')) for fname in os.listdir('removed/')]))

nan
nan


In [16]:
for i, fname in enumerate(os.listdir('removed/')):
    flux = fits.getdata(f'removed/{fname}', ext=0)
    if np.isnan(np.min(flux)):
        print(i, fname, np.min(flux), np.argmin(flux))

228 ADP.2014-10-01T10:20:24.770.fits nan 0
229 ADP.2014-10-01T10:22:14.220.fits nan 0
230 ADP.2014-10-01T10:23:18.623.fits nan 0
265 ADP.2014-10-01T10:23:00.420.fits nan 0
266 ADP.2014-10-01T10:19:57.883.fits nan 0
267 ADP.2014-10-01T10:21:41.850.fits nan 0
270 ADP.2014-10-01T10:22:21.423.fits nan 0
272 ADP.2014-10-01T10:19:06.723.fits nan 0
276 ADP.2014-10-01T10:22:16.693.fits nan 0
291 ADP.2014-10-01T10:21:38.787.fits nan 0
294 ADP.2014-10-01T10:23:19.653.fits nan 0
319 ADP.2014-10-01T10:23:23.360.fits nan 0
320 ADP.2014-10-01T10:20:17.697.fits nan 0
321 ADP.2014-10-01T10:23:10.527.fits nan 0
1323 ADP.2014-10-01T10:19:09.597.fits nan 0
1324 ADP.2014-10-01T10:19:49.723.fits nan 0
1325 ADP.2014-10-01T10:19:35.093.fits nan 0
1436 ADP.2014-10-01T10:22:18.587.fits nan 0
1437 ADP.2014-10-01T10:21:59.643.fits nan 0
1438 ADP.2014-10-01T10:19:26.093.fits nan 0
1540 ADP.2014-10-01T10:21:24.530.fits nan 0
1547 ADP.2014-10-01T10:20:20.907.fits nan 0
1548 ADP.2014-10-01T10:21:03.797.fits nan 0
18

KeyboardInterrupt: 

In [16]:
for i, fname in enumerate(os.listdir('preprocessed/')):
    flux = fits.getdata(f'preprocessed/{fname}', ext=0)
    if np.isnan(np.min(flux)):
        print(i, fname, np.min(flux), np.argmin(flux))

KeyboardInterrupt: 

In [7]:
for i, fname in enumerate(fnames):
    try:
        flux = fits.getdata(f'preprocessed/{fname}', ext=0)

        if not np.isfinite(flux).all():
            print(i, fname)
    except FileNotFoundError as e:
        pass
        

In [8]:
for i, fname in enumerate(os.listdir('preprocessed/')):
    try:
        flux = fits.getdata(f'removed2/{fname}', ext=0)

        if not np.isfinite(flux).all():
            print(i, fname)
    except FileNotFoundError as e:
        pass


KeyboardInterrupt: 

In [9]:
for i, fname in enumerate(os.listdir('preprocessed/')):
    if i % 1000 == 0:
        print(i)
    try:
        flux = fits.getdata(f'removed/{fname}', ext=0)

        if not np.isfinite(flux).all():
            print(i, fname)
    except FileNotFoundError as e:
        pass


KeyboardInterrupt: 