# Training Data Selection

In [1]:
import os
import pickle
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
from tqdm import notebook

from mwv_source import MultiWVSource

# Install a global filter that ignores every Python warning for the rest of the session,
# presumably to keep cell output compact.
# NOTE(review): this also hides deprecation and numerical warnings raised by later cells.
warnings.filterwarnings('ignore')

## Import all interesting source lists

In [2]:
# Each catalog row holds, in order:
# ra(J2000),dec(J2000),amp(Jy/beam),rms(Jy/beam),FWHMy(arcsec),FWHMx(arcsec),Bpa(degrees),FieldName,png image link,NED Lookup,[[prob Compact, prob Complex]]
catalog_files = ['outcatT%i.csv' % n for n in (14, 16, 18, 20, 21, 22, 23, 24)]
num_catalog_columns = 11

# Read every catalog and stack them with a single concatenate instead of growing the table through
# repeated np.append copies. ndmin=2 keeps a one-row catalog two-dimensional so it can still be
# stacked, and the column slice trims catalogs that carry extra trailing columns (it is a no-op
# for catalogs that already have exactly 11 columns).
sourceTable = np.concatenate(
    [np.loadtxt(name, dtype=str, delimiter=',', comments='#', ndmin=2)[:, :num_catalog_columns]
     for name in catalog_files],
    axis=0)

ra = sourceTable[:, 0].astype(float)
dec = sourceTable[:, 1].astype(float)
# Column 10 is the bracketed probability pair from the column list above; after removing the
# brackets, the second whitespace-separated number is used as the complexity score.
complexity = np.array([float(entry.replace('[', '').replace(']', '').split()[1]) for entry in sourceTable[:, 10]])

# Order the sources from most to least complex. lexsort treats its LAST key as the primary one,
# so this is an ascending sort on (complexity, ra, dec) which is then reversed to descending.
order = np.lexsort((dec, ra, complexity))[::-1]
complexity = complexity[order]; ra = ra[order]; dec = dec[order]
# One flag per source: 0 = not yet used for training data, 1 = already used.
initialized = np.zeros(len(complexity))

# Count the sources in ten equal-width complexity ranges, most complex range first.
# np.histogram bins are half-open [low, high) except the last, which also includes its upper
# edge, so a score that lands exactly on an edge (including 0.0 and 1.0) is counted exactly once
# instead of being dropped by strict > / < comparisons.
num_bins = 10
edges = np.linspace(0, 1, num_bins + 1)
counts, _ = np.histogram(complexity, bins=edges)
for i in range(num_bins):
    low, high = edges[num_bins - 1 - i], edges[num_bins - i]
    print('%.1f to %.1f: %i' % (low, high, counts[num_bins - 1 - i]))

0.9 to 1.0: 526
0.8 to 0.9: 703
0.7 to 0.8: 1225
0.6 to 0.7: 2215
0.5 to 0.6: 3809
0.4 to 0.5: 7272
0.3 to 0.4: 14379
0.2 to 0.3: 24552
0.1 to 0.2: 19501
0.0 to 0.1: 2109


## Generate training data quanta

In [3]:
num_ranges = 10          # number of equal-width complexity ranges spanning 0 to 1
source_per_range = 10    # sources drawn from every range for each quantum
max_quanta = 10          # upper bound on the number of quanta written by this run
max_attempts = 5         # tries per source before giving up on it and drawing another one
output_dir = 'unlabeled_data'

# Assign every source to a complexity range. Ranges are half-open [low, high) and the top range
# also includes its upper edge, so scores that land exactly on an edge (including 0.0 and 1.0)
# remain selectable. complexity comes from the catalog's 'prob Complex' column, so values are
# expected to lie in [0, 1].
interior_edges = np.linspace(0, 1, num_ranges + 1)[1:-1]
range_index = np.digitize(complexity, interior_edges)   # 0 = least complex range

# The least-populated range of still-unused sources limits how many disjoint quanta can be built;
# never ask for more than that.
available = np.bincount(range_index[initialized == 0], minlength=num_ranges)
num_quanta = min(max_quanta, int(available.min() // source_per_range))

print('Generating %i quanta holding %i ranges with %i sources per range for %i training sources...' % 
    (num_quanta, int(num_ranges), source_per_range, num_quanta*source_per_range*num_ranges))

# Create the output directory up front so a finished quantum is never lost to a missing folder.
os.makedirs(output_dir, exist_ok=True)

pb_0 = notebook.tqdm(total=num_quanta, desc='All', colour='red', position=0)
pb_1 = notebook.tqdm(total=num_ranges, desc='Subranges', colour='blue', position=1)
pb_2 = notebook.tqdm(total=source_per_range, desc='Sources', colour='green', position=2)
for i in range(num_quanta):
    sources = []
    chosen = []   # catalog indices used by this quantum
    for j in range(num_ranges):
        # j = 0 is the most complex range, matching the descending order of the histogram above.
        candidates = np.flatnonzero((range_index == num_ranges - 1 - j) & (initialized == 0))
        candidates = np.random.permutation(candidates)
        accepted = 0
        for idx in candidates:
            if accepted == source_per_range:
                break
            source = None
            for attempt in range(max_attempts):
                try:
                    source = MultiWVSource(ra[idx], dec[idx])
                    break
                except Exception:
                    # Catch Exception rather than everything, so that interrupting the kernel
                    # still stops the run; pause before the next attempt.
                    time.sleep(1)
            if source is None:
                # This source kept failing: move on to the next random candidate.
                continue
            sources.append(source)
            chosen.append(idx)
            accepted += 1
            time.sleep(0.1)
            pb_2.update()
        if accepted < source_per_range:
            raise RuntimeError('Only %i of %i sources could be built for complexity range %i of quantum %i'
                               % (accepted, source_per_range, j, i))
        pb_2.refresh(); pb_2.reset()
        pb_1.update()
    with open(os.path.join(output_dir, 'sources_{}.obj'.format(i)), 'wb') as f:
        pickle.dump(sources, f)
    # Mark the sources as used only once the quantum is safely on disk, so an interrupted or
    # failed quantum does not consume them.
    initialized[chosen] = 1
    pb_1.refresh(); pb_1.reset()
    pb_0.update()
pb_0.refresh()
print('Done!')

Generating 10 quanta holding 10 ranges with 10 sources per range for 1000 training sources...


All:   0%|          | 0/10 [00:00<?, ?it/s]

Subranges:   0%|          | 0/10 [00:00<?, ?it/s]

Sources:   0%|          | 0/10 [00:00<?, ?it/s]

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/will/miniforge3/lib/python3.10/multiprocessing/spawn.py", line 112, in spawn_main
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/will/miniforge3/lib/python3.10/multiprocessing/spawn.py", line 116, in spawn_main
    exitcode = _main(fd, parent_sentinel)
  File "/Users/will/miniforge3/lib/python3.10/multiprocessing/spawn.py", line 126, in _main
    from . import resource_tracker
  File "/Users/will/miniforge3/lib/python3.10/multiprocessing/resource_tracker.py", line 38, in <module>
    self = reduction.pickle.load(from_parent)
  File "/Users/will/miniforge3/lib/python3.10/concurrent/futures/process.py", line 52, in <module>
    import _posixshmem
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1002, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 945, in _find_spec
  Fil