In [0]:
import pickle, time
from google.colab import files
import numpy as np
import matplotlib.pyplot as plt
from IPython import display
%matplotlib inline

The following code block will upload the raw data saved as a dictionary in a pickle file. 

The data files are all located in the Drive folder "Data Labeling/To-Do." They should be uploaded and worked on one at a time. Once the labeling is complete, the file will be saved as `basename_revised.pickle`, and this file should then be uploaded back to Drive under "Data Labeling/Completed."

In [0]:
# Ask the user to pick the raw pickle file in the browser upload dialog.
print('Please select the data file for upload')
# The result is keyed by uploaded file name; the next cell reads the first entry.
Dfiles = files.upload()
# Echo the uploaded file names back so the user can confirm the selection.
print(f'Using {list(Dfiles)} as the data files')

In [0]:
# Process uploaded data into labeled format

# First figure out the file names
if not Dfiles:
  # The upload dialog was cancelled or failed, so there is nothing to label.
  raise ValueError('No data file was uploaded; re-run the upload cell first')
in_name = list(Dfiles.keys())[0]
tag = 'revised'
if tag not in in_name: # create a new out_name if necessary
  # Insert the tag just before the final extension. rpartition leaves any
  # earlier dots in the name intact and copes with names without an extension.
  stem, dot, ext = in_name.rpartition('.')
  out_name = f'{stem}_{tag}.{ext}' if dot else f'{in_name}_{tag}'
else:
  # Already a revised file: keep saving under the same name.
  out_name = in_name
print(f'[INFO] Using {in_name} as input file and {out_name} as output file')

# Extract the data from the file
file = Dfiles[in_name] # raw contents of the uploaded file
# NOTE(security): unpickling can execute arbitrary code. Only upload files
# that come from a trusted source.
data = pickle.loads(file) # rebuild the data dictionary

# Ensure that length is specified
if 'length' not in data:
  data['length'] = len(data['Image'])
# Ensure that the data is shuffled
if 'shuffled' not in data or not data['shuffled']:
  print('[INFO] shuffling image order')
  redo = np.random.permutation(data['length'])
  # Fancy indexing below assumes 'Image' and 'Epoch' are numpy arrays -- TODO confirm
  data['Image'] = data['Image'][redo]
  data['Epoch'] = data['Epoch'][redo]
  if 'label' in data:
    # Apply the same permutation to existing labels so that each label
    # stays attached to the image it was given for.
    data['label'] = np.asarray(data['label'], dtype=object)[redo]
  data['shuffled'] = True

# Create the label list if necessary
if 'label' not in data:
  print('[INFO] Creating labels list')
  # None marks an image that has not been labeled yet.
  data['label'] = np.array([None] * data['length'])


The next block of code will show you images from the data file in a random order. For each image you will have to input a letter to label the image. 
 - `a` should be for **A**urora. Use it whenever you can clearly see an aurora present
  - Examples: 
  
  
  ![Aurora_1](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/aurora_37.png)
  ![Aurora_2](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/aurora_48.png)
  ![Aurora_3](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/aurora_65.png)
  
  
 - `b` should be for **B**lack or no aurora
  - Examples:
  
  
  ![NoAurora_1](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/noaurora_7.png)
  ![NoAurora_13](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/noaurora_13.png)
  
  
 - `c` should be for **C**annot decide. 
  - This input should be used for images that do not clearly show the Earth, images that are extremely noisy and shouldn't be used, or images that you are not comfortable labeling as either `a` or `b`
  - These images will not be used for training so only assign this label if the images seem non-useful
  - Examples:
  
  
  ![Garbage_1](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/baddata_125.png)
  ![Garbage_2](https://raw.githubusercontent.com/bSolt/Project_HEPCATS/master/ips/cdf/baddata_70.png)
  
  
 - `p` can be input to return to the **P**revious image if you think you made a mistake or just want to double check.
 - `n` can be used similarly to go to the **N**ext image
 - `e` can be used to go to the **E**nd of the labeled images and show the first unlabeled image
 - `s` can be input at any time to **S**ave your work and download the labeled data file up to this point. This is useful if you need to take a break, but also if you just want to save your work. You might want to save your work often because the runtime can disconnect at times.
 
 
The program will automatically finish when all images have been labeled and the labeled data file will be downloaded.

In [0]:
# define the function for displaying an image
def display_current(data,i):
  """Show image ``i`` of ``data`` along with its index and current label.

  Args:
    data: dictionary providing the 'Image', 'label' and 'length' entries.
    i: index of the image to show.
  """
  # Short pause before redrawing (presumably lets earlier output flush -- TODO confirm).
  time.sleep(0.5)
  fig = plt.figure(figsize=(10,10))
  plt.imshow(data['Image'][i],cmap='nipy_spectral')
  plt.grid(False)
  # Replace the previous image and prompt instead of stacking output below them.
  display.clear_output(wait=False)
  display.display(fig)
  # Close the figure once it is rendered: this function runs once per command,
  # and figures that are never closed accumulate for the whole session.
  plt.close(fig)
  print(f"Index:{i}/{data['length']}, Current Classification:{data['label'][i]}")
# define the functions for different cases
def label_aurora(data,i):
  """Label image ``i`` as aurora ('a') and return the index of the next image."""
  previous = data['label'][i]
  print(f"Label set from {previous} to a")
  data['label'][i] = 'a'
  return i + 1
def label_black(data,i):
  """Label image ``i`` as black / no aurora ('b') and return the index of the next image."""
  previous = data['label'][i]
  print(f"Label set from {previous} to b")
  data['label'][i] = 'b'
  return i + 1
def label_none(data,i):
  """Label image ``i`` as cannot-decide ('c') and return the index of the next image."""
  previous = data['label'][i]
  print(f"Label set from {previous} to c")
  data['label'][i] = 'c'
  return i + 1
def go_previous(data,i):
  """Step back one image, staying on the first image when already there.

  Args:
    data: the labeling dictionary (unused; kept so every command handler
      shares the same signature).
    i: index of the image currently shown.

  Returns:
    ``i - 1``, but never less than 0. A negative index would silently wrap
    around to the end of the data instead of showing the previous image.
  """
  return max(i-1, 0)
def go_next(data,i):
  """Move on to the image after ``i`` without touching any label."""
  following = i + 1
  return following
def go_end(data,i):
  """Return the index of the first image whose label is still None.

  The incoming ``i`` is ignored. When every image already has a label,
  ``data['length']`` is returned instead.
  """
  first_gap = next(
      (pos for pos, current in enumerate(data['label']) if current is None),
      None,
  )
  if first_gap is None:
    return data['length']
  return first_gap
def save_down(data,*args):
  """Pickle ``data`` to ``out_name`` and start a browser download of that file.

  Args:
    data: the labeling dictionary to save.
    *args: optional. The first extra argument is taken to be the current
      image index and is returned unchanged, so this function can be used
      as a command handler like the others. Further arguments are ignored.

  Returns:
    The first extra argument, or None when none was given.
  """
  # out_name is the module-level output file name set by the processing cell.
  with open(out_name,'wb') as out_file:
    pickle.dump(data,out_file)
  files.download(out_name)
  # Return the caller's index rather than reading a global ``i``, which
  # may not exist yet or may not match the caller's position.
  return args[0] if args else None
 

 # define the cases dictionary
# Dispatch table: command letter -> handler. The labeling loop calls each
# handler as handler(data, i) and uses the return value as the next index.
# Key order is kept because it is shown in the invalid-input message.
cases = dict(
    a=label_aurora,
    b=label_black,
    c=label_none,
    p=go_previous,
    s=save_down,
    n=go_next,
    e=go_end,
)

In [0]:
# Set an index to keep track of where we are in the data
i=0
# An empty data set has nothing to display, so start out done in that case
# instead of failing on the first image lookup.
done = (i>=data['length'])
# execute until done
while (not done):
  display_current(data,i)
  com = input('Command: ')
  # Only the first character matters; a blank entry is treated as 'next'.
  if (len(com)>0):
    com = com[0].lower()
  else:
    print('blank was inputted. Interpreting as next')
    com = 'n'
    time.sleep(0.1)
  if (com not in cases.keys()): # unknown command: do nothing and ask again
    print(f'Input must be in {list(cases.keys())}, but your input was {com}')
    # Leave the message up briefly before the display is redrawn.
    time.sleep(1)
    continue
  # otherwise...
  # get the appropriate action for the command
  action = cases.get(com)
  # execute the action on the data at the current index
  i = action(data,i)
  # check if done
  done = (i>=data['length'])

# Moving forward with 'n' (or a blank entry) can reach the end while some
# images still have no label, so only claim completion when it is true.
remaining = sum(label is None for label in data['label'])
if remaining:
  print(f"[WARNING] Reached the end with {remaining} of {data['length']} images still unlabeled, download starting...")
else:
  print(f"Labeling is complete! {data['length']} images labeled, download starting...")
save_down(data,i)