In [None]:
''' 
June 16 2023 

@author: Grady Robbins

A Jupyter notebook that analyzes the Cool Neighbors subjects. It contains a cross-match, an evaluation of accuracy by target type,
and an evaluation of every target's 'yes' votes by type, which returns the desired subjects.
'''

In [None]:
''' Cross-Match

A script to determine if candidates are present in a subject set using a csv of IDs to check.

Subject ID is cross referenced with original subject set to find RA and DEC.

Then RA and DEC are searched for in the csv file. If present, the candidate information is returned.
'''

import csv
import re
import numpy as np

def limit_RA_DEC(RA,DEC,CharacterCount):
    '''
    Truncate every RA and DEC entry to its first CharacterCount characters for subject matching.

    Each value is converted with str() and then sliced, so the lists end up holding strings.
    Both lists are modified in place and the same list objects are returned.

    parameters:
    RA - list of RA values, should be in degrees
    DEC - list of DEC values, should be in degrees (read at the same positions as RA)
    CharacterCount - number of leading characters to keep from each value
    returns:
    --------------------------------
    RA - the same list, now holding shortened str RA values
    DEC - the same list, now holding shortened str DEC values
    '''
    for position in range(len(RA)):
        # Convert both values before storing either, so a failure leaves the pair untouched.
        ra_text = str(RA[position])
        dec_text = str(DEC[position])
        RA[position] = ra_text[:CharacterCount]
        DEC[position] = dec_text[:CharacterCount]
    return RA,DEC

def FindSubjectCoords(filename = str):
    '''
    This function finds the RA and DEC coordinates for all subjects in a subject csv file.

    Every line is split on commas. A piece containing the text "RA"":"" contributes one RA
    value and a piece containing "DEC"":"" contributes one DEC value. The value is built from
    the digits, decimal points and minus signs found in that piece, so negative declinations
    keep their sign.

    Parameters:
    filename - path to subject csv file (must be supplied; the "= str" default is only a
               placeholder and cannot be opened)

    Returns:
    --------------------------------
    RA - list of str RA values in degrees
    DEC - list of str DEC values in degrees
    '''
    RA_subjects = []
    DEC_subjects = []
    with open(filename, 'r', newline='') as file: #load in file
        for line in file:
            for piece in line.split(','):
                # The two checks are independent: one piece is tested for both keys.
                if '"RA"":""' in piece:
                    RA_subjects.append(_coordinate_text(piece))
                if '"DEC"":""' in piece:
                    DEC_subjects.append(_coordinate_text(piece))
    return RA_subjects, DEC_subjects


def _coordinate_text(piece):
    '''Return only the characters of piece that can belong to a signed decimal number.'''
    return ''.join(ch for ch in piece if ch.isnumeric() or ch in '.-')

def FindTargetIDs(filename = str):
    '''
    This function finds target IDs for cross matching from csv containing only subject IDs

    Each non-blank line is parsed as an integer and stored as its decimal string, so
    surrounding whitespace and leading zeros are dropped. Blank lines are skipped; any other
    line that is not an integer raises ValueError.

    Parameters:
    filename - path to candidate csv file (must be supplied; the "= str" default is only a
               placeholder and cannot be opened)

    Returns:
    --------------------------------
    target_IDs - a list of target IDs to match with a larger subject set
    '''
    target_IDs = []
    with open(filename, 'r') as file:
        for line in file:
            entry = line.strip()
            if not entry:
                # An empty line (e.g. a stray one at the end of the file) is not an ID.
                continue
            target_IDs.append(str(int(entry)))
    return target_IDs

def FindCandidateCoords(larger_candidate_filename = str, target_IDs = list):
    '''
    This function grabs the RA and DEC coordinates for all candidates in a subject source csv file using subject ID.

    Each line is split on commas: column 0 is compared with the requested IDs, column 1 is
    taken as RA and column 2 as DEC. Surrounding whitespace (including the line terminator
    when DEC is the last column) is stripped from all three.

    NOTE: results follow the row order of the file, not the order of target_IDs, and IDs that
    are absent from the file produce no entry. The returned lists are therefore only aligned
    index-by-index with target_IDs when the file lists exactly those IDs in the same order.

    Parameters:
    larger_candidate_filename - path to larger candidate csv with all metadata
    target_IDs - a list of str subject IDs to match with a larger subject set; an ID listed
                 several times yields its coordinates that many times
    (both must be supplied; the "= str" / "= list" defaults are only placeholders)
    Returns:
    --------------------------------
    RA - list of str RA values in degrees
    DEC - list of str DEC values in degrees
    '''
    from collections import Counter

    # Count requested IDs once so each row needs a single dictionary lookup
    # instead of a scan over the whole ID list.
    requested = Counter(target_IDs)
    RA = []
    DEC = []
    with open(larger_candidate_filename, 'r', newline='') as file:
        for line in file:
            line_list = line.split(',')
            repeats = requested.get(line_list[0].strip(), 0)
            if not repeats:
                continue
            RA.extend([line_list[1].strip()] * repeats)
            DEC.extend([line_list[2].strip()] * repeats)
    print(len(RA), 'candidates to check for presence')
    return RA, DEC

def MatchRADEC(RA_candidates,DEC_candidates, RA_subjects, DEC_subjects, target_IDs = None):
    '''
    This function matches the RA and DEC coordinates for all candidates to a larger subject set and prints the matching RA, DEC, and subject IDs.

    A candidate matches a subject when the candidate RA text occurs inside the subject RA text
    and the candidate DEC text occurs inside the subject DEC text (substring test, so a
    shortened candidate value can match a longer subject value). Every matching
    candidate/subject pair is printed and counted; a progress line is printed every 500
    candidates and the total is printed at the end.

    Parameters:
    RA_candidates - list of str RA values in degrees for smaller candidate set
    DEC_candidates - list of str DEC values in degrees for smaller candidate set
    RA_subjects - list of str RA values in degrees for larger subject set
    DEC_subjects - list of str DEC values in degrees for larger subject set
    target_IDs - a list of subject IDs to print in event of a match; read at the same index
                 as the matching candidate
    Returns:
    --------------------------------
    None
    '''
    cross_match = 0
    for k, ra_candidate in enumerate(RA_candidates):
        if k % 500 == 0: #print progress
            print('step',k)
        # Subjects are walked as (RA, DEC) pairs; a subject missing one of the two is ignored.
        for ra_subject, dec_subject in zip(RA_subjects, DEC_subjects):
            if ra_candidate in ra_subject and DEC_candidates[k] in dec_subject:
                if target_IDs is not None:
                    print('present RA, DEC, subject_ID:',ra_candidate,DEC_candidates[k],target_IDs[k])
                else:
                    print('present RA, DEC:',ra_candidate,DEC_candidates[k])
                cross_match +=1
    print(cross_match,'total matching IDs')

# Run the cross-match: change the three filenames below to point at your own data.
RAsubjects, DECsubjects = FindSubjectCoords(r'Subjects.csv')  # Zooniverse subjects file
targetIDs = FindTargetIDs(r'CandidateIDs.csv')  # subject IDs to compare, one per line
# csv of all targets; column 0 is matched against targetIDs, columns 1 and 2 are read as RA and DEC
RAcandidates, DECcandidates = FindCandidateCoords(r'TargetFile.csv', target_IDs=targetIDs)

print(f'{len(RAsubjects)} total subjects present')
print(f'{len(RAcandidates)} total candidates present')

# targetIDs is optional here; without it only RA and DEC are printed for each match.
MatchRADEC(RAcandidates, DECcandidates, RAsubjects, DECsubjects, target_IDs=targetIDs)

In [None]:
""" Subject Results

A script to find subjects, from the Zooniverse classification file and target file, that received at least a specified number of movement votes.

Writes one CSV per target type containing movement ratio, RA, DEC, Zooniverse subject link, and WiseView link, titled 'typexresults.csv' where x is the type.
"""

import numpy as np
import pandas as pd
import math
import re
import csv
import copy
from alive_progress import alive_bar; import time

# Input file holding the classifications to analyze
ClassificationsFile = r'Launch_Classifications6_29_23.csv'

# Type values of the targets, as strings; the ones used here are 2^(0,2,3,4,5)
Types = ['1','4','8','16','32']
# Minimum number of movement votes for a target to be recorded
acceptance_threshold = 4

# One record per type. Every field is a list and the lists of a type are filled in step,
# so position k of each list describes the same subject.
_RECORD_FIELDS = ('RA', 'DEC', 'zoolink', 'bywlink', 'movement', 'total')
CoordinateMovement = {
    type_value: {field: [] for field in _RECORD_FIELDS}
    for type_value in Types
}

def returnfloat(string):
    """
    Build a float from the digits, decimal points and minus signs found in string.

    All other characters are discarded and the kept characters are joined in their original
    order before conversion. float() raises ValueError when the kept characters do not form
    a number (for example when string contains no digit).
    """
    kept = [char for char in string if char.isdigit() or char in ('.', '-')]
    return float(''.join(kept))

#separate data by type and remove bad IDs

bad_ID = [] # these IDs should not be counted, as the targets are not verified

# Count the data rows (every line except the header) so the progress bar has a total.
with open(ClassificationsFile, 'r') as ClassificationText:
    ClassificationCounter = sum(1 for _ in ClassificationText) - 1

# For each type, map a subject's (RA, DEC) text pair to its position in that type's lists.
# Looking the pair up together guarantees that votes land on the subject they belong to.
SubjectIndex = {
    t: {pair: n for n, pair in enumerate(zip(CoordinateMovement[t]['RA'], CoordinateMovement[t]['DEC']))}
    for t in CoordinateMovement
}

#get all data for each type and separate them in dictionaries
with open(ClassificationsFile, 'r') as ClassificationText, alive_bar(ClassificationCounter) as bar:
    for count, line in enumerate(ClassificationText):
        if count == 0:
            continue # header row holds no classification
        bar()
        # Reset per row so a row with missing metadata cannot reuse the previous row's values.
        RA = DEC = Type = ID = None
        for item in re.split(',',line):
            if 'RA"":' in item:
                RA = returnfloat(item)
            if 'DEC"":' in item:
                DEC = returnfloat(item)
            if '#BITMASK"":' in item:
                Type = str(int(returnfloat(item)))
            if '""ID"":""' in item:
                ID = str(int(returnfloat(item)))
        if RA is None or DEC is None or ID is None or Type not in CoordinateMovement:
            continue # incomplete metadata, or a type that is not listed in Types
        if ID in bad_ID:
            continue # unverified target: skip the whole classification
        record = CoordinateMovement[Type]
        pair = (str(RA), str(DEC))
        RA_index = SubjectIndex[Type].get(pair)
        if RA_index is None:
            # First classification seen for this subject: create its entry.
            bywlink = ('http://byw.tools/wiseview#ra='+str(RA)+'&dec='+str(DEC)+'&size=176&band=3&speed=20&minbright=-50.0000&maxbright=500.0000&window=0.5&diff_window=1&linear=1&color=&zoom=9&border=0&gaia=1&invert=1&maxdyr=0&scandir=0&neowise=0&diff=0&outer_epochs=0&unique_window=1&smooth_scan=0&shift=0&pmra=0&pmdec=0&synth_a=0&synth_a_sub=0&synth_a_ra=&synth_a_dec=&synth_a_w1=&synth_a_w2=&synth_a_pmra=0&synth_a_pmdec=0&synth_a_mjd=&synth_b=0&synth_b_sub=0&synth_b_ra=&synth_b_dec=&synth_b_w1=&synth_b_w2=&synth_b_pmra=0&synth_b_pmdec=0&synth_b_mjd=')
            zoolink = ('https://www.zooniverse.org/projects/coolneighbors/backyard-worlds-cool-neighbors/talk/subjects/'+str(ID))
            RA_index = len(record['RA'])
            record['RA'].append(pair[0])
            record['DEC'].append(pair[1])
            record['bywlink'].append(bywlink)
            record['zoolink'].append(zoolink)
            record['movement'].append(0)
            record['total'].append(0)
            SubjectIndex[Type][pair] = RA_index
        if '"Yes"' in line:
            record['movement'][RA_index] += 1
            record['total'][RA_index] += 1
        if '"No"' in line:
            record['total'][RA_index] += 1

# Write one results file per type with the subjects that reached the vote threshold.
for i in Types:
    with open(r'type'+str(i)+'results.csv','w', newline = '') as file:
        writer = csv.writer(file)
        record = CoordinateMovement[i]
        for k in range(len(record['bywlink'])):
            if record['total'][k] != 0 and record['movement'][k] >= acceptance_threshold:
                # Columns: movement ratio, RA, DEC, Zooniverse link, WiseView link
                writer.writerow([(record['movement'][k])/(record['total'][k]),record['RA'][k],record['DEC'][k],record['zoolink'][k],record['bywlink'][k]])


In [None]:
""" Type Evaluation

This script analyzes total movement ratio of subjects separated by type. Useful for larger reviews, not for individual classification.

Can also specify specific subject IDs that should not be counted in results
"""

import numpy as np
import math
import re
import csv
from alive_progress import alive_bar; import time

Types = ['1','4','8','16','32'] # enter type values here

#load in file
ClassificationFile = r'Classifications_File'

# Per-type containers (not filled by this script)
typeRA = {t: [] for t in Types}
typeDEC = {t: [] for t in Types}
typedata = {t: [] for t in Types}

#generate searchable patterns for each target by type
# NOTE: the first pattern has no leading quote while the others do; a row matched by the
# quoted form is also matched by the unquoted form, so the literals are left as they were.
patterntype = {Types[0]:'#BITMASK"":""'+Types[0]+'"',Types[1]:'"#BITMASK"":""'+Types[1]+'"',Types[2]:'"#BITMASK"":""'+Types[2]+'"',Types[3]:'"#BITMASK"":""'+Types[3]+'"',Types[4]:'"#BITMASK"":""'+Types[4]+'"'}

#read in data and search for specific type

move_count = {t: 0 for t in Types}
nomove_count = {t: 0 for t in Types}

# Number of lines minus one, i.e. the classifications without the header line.
with open(ClassificationFile, 'r') as ClassificationText:
    ClassificationCounter = sum(1 for _ in ClassificationText) - 1

# Single pass over the file; the bar is ticked once per line, header included.
with open(ClassificationFile, 'r') as ClassificationText, alive_bar(ClassificationCounter + 1) as bar:
    for line in ClassificationText:
        bar()
        # The quotes keep the answer test from matching unrelated text such as names.
        voted_yes = '"Yes"' in line
        voted_no = '"No"' in line
        if not (voted_yes or voted_no):
            continue
        for i in patterntype:
            if re.search(patterntype[i], line):
                if voted_yes:
                    move_count[i] +=1
                else:
                    nomove_count[i] +=1

for i in patterntype:
    decisions = move_count[i]+nomove_count[i]
    # A type without any decision has no defined ratio; report nan instead of dividing by zero.
    ratio = (move_count[i])/decisions*100 if decisions else float('nan')
    print('there were',move_count[i],'movement decisions and',nomove_count[i],'nonmovement decisions on type',i,'targets, giving a movement ratio of', ratio, '%')

print('Classifications Read')

print('there are ',sum(move_count.values())+sum(nomove_count.values()),'classifications counted and',ClassificationCounter,'total classifications.')
