### Retrieve color information from JSON files in the color directories and merge DECam and PS outputs

23 Jan 2025, J. Jensen and M. Cowles

In [1]:
import sys, os
import json
import csv
import datetime
import pandas as pd

# now = datetime.now()

def find_PS_files(root_dir):
    """Recursively find all PanSTARRS JSON files in the directory and subdirectories."""
    ps_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith('color_ps_gz.json'):
                ps_files.append(os.path.join(dirpath, filename))
    return ps_files

def find_DECam_files(root_dir):
    """Recursively find all DECam JSON files in the directory and subdirectories."""
    decam_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith('color_dc.json'):
                decam_files.append(os.path.join(dirpath, filename))
    return decam_files

def retrieve_PS_values(json_file_path, cutoffdate):
    """Retrieve color values for PanSTARRS."""
    with open(json_file_path, 'r') as file:
        data = json.load(file)
        
        # Initialize a dictionary to store the retrieved values
        retrieved_data = {
            'gzc0_PS': None,
            'gzc0sig_PS': None,
            'gzc1_PS': None,
            'gzc1sig_PS': None,
            'gzc2_PS': None,
            'gzc2sig_PS': None,
            'gzc5_PS': None,
            'gzc5sig_PS': None,
            'Name_PS': None,
            'PGC': None,
            'Color_PS': None,
            'Date_PS': None,
            'Version_PS': None,
            'extg_PS': None,
            'extz_PS': None,
        }
        
        retrieved_data['Name_PS'] = data.get("Name")
        retrieved_data['PGC'] = data.get("PGC")
        retrieved_data['Color_PS'] = data.get("Color")
        retrieved_data['Date_PS'] = data.get("Date and time run")
        retrieved_data['Version_PS'] = data.get("Code version")
       
        if (str(retrieved_data['Date_PS']) == 'None'):
            return
        elif (retrieved_data['Date_PS'] < cutoffdate):
            return
        else:
            for item in data.get("Extinction", []):
                if "Extinction g (mag)" in item:
                    retrieved_data['extg_PS'] = item["Extinction g (mag)"]
                elif "Extinction z (mag)" in item:
                    retrieved_data['extz_PS'] = item["Extinction z (mag)"]
                    break
                    
            for item in data.get("g-z", []):
                if "(g-z) in c0" in item:
                    retrieved_data['gzc0_PS'] = item["(g-z) in c0"]
                elif "(g-z) in c1" in item:
                    retrieved_data['gzc1_PS'] = item["(g-z) in c1"]
                elif "(g-z) in c2" in item:
                    retrieved_data['gzc2_PS'] = item["(g-z) in c2"]
                elif "(g-z) in c5" in item:
                    retrieved_data['gzc5_PS'] = item["(g-z) in c5"]               
                    break

            for item in data.get("g-z uncertainty", []):
                if "(g-z) sigma in c0" in item:
                    retrieved_data['gzc0sig_PS'] = item["(g-z) sigma in c0"]
                elif "(g-z) sigma in c1" in item:
                    retrieved_data['gzc1sig_PS'] = item["(g-z) sigma in c1"]
                elif "(g-z) sigma in c2" in item:
                    retrieved_data['gzc2sig_PS'] = item["(g-z) sigma in c2"]
                elif "(g-z) sigma in c5" in item:
                    retrieved_data['gzc5sig_PS'] = item["(g-z) sigma in c5"]
                    break
       
            return retrieved_data

def retrieve_DC_values(json_file_path, cutoffdate):
    """Retrieve DECam color values."""
    with open(json_file_path, 'r') as file:
        data = json.load(file)
        
        # Initialize a dictionary to store the retrieved values
        retrieved_data = {
            'gzc0_DC': None,
            'gzc0sig_DC': None,
            'gzc1_DC': None,
            'gzc1sig_DC': None,
            'gzc2_DC': None,
            'gzc2sig_DC': None,
            'gzc5_DC': None,
            'gzc5sig_DC': None,
            'gic0_DC': None,
            'gic0sig_DC': None,
            'gic1_DC': None,
            'gic1sig_DC': None,
            'gic2_DC': None,
            'gic2sig_DC': None,
            'gic5_DC': None,
            'gic5sig_DC': None,
            'Name_DC': None,
            'PGC': None,
            'Color_DC': None,
            'Date_DC': None,
            'Version_DC': None,
            'extg_DC': None,
            'exti_DC': None,
            'extz_DC': None,
        }
        
        retrieved_data['Name_DC'] = data.get("Name")
        retrieved_data['PGC'] = data.get("PGC")
        retrieved_data['Color_DC'] = data.get("Color")
        retrieved_data['Date_DC'] = data.get("Date and time run")
        retrieved_data['Version_DC'] = data.get("Code version")
       
        if (str(retrieved_data['Date_DC']) == 'None'):
            return
        elif (retrieved_data['Date_DC'] < cutoffdate):
            return
        else:
            for item in data.get("Extinction", []):
                if "Extinction g (mag)" in item:
                    retrieved_data['extg_DC'] = item["Extinction g (mag)"]
                elif "Extinction i (mag)" in item:
                    retrieved_data['exti_DC'] = item["Extinction i (mag)"]
                elif "Extinction z (mag)" in item:
                    retrieved_data['extz_DC'] = item["Extinction z (mag)"]
                    break
                    
            for item in data.get("g-z", []):
                if "(g-z) in c0" in item:
                    retrieved_data['gzc0_DC'] = item["(g-z) in c0"]
                elif "(g-z) in c1" in item:
                    retrieved_data['gzc1_DC'] = item["(g-z) in c1"]
                elif "(g-z) in c2" in item:
                    retrieved_data['gzc2_DC'] = item["(g-z) in c2"]
                elif "(g-z) in c5" in item:
                    retrieved_data['gzc5_DC'] = item["(g-z) in c5"]               
                    break

            for item in data.get("g-z uncertainty", []):
                if "(g-z) sigma in c0" in item:
                    retrieved_data['gzc0sig_DC'] = item["(g-z) sigma in c0"]
                elif "(g-z) sigma in c1" in item:
                    retrieved_data['gzc1sig_DC'] = item["(g-z) sigma in c1"]
                elif "(g-z) sigma in c2" in item:
                    retrieved_data['gzc2sig_DC'] = item["(g-z) sigma in c2"]
                elif "(g-z) sigma in c5" in item:
                    retrieved_data['gzc5sig_DC'] = item["(g-z) sigma in c5"]
                    break
                     
            for item in data.get("g-i", []):
                if "(g-i) in c0" in item:
                    retrieved_data['gic0_DC'] = item["(g-i) in c0"]
                elif "(g-i) in c1" in item:
                    retrieved_data['gic1_DC'] = item["(g-i) in c1"]
                elif "(g-i) in c2" in item:
                    retrieved_data['gic2_DC'] = item["(g-i) in c2"]
                elif "(g-i) in c5" in item:
                    retrieved_data['gic5_DC'] = item["(g-i) in c5"]               
                    break

            for item in data.get("g-i uncertainty", []):
                if "(g-i) sigma in c0" in item:
                    retrieved_data['gic0sig_DC'] = item["(g-i) sigma in c0"]
                elif "(g-i) sigma in c1" in item:
                    retrieved_data['gic1sig_DC'] = item["(g-i) sigma in c1"]
                elif "(g-i) sigma in c2" in item:
                    retrieved_data['gic2sig_DC'] = item["(g-i) sigma in c2"]
                elif "(g-i) sigma in c5" in item:
                    retrieved_data['gic5sig_DC'] = item["(g-i) sigma in c5"]
                    break
       
            return retrieved_data
    

In [2]:
# Look through the files and create a temporary csv file with the combined results
def process_json_files(root_dir, ps_cutoff, decam_cutoff):
    """Process all JSON files in the directory and write specific values to a CSV file."""
    PS_files = find_PS_files(root_dir)
    DECam_files = find_DECam_files(root_dir)
    
    with open('tmpPSout.csv', 'w', newline='') as pscsv:
        fieldnames = ['Name_PS','PGC','gzc0_PS','gzc0sig_PS','gzc1_PS','gzc1sig_PS','gzc2_PS','gzc2sig_PS','gzc5_PS','gzc5sig_PS','Color_PS','Date_PS','Version_PS','extg_PS','extz_PS']
        writer = csv.DictWriter(pscsv, fieldnames=fieldnames)
        writer.writeheader()
        for PS_file in PS_files:
            specific_values = retrieve_PS_values(PS_file, ps_cutoff)
            if specific_values:
                writer.writerow(specific_values)

    with open('tmpDCout.csv', 'w', newline='') as dccsv:
        fieldnames = ['Name_DC','PGC','gzc0_DC','gzc0sig_DC','gzc1_DC','gzc1sig_DC','gzc2_DC','gzc2sig_DC','gzc5_DC','gzc5sig_DC','gic0_DC','gic0sig_DC','gic1_DC','gic1sig_DC','gic2_DC','gic2sig_DC','gic5_DC','gic5sig_DC','Color_DC','Date_DC','Version_DC','extg_DC','exti_DC','extz_DC']
        writer = csv.DictWriter(dccsv, fieldnames=fieldnames)
        writer.writeheader()
        for DECam_file in DECam_files:
            specific_values = retrieve_DC_values(DECam_file, decam_cutoff)
            if specific_values:
                writer.writerow(specific_values)
                

In [3]:
# Example usage:
#root_directory = '/Users/Joe/data/PScolors-2024'  # Replace with the root directory path
root_directory = '/Users/Joe/data/'  # Replace with the root directory path

# Cutoff dates
ps_cutoff = '2025-01-03'
decam_cutoff = '2025-01-15'

# Call the function to process the JSON files and write to CSV
process_json_files(root_directory, ps_cutoff, decam_cutoff)


In [4]:
# Now load the two temp files, relabel columns as needed, and merge based on Name or PGC
csv_file_PS = 'tmpPSout.csv'
csv_file_DC = 'tmpDCout.csv'

# Read the CSV file into a DataFrame
dfps = pd.read_csv(csv_file_PS)
dfdc = pd.read_csv(csv_file_DC)   

#dfps = dfps[["Name","PGC","Date","Version","(g-z) in c0","(g-z) sigma in c0","(g-z) in c1","(g-z) sigma in c1","(g-z) in c2","(g-z) sigma in c2","(g-z) in c5","(g-z) sigma in c5"]].dropna()
#dfdc = dfdc[["Name","PGC","Date","Version","(g-z) in c0","(g-z) sigma in c0","(g-z) in c1","(g-z) sigma in c1","(g-z) in c2","(g-z) sigma in c2","(g-z) in c5","(g-z) sigma in c5","(g-i) in c0","(g-i) sigma in c0","(g-i) in c1","(g-i) sigma in c1","(g-i) in c2","(g-i) sigma in c2","(g-i) in c5","(g-i) sigma in c5","extg","exti","extz"]].dropna()

dfdc


Unnamed: 0,Name_DC,PGC,gzc0_DC,gzc0sig_DC,gzc1_DC,gzc1sig_DC,gzc2_DC,gzc2sig_DC,gzc5_DC,gzc5sig_DC,...,gic2_DC,gic2sig_DC,gic5_DC,gic5sig_DC,Color_DC,Date_DC,Version_DC,extg_DC,exti_DC,extz_DC
0,n1375,13266.0,1.135919,0.010702,1.143099,0.010702,1.140892,0.010707,,,...,0.936661,0.009669,,,DECam,2025-01-17 17:22:53.270246,DECam-colors 2025-01-08,0.045,0.022,0.017
1,n5419,50100.0,1.514569,0.017964,1.497684,0.017964,1.47878,0.017964,1.457937,0.017965,...,1.191155,0.015157,1.174853,0.015158,DECam,2025-01-20 18:49:25.988129,DECam-colors 2025-01-19,0.235,0.116,0.088
2,n5018,45908.0,,0.021804,1.171,0.0218,1.158373,0.0218,1.138958,0.021802,...,0.931984,0.018206,0.909768,0.018207,DECam,2025-01-20 17:31:33.165014,DECam-colors 2025-01-19,0.308,0.152,0.116
3,n7187,67909.0,1.329931,0.012435,1.340759,0.01246,1.285059,0.012534,,,...,1.079898,0.01096,,,DECam,2025-01-20 22:34:29.162933,DECam-colors 2025-01-19,0.111,0.055,0.042
4,n5222,47871.0,,0.011807,1.397605,0.011721,1.350907,0.011751,,,...,1.101803,0.010493,,,DECam,2025-01-20 18:24:42.078454,DECam-colors 2025-01-19,0.088,0.043,0.033
5,n4767,43845.0,1.446153,0.024032,1.425686,0.024032,1.392764,0.024033,,,...,1.117536,0.01995,,,DECam,2025-01-20 17:05:56.743295,DECam-colors 2025-01-19,0.347,0.171,0.13
6,eso479-g007,9338.0,1.288939,0.011086,1.288185,0.01113,,,,,...,,,,,DECam,2025-01-20 13:57:31.366021,DECam-colors 2025-01-19,0.063,0.031,0.024
7,mcg-02-33-017,43424.0,1.488631,0.014732,1.507066,0.014731,1.3744,0.014735,1.311574,0.014745,...,1.135223,0.012675,1.090813,0.012686,DECam,2025-01-20 14:02:11.784191,DECam-colors 2025-01-19,0.168,0.083,0.063
8,n5304,49090.0,1.393443,0.015236,1.366593,0.015238,1.320841,0.015245,,,...,1.085145,0.01309,,,DECam,2025-01-20 18:41:47.602154,DECam-colors 2025-01-19,0.18,0.089,0.068
9,eso352-g057,4972.0,1.313028,0.011764,1.290572,0.011802,,,,,...,,,,,DECam,2025-01-20 12:30:30.664640,DECam-colors 2025-01-19,0.09,0.044,0.034


In [5]:
# Merge and rename columns for convenience in space-delimited output files

dfout = pd.merge(dfps, dfdc, on="PGC", how="outer", suffixes=("_PS", "_DC"))
dfout

Unnamed: 0,Name_PS,PGC,gzc0_PS,gzc0sig_PS,gzc1_PS,gzc1sig_PS,gzc2_PS,gzc2sig_PS,gzc5_PS,gzc5sig_PS,...,gic2_DC,gic2sig_DC,gic5_DC,gic5sig_DC,Color_DC,Date_DC,Version_DC,extg_DC,exti_DC,extz_DC
0,n1278,12438,1.394789,0.045086,1.387823,0.053623,,,,,...,,,,,,,,,,
1,n4993,45657,1.253983,0.037167,1.211533,0.039832,,,,,...,,,,,,,,,,
2,n2258,19622,1.426408,0.033202,1.393570,0.035605,1.335276,0.046293,,,...,,,,,,,,,,
3,n1684,16219,1.329308,0.031216,1.285173,0.026571,1.250834,0.035367,,,...,,,,,,,,,,
4,n0507,5098,1.380023,0.027591,1.385220,0.029540,1.407750,0.034772,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,,66041,,,,,,,,,...,1.107192,0.012158,,,DECam,2025-01-20 23:57:36.529556,DECam-colors 2025-01-19,0.151,0.074,0.057
146,,66694,,,,,,,,,...,1.170786,0.010984,1.147543,0.010987,DECam,2025-01-22 14:01:57.240054,DECam-colors 2025-01-19,0.113,0.056,0.042
147,,64041,,,,,,,,,...,1.168685,0.014310,1.150727,0.014311,DECam,2025-01-20 23:02:50.870873,DECam-colors 2025-01-19,0.214,0.106,0.081
148,,64230,,,,,,,,,...,1.116563,0.011104,,,DECam,2025-01-21 12:18:38.265930,DECam-colors 2025-01-19,0.116,0.057,0.044


In [6]:
# Transform PS to DECam photometry
# DCg-z = PSg-z + c0 + c1*(g-i) + c2*(g-i)^2 + c3*(g-i)^3
c0 = -0.02521
c1 = 0.11294
c2 = -0.01796
c3 = 0.00285
#new = dfdc.
#dfpstrans = dfps.values...

output_csv_file = 'colordatabase.csv'  # Replace with the desired output CSV file name

columns=['PGC','gzc0_PS','gzc0sig_PS','gzc1_PS','gzc1sig_PS','gzc2_PS','gzc2sig_PS','gzc5_PS','gzc5sig_PS','gzc0_DC','gzc0sig_DC','gzc1_DC','gzc1sig_DC','gzc2_DC','gzc2sig_DC','gzc5_DC','gzc5sig_DC','extg_PS','extz_PS','extg_DC','exti_DC','extz_DC','Name_PS','Name_DC']
dfout.to_csv(output_csv_file, mode='w', sep=',', na_rep='', float_format='%.3f', columns=columns, header=True, index=False, encoding='utf-8')

In [10]:
! cat {output_csv_file}

PGC,gzc0_PS,gzc0sig_PS,gzc1_PS,gzc1sig_PS,gzc2_PS,gzc2sig_PS,gzc5_PS,gzc5sig_PS,gzc0_DC,gzc0sig_DC,gzc1_DC,gzc1sig_DC,gzc2_DC,gzc2sig_DC,gzc5_DC,gzc5sig_DC,extg_PS,extz_PS,extg_DC,exti_DC,extz_DC,Name_PS,Name_DC
12438,1.395,0.045,1.388,0.054,,,,,,,,,,,,,0.521,0.217,,,,n1278,
45657,1.254,0.037,1.212,0.040,,,,,,,,,,,,,0.391,0.163,,,,n4993,
19622,1.426,0.033,1.394,0.036,1.335,0.046,,,,,,,,,,,0.406,0.169,,,,n2258,
16219,1.329,0.031,1.285,0.027,1.251,0.035,,,,,,,,,,,0.184,0.077,,,,n1684,
5098,1.380,0.028,1.385,0.030,1.408,0.035,,,,,,,,,,,0.197,0.082,,,,n0507,
25144,1.390,0.030,1.363,0.030,1.338,0.041,,,,,,,,,,,0.062,0.026,,,,n2693,
9997,1.416,0.026,1.395,0.026,1.403,0.033,,,,,,,,,,,0.099,0.041,,,,n1016,
66537,1.397,0.035,1.371,0.036,,,,,,,,,,,,,0.389,0.162,,,,n7052,
20338,1.410,0.040,1.404,0.038,,,,,,,,,,,,,0.235,0.098,,,,n2340,
4224,1.430,0.028,1.398,0.028,1.378,0.033,,,,,,,,,,,0.186,0.077,,,,n0410,
None,1.405,0.026,1.403,0.027,1.402,0.035,,,1.431,0.028,1.405,0.028,,,,,0.085,0.0

In [8]:
# clean up
temp = csv_file_PS
! rm {temp}
temp = csv_file_DC
! rm {temp}


In [9]:
# how to change the headings if needed:
# dfout.rename(columns={'old': 'new', 'old': 'new'}, inplace=True)
