### ASTR 598 - Astrostatistics - Class Project - Group 1

- Tyler Gordon
- Meredith Durbin 
- Brianna Thomas
- Joachim Moeyens
- Dino Bektesevic

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from astropy.io import fits
from astropy.coordinates import SkyCoord

% matplotlib inline

[Google Drive for NSC files](https://drive.google.com/drive/folders/1r0LgsJ4LjUgcxUIYwPUZl-YcwxqO0z5c?usp=sharing)

In [2]:
# Assumes there is a directory called data 
# in the same level as this notebook
DATA_DIR = "data/"
HLC_FILES = glob.glob(os.path.join(DATA_DIR, "HLC*.fits"))
NSC_FILES = glob.glob(os.path.join(DATA_DIR, "stripe82*.txt"))

In [3]:
HLC_FILES

['data/HLC.RA_00_to_01.fits',
 'data/HLC.RA_01_to_02.fits',
 'data/HLC.RA_22_to_23.fits',
 'data/HLC.RA_03_to_04.fits',
 'data/HLC.RA_23_to_24.fits',
 'data/HLC.RA_02_to_03.fits',
 'data/HLC.RA_21_to_22.fits',
 'data/HLC.RA_20_to_21.fits']

In [4]:
NSC_FILES

['data/stripe82_315_ra_45_0_dec_1_3.txt',
 'data/stripe82_315_ra_45_-1_3_dec_0.txt']

In [5]:
# Simple function to read in HLC and NSC files into a single dataframe

def buildHLCDataFrame(datafiles,
                      filters=["u", "g", "r", "i", "z"],
                      columns=["MEAN_PSFMAG", "MEAN_OBJECT_TYPE", "RA_PM_CLIP", "DEC_PM_CLIP"],
                      makeColors=True,
                      magColumnName="MEAN_PSFMAG",
                      colors=["g-i"]):
    
    dfs = []
    for datafile in datafiles:
        hdul = fits.open(datafile)
        data = hdul[1].data
        df = pd.DataFrame()

        for column in columns:
            # Assume multi-dimensional fits columns are keyed on filters
            if data[column][0].shape == (len(filters),):
                for i, filt in enumerate(filters):
                    df["{}_{}".format(column, filt)] = data[column][:, i]
            # If not multi-dimensional just add to dataframe as normal
            elif data[column][0].shape == ():
                df[column] = data[column]
            # If it isn't singular in dimension, or fits the expected number of filters
            # raise a hopefully useful error
            else:
                raise ValueError("Shape of multi-dimensional column data does not match number of filters!")

        if makeColors is True:
            for color in colors:
                # color = filt2 - filt1
                filt1 = color.split("-")[1]
                filt2 = color.split("-")[0]
                df[color] = df["{}_{}".format(magColumnName, filt2)] - df["{}_{}".format(magColumnName, filt1)]
        dfs.append(df)
    final = pd.concat(dfs)
    final.reset_index(inplace=True, drop=True)
    return final

def buildNSCDataFrame(datafiles):

    dfs = []
    for datafile in datafiles:
        df = pd.read_csv(NSC_FILES[0], sep=" ")
        dfs.append(df)
    
    final = pd.concat(dfs)
    final.reset_index(inplace=True, drop=True)
    return final

In [6]:
hlc = buildHLCDataFrame(HLC_FILES)

In [7]:
nsc = buildNSCDataFrame(NSC_FILES)