# Adding Moon Phases to the Master Dataset
Because we first needed to have a clean set of dates and lat/longs, we will do this part last. Nice and quick.

In [None]:
# Import the libraries needed to add moon phases (via pylunar) to the dataset.
import datetime as dt
import glob
import os

import numpy as np
import pandas as pd
import pylunar

In [None]:
# pylunar takes two tuples of 3 ints:
# first the latitude as (degrees, minutes, seconds),
# then the longitude as (degrees, minutes, seconds).

# So we need to make a function that can convert decimal degrees to degrees, minutes, seconds.
def to_dms(dd):
    """Split decimal degrees into a (degrees, minutes, seconds) tuple.

    The degrees component is the integer part of ``dd`` truncated toward
    zero and keeps the sign of the input. Minutes (an int) and seconds
    (whatever numeric type the arithmetic yields, usually a float) are
    always returned as non-negative magnitudes.

    Note: because only the degrees component carries the sign, an input
    strictly between -1 and 0 produces a degrees value of 0 and the
    negative sign is not represented in the result.
    """
    whole_degrees = int(dd)
    # Fractional degrees expressed in minutes (negative for negative input).
    minutes_exact = (dd - whole_degrees) * 60
    whole_minutes = int(minutes_exact)
    # Whatever is left after the whole minutes, expressed in seconds.
    seconds_exact = (minutes_exact - whole_minutes) * 60
    return (whole_degrees, abs(whole_minutes), abs(seconds_exact))

In [None]:
# The author's working assumption is that all of southern California sees the
# same moon, so the lunar phase only has to be generated for one location
# across the whole study period.
date_sequence = pd.date_range(start='2012-01-01', end='2019-11-01')

# pylunar requires a (year, month, day, hour, minute, second) tuple, so pull
# the year, month and day out of every date as zero-padded text first.
year, month, day = (date_sequence.strftime(fmt) for fmt in ('%Y', '%m', '%d'))

# One numeric column per date component, one row per day, for easy iteration.
dates = pd.DataFrame({
    'Year': pd.to_numeric(year),
    'Month': pd.to_numeric(month),
    'Day': pd.to_numeric(day),
})

In [None]:
# A single grid location is needed to generate the moon phase for, so read
# just the first data row of the first yearly file.
first_file = 'D:/Documents/SpringBoard/capstone-1/datasets/final_files/combined/filled/2012-without_moon.csv'
temp = pd.read_csv(first_file, nrows=1)

In [None]:
# Convert the first row's latitude and longitude from decimal degrees to
# (degrees, minutes, seconds) tuples with to_dms.
lat, lng = (to_dms(temp[column][0]) for column in ('Lat', 'Lng'))

In [None]:
# Create the pylunar moon object for the chosen location, passing the
# latitude and longitude as (degrees, minutes, seconds) tuples.
# NOTE(review): per the original author's notes, a new MoonInfo starts out at
# the current datetime for that location and works in UTC, and the dataset
# dates are in UTC as well - confirm both against the pylunar documentation
# and the upstream data before relying on them.
moon = pylunar.MoonInfo(lat, lng)

In [None]:
# Collect the moon phase name for every date in the study period.
phases = []  # one phase name per row of `dates`, in the same order

# Walk the (Year, Month, Day) rows of a single array instead of doing three
# scalar .loc lookups per date through an index loop.
for yr, mo, dy in dates[['Year', 'Month', 'Day']].to_numpy():
    # Point the moon object at 00:00:00 on that date; pylunar expects a
    # (year, month, day, hour, minute, second) tuple.
    moon.update((yr, mo, dy, 0, 0, 0))
    phases.append(moon.phase_name())  # phase name for the date just set

In [None]:
# Tidy up: turn the list of phase names into a two-column dataframe, pairing
# each phase with its date as a 'YYYY-MM-DD' string so it can be merged with
# the full dataset.
phases = pd.DataFrame({
    'MoonPhase': phases,
    'Date': date_sequence.strftime('%Y-%m-%d'),
})

In [None]:
# Finally, add the moon phase to each of the yearly datasets.
data_dir = 'D:/Documents/SpringBoard/capstone-1/datasets/final_files/combined/filled/'

# Only pick up the '<year>-without_moon.csv' inputs. The outputs are written
# into this same folder, so a bare '*' would also match the '<year>_all.csv'
# files from a previous run and re-process them.
file_names = glob.glob(data_dir + '*-without_moon.csv')

for file in file_names:  # for each yearly input file
    dat = pd.read_csv(file)
    # Left merge on the date so every row of dat is kept and only the moon
    # phases for dates present in dat are attached.
    dat = dat.merge(phases, how='left', on='Date')
    # Take the bare file name regardless of which path separator glob used,
    # then keep the part before '-without_moon.csv', i.e. the year.
    filename = os.path.basename(file)
    filename = filename.split('-')[0]
    # Save next to the inputs as '<year>_all.csv', without an index column.
    dat.to_csv(data_dir + filename + '_all.csv', index=False)