In [None]:
%matplotlib inline

import os
import healpy as hp
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import pyspark
import pyspark.sql.functions as sparkfunc
import astropy.io.fits

# Render inline figures at 120 DPI for every plot produced in this notebook.
matplotlib.rcParams.update({'figure.dpi': 120})

from dustmaps.sfd import SFDQuery

In [None]:
from pyspark.sql import SparkSession

# Session settings, applied in order to the builder: a local master with
# 8 worker threads, 16G of off-heap memory, 16G of driver memory, and the
# Arrow option for pandas <-> Spark conversions.
session_builder = SparkSession.builder
for option_key, option_value in (
    ('spark.master', "local[8]"),
    ('spark.memory.offHeap.enabled', 'true'),
    ('spark.memory.offHeap.size', '16G'),
    ('spark.driver.memory', '16G'),
    ("spark.sql.execution.arrow.enabled", "true"),
):
    session_builder = session_builder.config(option_key, option_value)

# Reuses an already-running session if one exists, otherwise starts one.
spark = session_builder.getOrCreate()

In [None]:
# Load the Gaia catalogue and (re)derive the level-12 HEALPix index of every
# source from its source_id, replacing any pre-existing "hpix12" column.
gaia_ = spark.read.load("/epyc/data/gaia_dr2_1am_dup/")

# hpix12 = source_id // 2**35 (34359738368 == 2**35).  This is done as an
# exact integer right shift rather than `floor(source_id / 34359738368)`:
# Spark evaluates `/` in double precision, and source_id values exceed 2**53,
# so an id lying just below a 2**35 boundary could be rounded up and land in
# the neighbouring pixel.
# NOTE(review): assumes source_id is stored as (or losslessly castable to) a
# 64-bit integer, as in the Gaia data model — confirm for this table.
gaia = gaia_.drop("hpix12").withColumn(
    "hpix12",
    sparkfunc.expr("shiftright(cast(source_id AS bigint), 35)"),
)

In [None]:
# Set the Arrow option on the live session as well: getOrCreate() may have
# returned a pre-existing session.
spark.conf.set(
    "spark.sql.execution.arrow.enabled",
    "true",
)

In [None]:
%%time

# Build a Spark lookup table {hpix12 -> E(B-V)} from the SFD dust map.
#
# The map is sampled at the pixel centres of a HEALPix grid of order
# `sfd_order`, then up-sampled to order 12 so that it can be joined on the
# per-source "hpix12" index.
sfd_order = 9
sfd = SFDQuery()
sfd_nside = hp.order2nside(sfd_order)
npix = hp.nside2npix(sfd_nside)

# nest=True is required: the map is handed to ud_grade() as a NESTED map
# below and is ultimately joined on a nested pixel index.  With the default
# RING ordering, pixel i of `reddening` would describe a different sky
# position than nested pixel i.
angle_theta, angle_phi = hp.pix2ang(sfd_nside, np.arange(npix), nest=True)

# healpy returns the colatitude theta (0 at the north pole, pi at the south
# pole) and the longitude phi, both in radians.  Hence
#   RA  = phi
#   Dec = 90 deg - theta      (not theta - 90 deg, which mirrors the sky)
ra_deg = np.rad2deg(angle_phi)
dec_deg = 90.0 - np.rad2deg(angle_theta)

reddening = sfd.query_equ(ra_deg, dec_deg)

# Up-sample to order 12; order_out defaults to order_in, so the result is
# NESTED and entry i is the E(B-V) of nested level-12 pixel i.
EBV_map_hpix12 = hp.ud_grade(reddening, hp.order2nside(12), order_in='NEST')
pandas_df = pd.DataFrame({"hpix12": np.arange(len(EBV_map_hpix12)),
                          "EBV": EBV_map_hpix12})

reddening_df = spark.createDataFrame(pandas_df)

In [None]:
%%time
# Attach the E(B-V) value of its level-12 HEALPix pixel to every source.
# Inner join (the default): rows without a matching "hpix12" are dropped.
gaia_w_EBV = gaia.join(
    reddening_df,
    on="hpix12",
    how="inner",
)

In [None]:
%%time
# Extinction-corrected G magnitude and BP-RP colour: subtract a fixed
# multiple of E(B-V) from each observed quantity.
# NOTE(review): the coefficients 3.1 and 1.2919 are taken as-is — confirm
# they match the extinction law intended for the Gaia bands.
ebv_column = gaia_w_EBV['EBV']
g_mag_corrected = gaia_w_EBV['phot_g_mean_mag'] - 3.1*ebv_column
bp_rp_corrected = gaia_w_EBV['bp_rp'] - 1.2919*ebv_column

# cache() only marks the frame for caching (and returns the same frame);
# the count() below is the action that actually runs the join and fills the
# cache, and its value is the cell's displayed output.
gaia_g0 = (
    gaia_w_EBV
    .withColumn("phot_g0", g_mag_corrected)
    .withColumn("bp_rp_0", bp_rp_corrected)
    .cache()
)
gaia_g0.count()