In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import special
import h5py
from astropy.wcs import WCS
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import types
import numpy.ma as ma
import sys

# Base directory for every input file read by this notebook. Paths are built by
# plain string concatenation (ROOT_FOLDER + name), so the trailing "/" is required.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
ROOT_FOLDER = "/Volumes/Seagate Backup Plus Drive/galaxy-groups-data/"


# Examination of the MXXL HDF5 Data


In [None]:
# Number of leading rows read from each HDF5 dataset by the cells that slice
# with [0:DATA_CUT_INDEX]; keeps the exploratory cells small.
DATA_CUT_INDEX = 100000

In [None]:
# Open the 3-pass weights file read-only; it stays open because later cells
# read datasets from it lazily.
weights = h5py.File(ROOT_FOLDER + 'weights_3pass.hdf5', mode='r')

# List the top-level entries, then the members of the 'Data' and 'Weight' groups.
print(list(weights.keys()))
print(list(weights['Data'].keys()))
print(list(weights['Weight'].keys()))


In [None]:
# choose 1 of the 2048 fiber assignment realizations with this bitstring:
# keep only the lowest bit of the first DATA_CUT_INDEX entries of bitweight0.
assigned = np.bitwise_and(weights['Weight/bitweight0'][:DATA_CUT_INDEX], 1)

print(np.count_nonzero(assigned == 1), "galaxies were assigned a fiber")
print(np.count_nonzero(assigned == 0), "galaxies were NOT assigned a fiber")


In [None]:
# MEMORY TEST
# Read the first DATA_CUT_INDEX rows straight from `weights`. (No alias called
# `input` is created: that name would shadow the Python builtin in later cells.)
dec = weights['Data/dec'][0:DATA_CUT_INDEX]
ra = weights['Data/ra'][0:DATA_CUT_INDEX]
z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
app_mag = weights['Data/app_mag'][0:DATA_CUT_INDEX]

bright_filter = app_mag < 19.5 # makes a filter array (True/False values)
redshift_filter = z_obs > 0 # makes a filter array (True/False values)
keep = np.all([bright_filter, redshift_filter], axis=0)
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]

count = len(dec)
print(count, "galaxies in HDF5 file")

# choose 1 of the 2048 fiber assignment realizations with this bitstring
# (`assigned` comes from the earlier cell and covers the same DATA_CUT_INDEX rows)
fiber_assigned_0 = assigned.astype(bool)
fiber_assigned_0 = fiber_assigned_0[keep]
print(np.sum(fiber_assigned_0), "galaxies were assigned a fiber")

# NOTE(review): the constructor receives ra/dec in degrees while get_z below is
# queried in radians — confirm that this is what NearestNeighbor expects.
nn = NearestNeighbor(ra[fiber_assigned_0], dec[fiber_assigned_0], z_obs[fiber_assigned_0])

# Convert the coordinates to radians once, instead of building two astropy
# Angle objects per galaxy inside the loop.
ra_rad = coord.Angle(ra * u.degree).radian
dec_rad = coord.Angle(dec * u.degree).radian

# z_eff: same as z_obs if a fiber was assigned and thus a real redshift measurement was made
# otherwise, it is an assigned value.
# nearest neighbor will find the nearest (measured) galaxy and use its redshift.
z_eff = np.copy(z_obs)
z_err = np.zeros(len(z_obs))
for i in np.flatnonzero(~fiber_assigned_0):
    new_z = nn.get_z(ra_rad[i], dec_rad[i])

    # Fractional error of the borrowed redshift relative to the true one.
    # At this point z_eff[i] is still z_obs[i], which redshift_filter keeps > 0,
    # so the division is safe.
    z_err[i] = abs(z_eff[i] - new_z) / z_eff[i]
    z_eff[i] = new_z



In [None]:
# Histogram (50 bins, logarithmic counts) of z_err from the nearest-neighbour cell.
plt.hist(z_err, 50)
plt.xlabel('Fractional error')
plt.yscale('log')
plt.ylabel('Count')

In [None]:
# Read the complete z_obs column (not just the first DATA_CUT_INDEX rows).
all_z_obs = weights['Data/z_obs'][:]

print("Index of blueshifted one: ", np.argmin(all_z_obs))

# Use the array's own min(): the builtin min() loops element by element in
# Python, which is very slow on a full-size column.
print(all_z_obs.min(), len(all_z_obs))

# Mask every redshift <= 0 and take the minimum of what is left.
# MaskedArray.min() ignores masked entries; the builtin min() would return the
# `masked` constant whenever the first element happens to be masked.
masked_all_z_obs = ma.masked_less_equal(all_z_obs, 0.0)
print(masked_all_z_obs.min(), len(masked_all_z_obs))

# Number of masked (z_obs <= 0) entries; shown as the cell output.
np.count_nonzero(masked_all_z_obs.mask)


In [None]:
# galaxy_type for the first DATA_CUT_INDEX rows: 0 1 2 3 possible
small_gal_type = weights['Data']['galaxy_type'][:DATA_CUT_INDEX]
# plt.hist returns a (counts, edges, patches) tuple, kept here under the name `bins`.
bins = plt.hist(small_gal_type, 50)

In [None]:
# Observed redshift for the first DATA_CUT_INDEX rows and its 50-bin histogram.
small_z_obs = weights['Data']['z_obs'][:DATA_CUT_INDEX]
bins = plt.hist(small_z_obs, 50)


In [None]:
# Smallest redshift in the subsample, before and after masking values <= 0.00143.
# NOTE(review): the origin of the 0.00143 threshold is not stated here — confirm.
print(small_z_obs.min(), len(small_z_obs))
masked_small_z_obs = ma.masked_less_equal(small_z_obs, 0.00143)
# MaskedArray.min() skips masked entries; the builtin min() iterates in Python
# and returns the `masked` constant if the first element is masked.
print(masked_small_z_obs.min(), len(masked_small_z_obs))

# Number of entries at or below the threshold; shown as the cell output.
np.count_nonzero(masked_small_z_obs.mask)


In [None]:
# Probe get_max_observable_volume with a negative redshift (-0.02), presumably to
# see how a blueshifted object is handled — TODO confirm the intent.
# NOTE(review): FlatLambdaCDM is not imported explicitly in this notebook; it
# presumably arrives through `from pyutils import *` — verify.
_cosmo = FlatLambdaCDM(H0=73, Om0=0.25, Ob0=0.045, Tcmb0=2.725, Neff=3.04) 
# NOTE(review): despite its name, abs_mag holds log10 of the value returned by
# luminosity_distance(-0.02), not a magnitude — confirm this is intentional.
abs_mag = np.log10(_cosmo.luminosity_distance(-0.02).value)

vmax = get_max_observable_volume(abs_mag, -0.02, 19.5)
# Display the result as the cell output.
vmax

In [None]:
# Read the complete ra and dec columns; this replaces the filtered ra/dec left
# behind by the memory-test cell. (Slice with [0:DATA_CUT_INDEX] for a quick run.)
ra, dec = (weights['Data/' + column][:] for column in ('ra', 'dec'))

In [None]:
# Treat ra/dec as degrees; wrap right ascension at 180 degrees before plotting
# on the Mollweide projection, which is fed radians.
ra_angles = coord.Angle(ra * u.degree).wrap_at(180 * u.degree)
dec_angles = coord.Angle(dec * u.degree)

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(1, 1, 1, projection="mollweide")
ax.scatter(ra_angles.radian, dec_angles.radian)
# This looks like Alex' paper, good
# TODO how to get frac_area from this?


In [None]:
# 50-bin histogram of abs_mag for the first DATA_CUT_INDEX rows
# (the small_abs_mag cell further down draws the same histogram).
bins = plt.hist(weights['Data']['abs_mag'][:DATA_CUT_INDEX], 50)

In [None]:
# Apparent magnitude for the first DATA_CUT_INDEX rows and its 50-bin histogram.
small_app_mag = weights['Data']['app_mag'][:DATA_CUT_INDEX]
bins = plt.hist(small_app_mag, 50)

In [None]:
# Absolute magnitude (as stored in the file) for the first DATA_CUT_INDEX rows
# and its 50-bin histogram.
small_abs_mag = weights['Data']['abs_mag'][:DATA_CUT_INDEX]
bins = plt.hist(small_abs_mag, 50)

In [None]:
# Derive absolute magnitudes from the subsample's apparent magnitudes and
# observed redshifts. app_mag_to_abs_mag is not defined in this notebook;
# presumably it comes from `from pyutils import *` — verify.
my_abs_mag = app_mag_to_abs_mag(small_app_mag, small_z_obs)

In [None]:
# TODO Compare my_abs_mag to abs_mag. 
# Calculating luminosity distances from the cosmology is slow
# For now only the file's own abs_mag subsample is displayed as the cell output.
small_abs_mag

In [None]:
# At what distance (luminosity distance) would the objects appear to be 19.5 mag?
# get_max_observable_volume is called with the recomputed absolute magnitudes,
# the observed redshifts and the 19.5 magnitude limit.
v_max = get_max_observable_volume(my_abs_mag, small_z_obs, 19.5)

# Histogram of log10(v_max) in 50 bins.
bins = plt.hist(np.log10(v_max), bins=50)
# NOTE(review): the label says Mpc, but a volume would normally be in Mpc^3 —
# confirm the units returned by get_max_observable_volume.
plt.xlabel("log(V_max) [Mpc]")
plt.ylabel("Count")

# Plots on data after running group finder

In [None]:
def process(filename):
    """Load group-finder output and derive binned summary statistics.

    Reads `filename` (space-delimited, no header) and its companion file, whose
    name is obtained by replacing ".out" with "_galprops.dat" in `filename`.
    The two tables are joined row by row, rows with ``M_halo == 0`` are dropped,
    and the following are computed:

    * mean and standard deviation of log10(L_gal) of the centrals in 49
      logarithmically spaced halo-mass bins;
    * the satellite fraction in 29 linearly spaced bins of log10(L_gal).

    A galaxy is treated as a central when its row index equals its ``igrp``
    value, and as a satellite otherwise.

    Parameters
    ----------
    filename : str
        Path to the group-finder ".out" file.

    Returns
    -------
    types.SimpleNamespace
        Attributes: ``all_data`` (joined table with the added columns
        ``is_sat``, ``logLgal``, ``Mh_bin``, ``logLgal_bin``), ``bins`` /
        ``labels`` (halo-mass bin edges and their lower edges), ``centrals``,
        ``loglcen_means``, ``loglcen_scatter``, ``bins_logLgal`` /
        ``labels_logLgal`` (log-luminosity bin edges and their lower edges)
        and ``f_sat``.
    """
    filename_props = str.replace(filename, ".out", "_galprops.dat")

    df = pd.read_csv(filename, delimiter=' ', names=('RA', 'Dec', 'z', 'L_gal', 'V_max', 'P_sat', 'M_halo', 'N_sat', 'L_tot', 'igrp', 'unknown'))
    galprops = pd.read_csv(filename_props, delimiter=' ', names=('app_mag', 'g_r', 'galaxy_type', 'mxxl_halo_mass', 'fiber_assigned_0'))
    all_data = pd.merge(df, galprops, left_index=True, right_index=True)

    # Drop bad data, should have been cleaned up earlier though!
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not write into a slice of the merged frame. The original row
    # index is kept on purpose: it is compared against igrp below.
    orig_count = len(all_data)
    all_data = all_data[all_data.M_halo != 0].copy()
    new_count = len(all_data)
    if (orig_count != new_count):
        print("Dropped {0} bad galaxies".format(orig_count - new_count))

    all_data['is_sat'] = all_data.index != all_data.igrp
    all_data['logLgal'] = np.log10(all_data.L_gal)

    # Halo-mass bins: logarithmic spacing between the smallest and largest mass.
    mh_lo = all_data.M_halo.min()
    mh_hi = all_data.M_halo.max()
    bins = np.logspace(np.log10(mh_lo), np.log10(mh_hi), 50)
    # 10**log10(x) can differ from x by rounding; pin the outer edges so the
    # extreme galaxies cannot fall outside the binning.
    bins[0], bins[-1] = mh_lo, mh_hi
    labels = bins[0:len(bins)-1] # using bottom (or top?) value, not middle
    all_data['Mh_bin'] = pd.cut(x = all_data['M_halo'], bins = bins, labels = labels, include_lowest = True)

    centrals = all_data[~all_data['is_sat']]
    #logmstar_means = centrals.groupby('Mh_bin').log_M_star.mean()
    #logmstar_scatter = centrals.groupby('Mh_bin').log_M_star.std()
    # observed=False keeps empty bins in the result, so these series stay
    # aligned one-to-one with `labels`.
    loglcen_means = centrals.groupby('Mh_bin', observed=False).logLgal.mean()
    loglcen_scatter = centrals.groupby('Mh_bin', observed=False).logLgal.std()

    # Compute f_sat(Lgal)
    # logLgal is already a base-10 logarithm, so its bins are spaced linearly
    # (logspace here would produce edges in L_gal units and bin nothing).
    bins_logLgal = np.linspace(all_data.logLgal.min(), all_data.logLgal.max(), 30)
    labels_logLgal = bins_logLgal[0:len(bins_logLgal)-1] # using bottom (or top?) value, not middle
    all_data['logLgal_bin'] = pd.cut(x = all_data['logLgal'], bins = bins_logLgal, labels = labels_logLgal, include_lowest = True)
    f_sat = all_data.groupby('logLgal_bin', observed=False).is_sat.mean()

    dataset = types.SimpleNamespace()
    dataset.all_data = all_data
    dataset.bins = bins
    dataset.labels = labels
    dataset.centrals = centrals
    #dataset.logmstar_means = logmstar_means
    #dataset.logmstar_scatter = logmstar_scatter
    dataset.loglcen_means = loglcen_means
    dataset.loglcen_scatter = loglcen_scatter
    dataset.bins_logLgal = bins_logLgal
    dataset.labels_logLgal = labels_logLgal
    dataset.f_sat = f_sat

    return dataset

def plots(*frames):
    """Draw the summary figures for one or more datasets returned by process().

    Four figures are created, each overlaying every dataset in `frames`:

    1. mean log central luminosity vs. halo mass, with the scatter as error bars;
    2. the scatter of log central luminosity vs. halo mass;
    3. histogram of the number of satellites per central;
    4. satellite fraction vs. log galaxy luminosity.

    Parameters
    ----------
    *frames
        Objects exposing ``labels``, ``loglcen_means``, ``loglcen_scatter``,
        ``centrals.N_sat``, ``labels_logLgal`` and ``f_sat``. An optional
        ``name`` attribute is used as the legend entry; otherwise the dataset
        is labelled by its position in the argument list.
    """
    plt.figure()
    for position, f in enumerate(frames):
        # A label is required for plt.legend() below to have anything to show.
        series_label = getattr(f, 'name', "dataset {0}".format(position))
        plt.errorbar(f.labels, f.loglcen_means, yerr=f.loglcen_scatter, label=series_label)
    plt.xscale('log')
    plt.xlabel('$M_{halo} / h$')
    plt.ylabel('$log(L_{cen})$')
    plt.title("Central Luminosity vs. Halo Mass")
    plt.legend()
    plt.draw()

    plt.figure()
    for f in frames:
        plt.errorbar(f.labels, f.loglcen_scatter)
    plt.xscale('log')
    plt.xlabel('$M_{halo} / h$')
    plt.ylabel('$\\sigma(\\log(L_{cen}))$')
    plt.title("Central Luminosity Scatter vs. Halo Mass")
    plt.draw()

    # Disabled scatter of centrals (kept for reference):
    # plt.figure()
    # for f in frames:
    #     plt.scatter(f.centrals.M_halo, f.centrals.L_gal, alpha=0.002)
    # plt.loglog()
    # plt.xlabel('M_halo / h')
    # plt.ylabel('L_gal / $h^2$)')
    # plt.draw()

    plt.figure()
    for f in frames:
        plt.hist(f.centrals.N_sat, np.arange(0,50,1), alpha=0.5)
    plt.yscale('log')
    # The histogram is binned in N_sat, so that is the x axis; the bar heights
    # count centrals.
    plt.xlabel("Number of Satellites")
    plt.ylabel("Count of Centrals")
    plt.title("How many satellites do Centrals have?")
    plt.draw()

    plt.figure()
    for f in frames:
        plt.plot(f.labels_logLgal, f.f_sat)
    # f_sat is binned by the luminosity of every galaxy, not only of centrals.
    plt.xlabel("$log(L_{gal})$")
    plt.ylabel("$f_{sat}$")
    plt.title("Satellite fraction vs Galaxy Luminosity")
    plt.draw()

In [None]:
# Run process() on the mxxl_3pass_all_mini.out file and draw the summary
# figures for that single dataset.
mini = process(ROOT_FOLDER + "mxxl_3pass_all_mini.out")
plots(mini)


In [None]:
# Load the mxxl_3pass_all.out dataset.
# NOTE(review): the name `all` shadows Python's builtin all() for every cell run
# after this one. Later cells refer to this name, so a rename has to be applied
# to all of them together.
all = process(ROOT_FOLDER + "mxxl_3pass_all.out")


In [None]:
# Load the mxxl_3pass_fiberonly.out dataset for comparison with the full one.
fiberonly = process(ROOT_FOLDER + "mxxl_3pass_fiberonly.out")


In [None]:
# Overlay the full and the fiber-only datasets in each summary figure.
plots(all, fiberonly)

In [None]:
# Compare the number of rows kept by process() in the two datasets and report
# the fiber-only count as a share of the full count.
# Raises ZeroDivisionError if the full dataset has no rows.
num_galaxies_all = len(all.all_data)
num_galaxies_fiberonly= len(fiberonly.all_data)
print("{1} galaxies out of {0} had fibers assigned ({2:.1%}).".format(num_galaxies_all, num_galaxies_fiberonly, num_galaxies_fiberonly/num_galaxies_all))