Where is the mass affected?
    plot_calcs: used for the overall stellar mass function.
        - It returns bin_centers -> restrict these to the specific bins that actually contain something.
    hist_calcs: definitely affected, but where should the change be made?
        It is fed mass_dat, and mass_dat also goes into plot_calcs.
            The dictionary values and errors are zero.
Should be okay:
    bin_func: should be okay. The median distance to the nearest neighbor shouldn't be calculated if the bin is empty...
    
    
Notes:
    Need a definitive bin_centers variable for plotting the medians, among other things.

In [1]:
from __future__ import division, absolute_import

import astropy.stats
import glob
import math
import matplotlib.pyplot as plt 
from matplotlib import ticker
from matplotlib.ticker import FormatStrFormatter
import numpy as np 
import os
import pandas as pd
from scipy import integrate,optimize,spatial

In [2]:
# Authorship metadata; the copyright string refers to the `Index` function.
__author__     =['Victor Calderon']
__copyright__  =["Copyright 2016 Victor Calderon, Index function"]
__email__      =['victor.calderon@vanderbilt.edu']
__maintainer__ =['Victor Calderon']

def Index(directory, datatype):
    """
    Indexes the files in a directory `directory` with a
    specific data type.

    Parameters
    ----------
    directory: str
            Absolute path to the folder that is indexed.

    datatype: str
            Data type of the files to be indexed in the folder, given as
            the file-name ending (e.g. '.txt').

    Returns
    -------
    file_array: array_like
            np.array with the paths of the files in `directory` whose
            names end in `datatype`, sorted alphabetically so that the
            order is the same on every run and platform.

    Raises
    ------
    AssertionError
            If `directory` does not exist.

    Examples
    --------
    >>> Index('/home/user/data', '.txt')
    array(['/home/user/data/A.txt', '/home/user/data/Z.txt', ...])
    """
    # Explicit raise (same exception type as the former `assert`) so that the
    # check is not stripped when Python runs with -O.
    if not os.path.exists(directory):
        raise AssertionError(
            'Directory does not exist: {0}'.format(directory))

    pattern = '{0}/*{1}'.format(directory, datatype)
    # glob returns matches in arbitrary order; sorting keeps positional
    # indexing of the returned files reproducible.
    files = np.array(sorted(glob.glob(pattern)))

    return files

In [3]:
def myceil(x, base=10):
    """
    Rounds `x` up to the nearest multiple of `base`.

    Parameters
    ----------
    x: float
        number to be rounded up

    base: float
        step whose multiples are the allowed results

    Returns
    -------
    n_high: float
        Smallest multiple of `base` that is not below `x`
        (up to floating-point rounding of the product).

    Example
    -------
    >>> myceil(12, 10)
    20.0
    >>> myceil(20, 10)
    20.0
    """
    n_multiples = math.ceil(float(x) / base)

    return float(base * n_multiples)

###############################################################################    

def myfloor(x, base=10):
    """
    Rounds `x` down to the nearest multiple of `base`.

    Parameters
    ----------
    x: float
        number to be rounded down

    base: float
        step whose multiples are the allowed results

    Returns
    -------
    n_low: float
        Largest multiple of `base` that is not above `x`
        (up to floating-point rounding of the product).

    Example
    -------
    >>> myfloor(12, 5)
    10.0
    """
    n_multiples = math.floor(float(x) / base)

    return float(base * n_multiples)

###############################################################################

def Bins_array_create(arr, base=10):
    """
    Builds evenly spaced bin edges that enclose the values of `arr`.

    The first edge is arr.min() rounded down to a multiple of `base`
    and the last edge is arr.max() rounded up to a multiple of `base`.

    Parameters
    ----------
    arr: array_like, Shape (N,...), One-dimensional
        Array of numerical elements

    base: float, optional (default=10)
        Interval between bins

    Returns
    -------
    bins_arr: array_like
        Array of bin edges for given arr

    """
    step   = float(base)
    values = np.array(arr)
    assert(values.ndim==1)
    lower_edge = myfloor(values.min(), base=step)
    upper_edge = myceil( values.max(), base=step)

    # The extra half step makes np.arange include `upper_edge` itself
    return np.arange(lower_edge, upper_edge+0.5*step, step)

In [4]:
def sph_to_cart(ra,dec,cz,hubble_const=70.):
    """
    Converts spherical coordinates to Cartesian coordinates.

    Parameters
    ----------
    ra: array-like
        right-ascension of galaxies in degrees
    dec: array-like
        declination of galaxies in degrees
    cz: array-like
        velocity of galaxies in km/s
    hubble_const: float, optional (default=70.)
        Divisor that converts the velocity into a distance
        (presumably the Hubble constant in km/s/Mpc -- confirm units)

    Returns
    -------
    coords: array-like, shape = N by 3
        x, y, and z coordinates
    """
    # Coerce once so that lists and tuples work as well as arrays
    ra_rad  = np.radians(np.asarray(ra,dtype=float))
    dec_rad = np.radians(np.asarray(dec,dtype=float))
    cz_dist = np.asarray(cz,dtype=float)/hubble_const #velocity -> distance

    cos_dec = np.cos(dec_rad)
    x_arr   = cz_dist*np.cos(ra_rad)*cos_dec
    y_arr   = cz_dist*np.sin(ra_rad)*cos_dec
    z_arr   = cz_dist*np.sin(dec_rad)
    coords  = np.column_stack((x_arr,y_arr,z_arr))

    return coords

############################################################################

def calc_dens(n_val,r_val):
    """
    Returns densities of spheres with radius being the distance to the 
        nth nearest neighbor.

    Parameters
    ----------
    n_val = integer
        The 'N' from Nth nearest neighbor
    r_val = array-like
        An array with the distances to the Nth nearest neighbor for
        each galaxy

    Returns
    -------
    dens: array-like
        An array with the densities of the spheres created with radii
        to the Nth nearest neighbor: (n_val + 1) divided by the sphere
        volume 4/3*pi*r**3. A zero radius yields `inf`.
    """
    # Vectorized over all galaxies instead of a per-element Python loop
    radii = np.asarray(r_val,dtype=float)
    dens  = 3.*(n_val+1)/(4.*np.pi*radii**3)

    return dens

In [5]:
def plot_calcs(mass,bins,dlogM):
    """
    Returns values for plotting the stellar mass function and 
        mass ratios

    Parameters
    ----------
    mass: array-like
        An array with mass values, assumed to be ordered by ascending
        density (the cuts below are taken by position along axis 0)
    bins: array=like
        A 1D array with the values which will be used as the bin edges
        by the histogram function
    dlogM: float-like
        The log difference between bin edges

    Returns
    -------
    bin_centers: array-like
        Midpoints of all the mass bins
    mass_freq: array-like, shape (2, number of bins)
        Row 0 contains the number density values of each mass bin
        (counts / len(mass) / dlogM); row 1 the matching
        sqrt(counts) errors with the same normalization
    ratio_dict: list of length 2
        Element 0 is a dictionary with three keys, corresponding to
        the divisors 2, 4, and 10 (as the percentile cuts are based on
        these divisions). Each key has the ratio of counts in the last
        1/key of `mass` (higher density) to counts in the first 1/key
        (lower density), for the bins where that ratio is finite.
        Element 1 is a list with the matching error arrays, in the
        order 2, 4, 10. An error is NaN where the higher-density count
        is zero (the ratio itself is 0 there).
    bin_centers_fin: list of array-like
        For each divisor (order 2, 4, 10) the bin centers that go with
        the entries kept in `ratio_dict`
    """
    n_gal = len(mass)

    mass_counts, edges = np.histogram(mass,bins)
    bin_centers        = 0.5*(edges[:-1]+edges[1:])

    mass_freq_vals = mass_counts/float(n_gal)/dlogM
    mass_freq_err  = np.sqrt(mass_counts)/float(n_gal)/dlogM

    ratio_vals      = {}
    yerr            = []
    bin_centers_fin = []

    for ii in (2,4,10):
        frac_data = n_gal//ii

        # Lower density cut: the first `frac_data` galaxies
        counts   = np.histogram(mass[:frac_data],bins)[0].astype(float)

        # Higher density cut: the last `frac_data` galaxies. Slicing from
        # n_gal-frac_data (not -frac_data) keeps the cut empty when
        # frac_data is 0, instead of selecting the whole sample.
        counts_2 = np.histogram(mass[n_gal-frac_data:],bins)[0].astype(float)

        # Empty bins give inf/NaN here on purpose; they are masked out
        # below, so the divide warnings carry no information.
        with np.errstate(divide='ignore',invalid='ignore'):
            ratio_counts = counts_2/counts
            temp_yerr    = ratio_counts*np.sqrt(1./counts + 1./counts_2)

        finite = np.isfinite(ratio_counts)

        ratio_vals[ii] = ratio_counts[finite]
        yerr.append(temp_yerr[finite])
        bin_centers_fin.append(bin_centers[finite])

    mass_freq  = np.array([mass_freq_vals,mass_freq_err])
    ratio_dict = [ratio_vals,yerr]

    return bin_centers, mass_freq, ratio_dict, bin_centers_fin

In [6]:
def bin_func(mass_dist,bins,kk,bootstrap=False):
    """
    Returns median distance to Nth nearest neighbor

    Parameters
    ----------
    mass_dist: array-like
        An array with mass values in at index 0 (when transformed) and distance 
        to the Nth nearest neighbor in the others
        Example: 6239 by 7
            Has mass values and distances to 6 Nth nearest neighbors  
    bins: array=like
        A 1D array with the values which will be used as the bin edges     
    kk: integer-like
        The index of mass_dist (transformed) where the appropriate distance 
        array may be found

    Optional
    --------
    bootstrap == True
        Calculates the bootstrap errors associated with each median distance 
        value. The distances of the galaxies in each bin are resampled 1000 
        times with astropy, and the 16th and 84th percentiles of the 
        resampled medians are taken as the lower and upper one sigma values.

    Returns
    -------
    medians: array-like
        An array with the median distance to the Nth nearest neighbor from 
        all the galaxies in each occupied bin. Only bins that contain at 
        least one galaxy produce an entry; masses outside the range of 
        `bins` are grouped into their own entries (index -1 below the first 
        edge, len(bins)-1 at or above the last edge).
        With bootstrap, an array of shape (3, number of occupied bins) 
        holding the medians, the lower values and the upper values.
    """
    masses    = mass_dist.T[0]
    dist_vals = mass_dist.T[kk]

    # Zero-based index of the mass bin of every galaxy
    digitized = np.digitize(masses,bins) - 1

    # np.unique only returns indices that occur, so every bin handled below
    # has at least one galaxy and needs no empty-bin special case.
    bin_nums  = np.unique(digitized)

    # Kept as a list: the bins hold different numbers of galaxies
    dist_in_bin = [dist_vals[digitized==ii] for ii in bin_nums]

    medians = np.array([np.median(vals) for vals in dist_in_bin])

    if bootstrap:
        low_err_test  = np.empty(len(dist_in_bin))
        high_err_test = np.empty(len(dist_in_bin))

        for vv, vals in enumerate(dist_in_bin):
            # One set of resampled medians per bin; both percentiles are
            # read from the same set.
            boot_meds = astropy.stats.bootstrap(vals,bootnum=1000,\
                            bootfunc=np.median)
            low_err_test[vv], high_err_test[vv] = \
                            np.percentile(boot_meds,[16,84])

        medians = np.array([medians,low_err_test,high_err_test])

    return medians
    

In [7]:
def hist_calcs(mass,bins,dlogM):
    """
    Returns dictionaries with the counts for the upper
        and lower density portions; calculates the 
        three different percentile cuts for each mass
        array given
    
    Parameters
    ----------
    mass: array-like
        An array with log stellar mass values, assumed
        to be an order which corresponds to the ascending 
        densities; (necessary, as the index cuts are based 
        on this)
    bins: array-like
        A 1D array with the values which will be used as the bin edges   
    dlogM: float-like
        The log difference between bin edges
        
    Returns
    -------
    hist_dict_low: dictionary-like
        For each frac val (2, 4, 10) the key itself holds the
        normalized counts (counts / number in the cut / dlogM) of the 
        non-empty bins of the lower density cut, and the key 
        'err_<frac val>' holds the matching sqrt(counts) errors.
    hist_dict_high: dictionary like
        Same layout, for the higher density cut.
        A cut that contains no galaxies gives empty arrays.
    """
    hist_dict_low  = {}
    hist_dict_high = {}
    n_gal = len(mass)
    
    for ii in (2,4,10):
        frac_data = n_gal//ii
        err_key   = 'err_{0}'.format(ii)
        
        # Slicing from n_gal-frac_data (not -frac_data) keeps the high cut
        # empty when frac_data is 0, instead of selecting the whole sample.
        cuts = ((hist_dict_low,  mass[:frac_data]),
                (hist_dict_high, mass[n_gal-frac_data:]))
        
        for hist_dict, frac_mass in cuts:
            counts = np.histogram(frac_mass,bins)[0]
            
            # An empty cut has only zero counts; dividing by 1 instead of 0
            # lets the non-zero mask drop every bin rather than yield NaN.
            n_cut  = max(len(frac_mass),1)
            
            norm_counts = counts/float(n_cut)/dlogM
            norm_err    = np.sqrt(counts)/float(n_cut)/dlogM
            
            non_zero = (norm_counts!=0)
            hist_dict[ii]      = norm_counts[non_zero]
            hist_dict[err_key] = norm_err[non_zero]
    
    return hist_dict_low, hist_dict_high

In [8]:
def mean_bin_mass(mass_dist,bins):
    """
    Returns the mean mass of the galaxies in each occupied mass bin

    Parameters
    ----------
    mass_dist: array-like
        An array with mass values in at index 0 (when transformed) 
    bins: array=like
        A 1D array with the values which will be used as the bin edges     

    Returns
    -------
    mean_mass: array-like
        Mean of the mass values falling in each bin, one entry per bin 
        index that actually occurs (in ascending bin order)
    """
    masses  = mass_dist.T[0]

    # Zero-based bin index of every galaxy
    bin_idx = np.digitize(masses,bins) - 1

    mean_mass = np.array([np.mean(masses[bin_idx==nn]) \
                for nn in np.unique(bin_idx)])

    return mean_mass 

In [None]:
# Scratch cell. NOTE(review): `test` is not defined in any of the cells shown
# here -- confirm where it comes from before re-running.
# fig,ax = plt.subplots()
# ax.plot(all_mock_mass_means[0],test)
# ax.set_yscale('log')
# plt.show()
print test

In [10]:
# Folder holding the mock catalogs (.dat files)
dirpath  = r"C:\Users\Hannah\Desktop\Vanderbilt_REU\Stellar_mass_env_density"
dirpath += r"\Catalogs\Resolve_plk_5001_so_mvir_scatter_ECO_Mocks_"
dirpath += r"scatter_mocks\Resolve_plk_5001_so_mvir_scatter0p1_ECO_Mocks"

# Catalog columns read into ra, dec, Halo_ID, cz, logMstar (in that order)
usecols  = (0,1,4,8,13)
# Width of the mass bins
dlogM    = 0.2
# Maps a neighbor number to its position among the six neighbor numbers used
neigh_dict = {1:0,2:1,3:2,5:3,10:4,20:5}

In [11]:
# Paths of all the mock catalogs
ECO_cats = (Index(dirpath,'.dat'))

names    = ['ra','dec','Halo_ID','cz','logMstar']

PD = [[] for ii in range(len(ECO_cats))]

for ii in range(len(ECO_cats)):
    # Raw string: "\s" is a regular expression token, not a string escape
    temp_PD = (pd.read_csv(ECO_cats[ii],sep=r"\s+", usecols= usecols,header=None,\
                   skiprows=2,names=names))
    # Reorder the columns to ra, dec, cz, logMstar, Halo_ID. The reordered
    # frame has to be assigned back (the former `.ix` expression discarded
    # its result, and `.ix` is no longer available in pandas). The cells
    # shown in this notebook access these columns by name only.
    col_list = list(temp_PD)
    col_list[2], col_list[3], col_list[4] = col_list[3], col_list[4], col_list[2]
    temp_PD = temp_PD[col_list]
    PD[ii] = temp_PD

# Keep only galaxies with 9.1 <= logMstar <= 11.77
PD_comp_1  = [(PD[ii][PD[ii].logMstar >= 9.1]) for ii in range(len(ECO_cats))]
PD_comp  = [(PD_comp_1[ii][PD_comp_1[ii].logMstar <=11.77]) for ii in range(len(ECO_cats))]

# Renumber the rows of each filtered catalog from zero (done in place)
[(PD_comp[ii].reset_index(drop=True,inplace=True)) for ii in range(len(ECO_cats))]

[None, None, None, None, None, None, None, None]

In [12]:
# for ii in range(len(ECO_cats)):
#     print len(PD[ii][PD[ii].logMstar>11.77])
# for ii in range(len(ECO_cats)):
#     print PD_comp[ii]


In [13]:
# Largest and smallest logMstar of every mock, used to set the bin range
min_max_mass_arr = []

for ii in range(len(PD_comp)):
    min_max_mass_arr.append(max(PD_comp[ii].logMstar))
    min_max_mass_arr.append(min(PD_comp[ii].logMstar))

min_max_mass_arr = np.array(min_max_mass_arr)

# Bin edges in steps of dlogM, shifted by 0.1; edges above the upper mass
# limit of 11.77 are dropped
bins = Bins_array_create(min_max_mass_arr,dlogM)
bins+= 0.1
bins_list = list(bins)
for ii in bins:
    if ii > 11.77:
        bins_list.remove(ii)

bins = np.array(bins_list)

num_of_bins = int(len(bins) - 1) 

# One entry per mock catalog for each of the columns
ra_arr  = np.array([(PD_comp[ii].ra) \
    for ii in range(len(PD_comp))])

dec_arr  = np.array([(PD_comp[ii].dec) \
    for ii in range(len(PD_comp))])

cz_arr  = np.array([(PD_comp[ii].cz) \
    for ii in range(len(PD_comp))])

mass_arr  = np.array([(PD_comp[ii].logMstar) \
    for ii in range(len(PD_comp))])

halo_id_arr  = np.array([(PD_comp[ii].Halo_ID) \
    for ii in range(len(PD_comp))])


coords_test = np.array([sph_to_cart(ra_arr[vv],dec_arr[vv],cz_arr[vv]) \
                for vv in range(len(ECO_cats))])

neigh_vals  = np.array([1,2,3,5,10,20])

nn_arr_temp = [[] for uu in range(len(coords_test))]
nn_arr      = [[] for xx in range(len(coords_test))]
nn_arr_nn   = [[] for yy in range(len(neigh_vals))]
nn_idx      = [[] for zz in range(len(coords_test))]

for vv in range(len(coords_test)):
    nn_arr_temp[vv] = spatial.cKDTree(coords_test[vv])
    # A single query returns both the distances and the indices of the
    # neigh_vals[-1]+1 (= 21) nearest points
    nn_dist_vv, nn_idx_vv = nn_arr_temp[vv].query(coords_test[vv],\
                                neigh_vals[-1]+1)
    nn_arr[vv] = np.array(nn_dist_vv)
    nn_idx[vv] = np.array(nn_idx_vv)
    

# Columns 1,2,3,5,10,20 of each mock's own distance / index table. The mock
# is selected straight from the list: going through np.array(nn_arr).T[ii]
# only picks mock ii when the mocks have different numbers of galaxies.
nn_specs       = [(nn_arr[ii].T[neigh_vals].T) for ii in \
                    range(len(coords_test))]
nn_mass_dist   = np.array([(np.column_stack((mass_arr[qq],nn_specs[qq]))) \
                    for qq in range(len(coords_test))])

nn_neigh_idx      = np.array([(nn_idx[ii].T[neigh_vals].T) for ii in \
                    range(len(coords_test))])

In [15]:
# For every mock catalog, neighbor number and mass bin: the fraction of
# galaxies whose entry in `truth_vals` equals 1.
# NOTE(review): `truth_vals` is not defined in the cells shown here;
# presumably truth_vals[ii][jj] flags whether the jj-th nearest neighbor is
# in the same halo -- confirm against the cell that builds it.
halo_frac = {}
for ii in range(len(mass_arr)):
    halo_frac[ii] = {}
    mass_binning = np.digitize(mass_arr[ii],bins)
    bins_to_use = list(np.unique(mass_binning))
    # Make sure index len(bins)-1 is always evaluated, even when no galaxy
    # falls in it, and drop index len(bins) if it occurs
    if (len(bins)-1) not in bins_to_use:
        bins_to_use.append(len(bins)-1)
    if len(bins) in bins_to_use:
        bins_to_use.remove(len(bins))
    for jj in neigh_vals:
        one_zero = truth_vals[ii][jj].astype(int)
        frac = []
        for xx in bins_to_use:
            truth_binning = one_zero[mass_binning==xx]
            num_in_bin = len(truth_binning)
            # A NaN denominator turns the fraction of an empty bin into NaN
            # instead of dividing by zero
            if num_in_bin == 0:
                num_in_bin = np.nan
            num_same_halo = np.count_nonzero(truth_binning==1)
            frac.append(num_same_halo/(1.*num_in_bin))
        print (frac)
        halo_frac[ii][jj] = frac
# truth_vals[0][1].astype(int)

[0.20180995475113123, 0.22246696035242292, 0.19518072289156627, 0.16933333333333334, 0.16561514195583596, 0.17264957264957265, 0.17625231910946196, 0.2260536398467433, 0.44345238095238093, 0.6554054054054054, 0.9795918367346939, 1.0, 1.0]
[0.11402714932126697, 0.1277533039647577, 0.1108433734939759, 0.09466666666666666, 0.09779179810725552, 0.09743589743589744, 0.07050092764378478, 0.07662835249042145, 0.1636904761904762, 0.3783783783783784, 0.9183673469387755, 0.9, 1.0]
[0.07963800904977375, 0.09140969162995595, 0.0819277108433735, 0.06933333333333333, 0.07886435331230283, 0.07521367521367521, 0.04452690166975881, 0.04789272030651341, 0.08035714285714286, 0.22972972972972974, 0.7346938775510204, 0.9, 1.0]
[0.06153846153846154, 0.06607929515418502, 0.05903614457831325, 0.04666666666666667, 0.0473186119873817, 0.04786324786324787, 0.03153988868274583, 0.038314176245210725, 0.044642857142857144, 0.10810810810810811, 0.4489795918367347, 0.9, 1.0]
[0.03167420814479638, 0.04405286343612335,

In [16]:
# NOTE(review): nn_dict is not used anywhere in this cell.
nn_dict   = {1:0,2:1,3:2,5:3,10:4,20:5}

# For each neighbor number (key: the number as a string) a two-element list:
# the per-bin mean of the halo fractions over all mocks, and the per-bin
# standard deviation divided by sqrt(number of mocks).
mean_mock_halo_frac = {}

for ii in neigh_vals:
    for jj in range(len(halo_frac)):
        bin_str = '{0}'.format(ii)
        oo_arr = halo_frac[jj][ii]
        n_o_elem = len(oo_arr)
        # Start each neighbor number with a placeholder column of zeros
        if jj == 0:
            oo_tot = np.zeros((n_o_elem,1))
        # Append the fractions of mock jj as a new last column
        oo_tot = np.insert(oo_tot,len(oo_tot.T),oo_arr,1)
    # Drop the placeholder column: rows are mass bins, columns are mocks
    oo_tot = np.array(np.delete(oo_tot,0,axis=1))
    # NaN-aware statistics skip the mocks in which a bin had a NaN fraction
    oo_tot_mean = [np.nanmean(oo_tot[uu]) for uu in xrange(len(oo_tot))]
    oo_tot_std  = [np.nanstd(oo_tot[uu])/np.sqrt(len(halo_frac)) for uu in xrange(len(oo_tot))]
    mean_mock_halo_frac[bin_str] = [oo_tot_mean,oo_tot_std]

    


In [17]:
# Show the [means, scaled standard deviations] stored under key '1'
(mean_mock_halo_frac['1'])

[[0.23379138053932791,
  0.21858028420222159,
  0.21544218689643946,
  0.19484084018295725,
  0.1916983360327825,
  0.17881305668428565,
  0.18925335943566268,
  0.25090409503836009,
  0.42610801239159524,
  0.69867640167500467,
  0.91731741061158356,
  0.98750000000000004,
  1.0],
 [0.010932438720286266,
  0.013594205790084645,
  0.012579428692107293,
  0.013149385219144662,
  0.00962990573148465,
  0.0096880211798202678,
  0.0088482377169087391,
  0.0091612551141998875,
  0.0094493195356274689,
  0.0086873876135918522,
  0.010687875575692713,
  0.011692679333668564,
  0.0]]

In [18]:
def plot_halo_frac(bin_centers,y_vals,ax,plot_idx):
    """
    Draws one curve on a panel and sets up the panel's x axis and label

    Parameters
    ----------
    bin_centers: array-like
        x values of the curve
    y_vals: array-like
        y values of the curve
    ax: axes-like
        Object the plotting calls are made on 
        (presumably a matplotlib Axes -- confirm with the caller)
    plot_idx: integer-like
        Index of the panel, 0 to 5; selects the 'n = ...' label from 
        [1,2,3,5,10,20]. Only the panel with index 4 gets an x-axis label.
    """
    titles = [1,2,3,5,10,20]
    ax.set_xlim(9.1,11.9)
    ax.set_xticks(np.arange(9.5,12.,0.5)) 
    ax.tick_params(axis='x', which='major', labelsize=16)
    title_here = 'n = {0}'.format(titles[plot_idx])
    ax.text(0.05, 0.95, title_here,horizontalalignment='left',\
            verticalalignment='top',transform=ax.transAxes,fontsize=18)
    if plot_idx == 4:
        # Raw string: same text as before, but the LaTeX backslashes are no
        # longer parsed as (invalid) string escape sequences
        ax.set_xlabel(r'$\log\ (M_{*}/M_{\odot})$',fontsize=20)
    ax.plot(bin_centers,y_vals,color='silver')
    
def plot_mean_halo_frac(bin_centers,mean_vals,ax,std):
    """
    Draws `mean_vals` against `bin_centers` on `ax` with `std` as the
    y error bars, in the color 'deeppink'
    """
    errorbar_opts = dict(yerr=std,color='deeppink')
    ax.errorbar(bin_centers,mean_vals,**errorbar_opts)


In [21]:
# Spot check for the first mock: number of galaxies with bin index 1, total
# number of galaxies, and how many of the former have a flag equal to 1.
# NOTE(review): `truth_vals` is not defined in the cells shown here.
mass_bin_test = np.digitize(mass_arr[0],bins)
one_zero_test = truth_vals[0][1].astype(int)

test_arr_ones = one_zero_test[mass_bin_test==1]

print len(test_arr_ones)
print len(mass_bin_test)

print np.count_nonzero(test_arr_ones==1)

1105
6417
223


In [22]:
# haloid_neigh = halo_id_arr[ii][nn_neigh_idx[ii].T[neigh_dict[jj]]].values
# B = haloid_neigh == halo_id_arr[ii].values
# print(B)
# print(haloid_neigh)
# print(halo_id_arr[ii].values)

IndexError: index 20 is out of bounds for axis 0 with size 8

In [23]:
# Per mock (outer key) and neighbor number (inner key):
#   nn_dens       -- two columns: mass, density from calc_dens
#   mass_dat      -- the masses reordered by ascending density
#   ratio_info    -- third return value of plot_calcs
#   bin_cens_diff -- fourth return value of plot_calcs
# nn_dist    = {}
nn_dens    = {}
mass_dat   = {}
ratio_info = {}
bin_cens_diff = {}

mass_freq  = [[] for xx in xrange(len(coords_test))]

for ii in range(len(coords_test)):
#     nn_dist[ii]    = {}
    nn_dens[ii]    = {}
    mass_dat[ii]   = {}
    ratio_info[ii] = {}
    bin_cens_diff[ii] = {}

#     nn_dist[ii]['mass'] = nn_mass_dist[ii].T[0]

    for jj in range(len(neigh_vals)):
#         nn_dist[ii][(neigh_vals[jj])]  = np.array(nn_mass_dist[ii].T\
#                                             [range(1,len(neigh_vals)+1)[jj]])
        # range(1,len(neigh_vals)+1)[jj] is jj+1: the distance column of
        # nn_mass_dist[ii] that belongs to neigh_vals[jj] (column 0 is mass)
        nn_dens[ii][(neigh_vals[jj])]  = np.column_stack((nn_mass_dist[ii].T\
                                            [0],calc_dens(neigh_vals[jj],\
                                            nn_mass_dist[ii].T[range(1,len\
                                                (neigh_vals)+1)[jj]])))

        # Sort order of the density column. The argsort is wrapped in a list,
        # so idx has shape (1, N) and the mass_dat entry comes out with
        # shape (N, 1) rather than (N,).
        idx = np.array([nn_dens[ii][neigh_vals[jj]].T[1].argsort()])
        mass_dat[ii][(neigh_vals[jj])] = (nn_dens[ii][neigh_vals[jj]]\
                                            [idx].T[0])

        # bin_centers and mass_freq[ii] are overwritten on every pass of jj
        bin_centers, mass_freq[ii], ratio_info[ii][neigh_vals[jj]],bin_cens_diff[ii][neigh_vals[jj]] = \
                            plot_calcs(mass_dat[ii][neigh_vals[jj]],bins,dlogM)

all_mock_meds = [[] for xx in range(len(nn_mass_dist))]
all_mock_mass_means = [[] for xx in range(len(nn_mass_dist))]

# Per mock: the binned median distance for every distance column
# (columns 1 onward) and the mean mass of each bin
for vv in range(len(nn_mass_dist)):
    all_mock_meds[vv] = np.array([bin_func(nn_mass_dist[vv],bins,(jj+1)) \
                                for jj in range(len(nn_mass_dist[vv].T)-1)])
    all_mock_mass_means[vv] = (mean_bin_mass(nn_mass_dist[vv],bins))  
    
# med_plot_arr = [([[] for yy in xrange(len(nn_mass_dist))]) \
#                                             for xx in xrange(len(neigh_vals))]

# for ii in range(len(neigh_vals)):
#     for jj in range(len(nn_mass_dist)):
#         med_plot_arr[ii][jj] = all_mock_meds[jj][ii]    

# for ii in range(len(neigh_vals)):
#     for jj in range(len(nn_mass_dist)):
#         print len(all_mock_meds[jj][ii])

# mass_freq_plot  = (np.array(mass_freq))
# max_lim = [[] for xx in range(len(mass_freq_plot.T))]
# min_lim = [[] for xx in range(len(mass_freq_plot.T))]
# for jj in range(len(mass_freq_plot.T)):
#     max_lim[jj] = max(mass_freq_plot.T[jj])
#     min_lim[jj] = min(mass_freq_plot.T[jj])

In [24]:
# Number of per-bin mean masses obtained for the first mock
len(all_mock_mass_means[0])

13

In [25]:
bin_cens_diff


# Independent copy of the bin edges. A name assigned at module level is
# already global, so no `global` statement is needed (placed after the
# assignment it is also a SyntaxError on Python 3).
bins_curve_fit = bins.copy()

def abc():
    """
    Prints the module-level `bins_curve_fit` array, to check that it is
    visible from inside a function
    """
    print(bins_curve_fit)
    
abc()

[  9.1   9.3   9.5   9.7   9.9  10.1  10.3  10.5  10.7  10.9  11.1  11.3
  11.5  11.7]


In [26]:
# Folder searched for the ECO catalog (.txt)
eco_path  = r"C:\Users\Hannah\Desktop\Vanderbilt_REU\Stellar_mass_env_density"
eco_path += r"\Catalogs\ECO_true"
# Catalog columns read into ra, dec, cz, logMstar (in that order)
eco_cols  = np.array([0,1,2,4])

In [27]:
# Same processing chain as for the mocks, applied to the first .txt file
# found in eco_path
ECO_true = (Index(eco_path,'.txt'))
names    = ['ra','dec','cz','logMstar']
PD_eco   = pd.read_csv(ECO_true[0],sep="\s+", usecols=(eco_cols),header=None,\
                skiprows=1,names=names)
# Only the lower mass limit is applied here (the mock catalogs are also cut
# at logMstar <= 11.77)
eco_comp = PD_eco[PD_eco.logMstar >= 9.1]

# Columns are taken by position, following the order of `names`
ra_eco   = (np.array(eco_comp)).T[0]
dec_eco  = (np.array(eco_comp)).T[1] 
cz_eco   = (np.array(eco_comp)).T[2] 
mass_eco = (np.array(eco_comp)).T[3]

# Distances to the neigh_vals[-1]+1 nearest points of every galaxy
coords_eco        = sph_to_cart(ra_eco,dec_eco,cz_eco)
eco_neighbor_tree = spatial.cKDTree(coords_eco)
eco_tree_dist     = np.array(eco_neighbor_tree.query(coords_eco,\
                    (neigh_vals[-1]+1))[0])

# Column 0: mass; columns 1-6: the distance columns picked out by neigh_vals
eco_mass_dist = np.column_stack((mass_eco,eco_tree_dist.T[neigh_vals].T))
##range 1,7 because of the six nearest neighbors (and fact that 0 is mass)
##the jj is there to specify which index in the [1,6] array
eco_dens = ([calc_dens(neigh_vals[jj],\
            (eco_mass_dist.T[range(1,7)[jj]])) for jj in range\
            (len(neigh_vals))])

# Per neighbor number: (mass, density) pairs, the sort order of the density
# column, and the masses in order of ascending density
eco_mass_dens = [(np.column_stack((mass_eco,eco_dens[ii]))) for ii in \
                range(len(neigh_vals))]
eco_idx  = [(eco_mass_dens[jj].T[1].argsort()) for jj in \
            range(len(neigh_vals))]
eco_mass_dat  = [(eco_mass_dens[jj][eco_idx[jj]].T[0]) for jj in \
                range(len(neigh_vals))]

eco_ratio_info    = [[] for xx in xrange(len(eco_mass_dat))]
eco_final_bins    = [[] for xx in xrange(len(eco_mass_dat))]


# bin_centers and eco_freq are overwritten on every pass of the loop
for qq in range(len(eco_mass_dat)):
    bin_centers, eco_freq, eco_ratio_info[qq],eco_final_bins[qq] = plot_calcs(eco_mass_dat[qq],\
                                    bins,dlogM)

eco_medians   = [[] for xx in xrange(len(eco_mass_dat))]    
eco_mass_means   = [[] for xx in xrange(len(eco_mass_dat))] 

# Binned medians with bootstrap errors for each distance column. The
# mean_bin_mass call takes the same arguments on every pass, so all the
# entries of eco_mass_means hold the same values.
for jj in (range(len(eco_mass_dat))):
    eco_medians[jj] = np.array(bin_func(eco_mass_dist,bins,(jj+1),\
        bootstrap=True))
    eco_mass_means[jj] = (mean_bin_mass(eco_mass_dist,bins)) 

In [28]:
# hist_calcs results for every mock (outer key) and neighbor number
# (inner key), split into the low and high density dictionaries
hist_low_info  = {}
hist_high_info = {}

for ii in xrange(len(coords_test)):
    hist_low_info[ii]  = {}
    hist_high_info[ii] = {}
    
    for jj in range(len(neigh_vals)):
        hist_low_info[ii][neigh_vals[jj]],hist_high_info[ii][neigh_vals[jj]] \
        = hist_calcs(mass_dat[ii][neigh_vals[jj]],bins,dlogM)
        
frac_vals     = [2,4,10]
hist_low_arr  = [[[] for yy in xrange(len(nn_mass_dist))] for xx in \
    xrange(len(neigh_vals))]
hist_high_arr = [[[] for yy in xrange(len(nn_mass_dist))] for xx in \
    xrange(len(neigh_vals))]

# Same results as nested lists, indexed [neighbor number position][mock]
for ii in range(len(neigh_vals)):
    for jj in range(len(nn_mass_dist)):
        hist_low_arr[ii][jj]  = (hist_low_info[jj][neigh_vals[ii]])
        hist_high_arr[ii][jj] = (hist_high_info[jj][neigh_vals[ii]])
        
        
#         plot_low_hist  = [[[[] for yy in xrange(len(nn_mass_dist))] \
#                          for zz in xrange(len(frac_vals))] for xx in \
#                          xrange(len(hist_low_arr))]
        
#         plot_high_hist = [[[[] for yy in xrange(len(nn_mass_dist))] \
#                  for zz in xrange(len(frac_vals))] for xx in \
#                  xrange(len(hist_high_arr))]

# for jj in range(len(nn_mass_dist)):
#     for hh in range(len(frac_vals)):
#         for ii in range(len(neigh_vals)):
#             plot_low_hist[ii][hh][jj]  = hist_low_arr[ii][jj][frac_vals[hh]]        
#             plot_high_hist[ii][hh][jj] = hist_high_arr[ii][jj][frac_vals[hh]] 

In [29]:
# Show the per-bin mean masses of the ECO sample (one array per neighbor number)
eco_mass_means

[array([  9.19881916,   9.39594645,   9.59814583,   9.8007378 ,
          9.99990361,  10.19470893,  10.3920915 ,  10.59192308,
         10.79164706,  10.98988372,  11.18395349,  11.37583333,
         11.582     ,  11.746     ]),
 array([  9.19881916,   9.39594645,   9.59814583,   9.8007378 ,
          9.99990361,  10.19470893,  10.3920915 ,  10.59192308,
         10.79164706,  10.98988372,  11.18395349,  11.37583333,
         11.582     ,  11.746     ]),
 array([  9.19881916,   9.39594645,   9.59814583,   9.8007378 ,
          9.99990361,  10.19470893,  10.3920915 ,  10.59192308,
         10.79164706,  10.98988372,  11.18395349,  11.37583333,
         11.582     ,  11.746     ]),
 array([  9.19881916,   9.39594645,   9.59814583,   9.8007378 ,
          9.99990361,  10.19470893,  10.3920915 ,  10.59192308,
         10.79164706,  10.98988372,  11.18395349,  11.37583333,
         11.582     ,  11.746     ]),
 array([  9.19881916,   9.39594645,   9.59814583,   9.8007378 ,
          9.9999

In [30]:
# hist_calcs results for the ECO sample, keyed by neighbor number
eco_low  = {}
eco_high = {}
for jj in range(len(neigh_vals)):
    # These empty placeholders are replaced by the assignment right below
    eco_low[neigh_vals[jj]]  = {}
    eco_high[neigh_vals[jj]] = {}
    eco_low[neigh_vals[jj]], eco_high[neigh_vals[jj]] = hist_calcs\
    (eco_mass_dat[jj],bins,dlogM)

In [31]:
def perc_calcs(mass,bins,dlogM):
    """
    Returns the normalized mass histogram of `mass`, restricted to the 
        bins that are not empty

    Parameters
    ----------
    mass: array-like
        An array with mass values
    bins: array-like
        A 1D array with the values which will be used as the bin edges
    dlogM: float-like
        The log difference between bin edges

    Returns
    -------
    mass_freq_1: array-like
        counts / len(mass) / dlogM for the non-empty bins
    smf_err_1: array-like
        sqrt(counts) / len(mass) / dlogM for the same bins
    bin_centers_1: array-like
        Midpoints of the same bins
    """
    counts, edges = np.histogram(mass,bins)
    n_total       = float(len(mass))

    centers = 0.5*(edges[:-1]+edges[1:])
    freq    = counts/n_total/dlogM
    err     = np.sqrt(counts)/n_total/dlogM

    occupied = (freq!=0)

    return freq[occupied], err[occupied], centers[occupied]

In [32]:
def quartiles(mass):
    """
    Splits `mass` into four consecutive pieces

    The first three pieces hold int(len(mass)/4) elements each; the
    fourth piece takes everything that is left.

    Returns
    -------
    res_list: list
        The four slices of `mass`, in order
    """
    n_parts = 4
    chunk   = int(len(mass)/n_parts)

    res_list = [mass[qq*chunk:(qq+1)*chunk] for qq in range(n_parts-1)]
    res_list.append(mass[(n_parts-1)*chunk:])

    return res_list

In [33]:
def deciles(mass):
    """
    Splits `mass` into ten consecutive pieces

    The first nine pieces hold int(len(mass)/10) elements each; the
    tenth piece takes everything that is left.

    Returns
    -------
    res_list: list
        The ten slices of `mass`, in order
    """
    n_parts = 10
    chunk   = int(len(mass)/n_parts)

    res_list = [mass[dd*chunk:(dd+1)*chunk] for dd in range(n_parts-1)]
    res_list.append(mass[(n_parts-1)*chunk:])

    return res_list

In [34]:
# Ten consecutive slices of the density-ordered ECO masses, per neighbor number
eco_dec = {}
for cc in range(len(eco_mass_dat)):
    eco_dec[neigh_vals[cc]] = deciles(eco_mass_dat[cc])
    
# perc_calcs results (normalized counts, errors, bin centers of the non-empty
# bins) for every slice, keyed by neighbor number and then by slice index
eco_dec_smf = {}
eco_dec_err = {}
eco_dec_bin = {}

for ss in neigh_vals:
    eco_dec_smf[ss] = {}
    eco_dec_err[ss] = {}
    eco_dec_bin[ss] = {}
    for tt in range(len(eco_dec[ss])):
        eco_dec_smf[ss][tt], eco_dec_err[ss][tt], eco_dec_bin[ss][tt] = perc_calcs(eco_dec[ss][tt],bins,dlogM)    

In [35]:
# Compare the lengths of the arrays that are meant to be used together
# (first slice for n = 1 against the trimmed mock means and bin edges)
print len(eco_dec_smf[1][0])
print len(all_mock_mass_means[0][:-2])
print len(eco_dec_err[1][0])
print eco_dec_bin[1][0]
print all_mock_mass_means[0]
print len(bins_curve_fit[:-3])

11
11
11
[  9.2   9.4   9.6   9.8  10.   10.2  10.4  10.6  10.8  11.   11.2]
[  9.19822809   9.39940905   9.59760334   9.79848111  10.00151616
  10.20241456  10.3992579   10.59785147  10.78480071  10.99137783
  11.18597316  11.3528569   11.567247  ]
11


In [338]:
def schechter_real_func(mean_of_mass_bin,phi_star,alpha,Mstar):
    """
    Evaluates phi_star/Mstar * (M/Mstar)**alpha * exp(-M/Mstar)

    Parameters
    ----------
    mean_of_mass_bin: array-like
        Unlogged x-values (they are used as given, no 10** is applied)
    phi_star: float-like
        Normalization value
    alpha: float-like
        Low-mass end slope
    Mstar: float-like
        Unlogged value where function switches from power-law to exponential

    Returns
    -------
    res_arr: array-like
        Function values at `mean_of_mass_bin`
    """
    mass_ratio = (mean_of_mass_bin)/Mstar
    amplitude  = phi_star/Mstar
    power_law  = mass_ratio**(alpha)
    cutoff     = np.exp(- mass_ratio)

    return amplitude * power_law * cutoff

In [396]:
def schechter_log_func(stellar_mass,phi_star,alpha,m_star):
    """
    Returns a plottable Schechter function for the
        stellar mass functions of galaxies

    Parameters
    ----------
    stellar_mass: array-like
        Unlogged stellar mass values; these become the x-axis
        values the function is plotted against
    phi_star: float-like
        Multiplicative normalization of the function;
        moves the graph up and down
    alpha: float-like
        The faint-end, or in this case, low-mass slope;
        describes the power-law portion of the curve
    m_star: float-like
        Unlogged value of the characteristic stellar mass;
        the "knee" of the function, where the power-law
        gives way to the exponential portion

    Returns
    -------
    res: array-like
        ln(10) * phi_star * (M/m_star)**(alpha+1) * exp(-M/m_star)
        for every input mass M, prepared to be plotted on a log
        scale to display the Schechter function

    """
    # Work with log10(M/M*) so both factors are built from the same quantity
    log_ratio = np.log10(stellar_mass / m_star)
    amplitude = np.log(10) * phi_star
    power_law = 10**(log_ratio * (alpha + 1))
    cutoff = np.exp(-10**log_ratio)

    return amplitude * power_law * cutoff

In [355]:
# Diagnostic cell: compare the length of `test` with that of bins_curve_fit.
print len(test)
print len(bins_curve_fit)

11
14


In [412]:
def find_params(bin_int,mean_mass,count_err):
    """
    Fits `schechter_real_func` to binned data with `optimize.curve_fit`.

    Parameters
    ----------
    bin_int: array-like
        Integral (number of counts) in each bin of width dlogM
    mean_mass: array-like
        Logged (base 10) mass value of each bin. It is converted to
        unlogged values with 10**mean_mass before fitting. Plain lists
        are accepted.
    count_err: array-like or None
        Uncertainty of each entry of `bin_int`; handed to `curve_fit`
        as `sigma`. Pass None to leave `sigma` unset.

    Returns
    -------
    opt_v: array-like
        Array with three values: phi_star, alpha, and M_star
    res_arr: array-like
        Array with two values: alpha and log_M_star
    perr: array-like
        Square root of the diagonal of `est_cov`
    est_cov: array-like
        Covariance matrix returned by `curve_fit`

    Notes
    -----
    NOTE(review): the output recorded in this notebook shows `opt_v`
    equal to the initial guess `p0` (1, -1.05, 10**10.64), i.e. the fit
    returned its starting point unchanged. Check that the optimizer
    actually moves before trusting the result.
    """
    # Coerce first so that list input works as well (10**list raises TypeError)
    xdata = 10**np.asarray(mean_mass, dtype=float)
    # Initial guess, in the parameter order of schechter_real_func
    p0    = (1,-1.05,10**10.64)
    opt_v,est_cov = optimize.curve_fit(schechter_real_func,xdata,
                            bin_int,p0=p0,sigma=count_err,check_finite=True)
    alpha      = opt_v[1]
    log_m_star = np.log10(opt_v[2])

    res_arr = np.array([alpha,log_m_star])

    perr = np.sqrt(np.diag(est_cov))

    return opt_v, res_arr, perr, est_cov

In [410]:
# test_int

In [413]:
# print len(eco_dec_smf[1][0])
# print len(eco_mass_means[0])
# print len(all_mock_mass_means[0][:-2])
# print len(eco_dec_err[1][0])

# Fit slice 0 stored under key 1. The last three entries of
# eco_mass_means[0] are dropped -- presumably so its length matches
# eco_dec_smf[1][0] (14 vs 11 in the recorded output); confirm.
opt_v, res_arr, perr, est_cov = find_params(eco_dec_smf[1][0],eco_mass_means[0][:-3],eco_dec_err[1][0])
# opt_v, res_arr, perr, est_cov = find_params(eco_dec_smf[1][0],eco_mass_means[0][:-3],None)

In [414]:
# Show the fitted parameters.
# NOTE(review): the recorded output equals the initial guess p0 used in
# find_params (1, -1.05, 10**10.64) -- the fit appears not to have moved.
print opt_v
print res_arr
# print perr
# print eco_dec_err[1][0]
# print est_cov

[  1.00000000e+00  -1.05000000e+00   4.36515832e+10]
[ -1.05  10.64]


In [302]:
# print opt_v

# Diagnostic cell: print the lengths of `test`, the mock and ECO mass
# means, and the first slice result, to compare them by eye.
print len(test)
print len(all_mock_mass_means[0])
print len(test)
print len(eco_mass_means[0])
print len(eco_dec_smf[1][0])

11
13
11
14
11


In [303]:
# Number of trailing entries to drop from all_mock_mass_means[0] so that it
# has as many entries as eco_dec_smf[1][0].
# NOTE(review): if the two lengths are ever equal, idx_edge is 0 and
# [:-idx_edge] becomes [:-0], which is an empty slice -- guard before reuse.
idx_edge = len(all_mock_mass_means[0]) - len(eco_dec_smf[1][0])
print len(eco_dec_smf[1][0])
print len(all_mock_mass_means[0][:-idx_edge])
print len(eco_dec_err[1][0])
# opt_v, res_arr, perr, est_cov = find_params(eco_dec_smf[1][0],all_mock_mass_means[0][:-(idx_edge)],eco_dec_err[1][0])

11
11
11


In [407]:
# Evaluate schechter_log_func with the fitted (phi_star, alpha, M_star) at
# eco_mass_means[0] minus its last three entries.
# NOTE(review): schechter_log_func documents unlogged masses as input;
# confirm that eco_mass_means holds unlogged values.
test = schechter_log_func(eco_mass_means[0][:-3],opt_v[0],(opt_v[1]),opt_v[2])

In [408]:
# Diagnostic cell: print lengths and values of the evaluated curve (`test`)
# next to eco_dec_smf[1][0] for comparison.
print len(eco_mass_means[0])
print len(test)
print len(eco_dec_smf[1][0])
print test
# print test*10**10
print eco_dec_smf[1][0]
# print np.log10(eco_dec_smf[1][0])

# [  1.27691357e-09   7.52289969e-10   4.29532422e-10   2.38074992e-10
#    1.27498704e-10   6.47224003e-11   2.92667401e-11   1.10292764e-11
#    3.16160331e-12   5.95850360e-13   6.06349916e-14]

14
11
11
[ 7.01502276  7.00758964  7.00013348  6.99282645  6.98579598  6.97906024
  6.97237184  6.96573499  6.95923179  6.95290081  6.946818  ]
[ 0.95302013  0.87919463  0.72483221  0.68456376  0.55704698  0.59060403
  0.29530201  0.18120805  0.10738255  0.02013423  0.00671141]


In [395]:
# Overlay `test` and eco_dec_smf[1][0] on a single log-scaled y-axis, both
# against eco_mass_means[0] with its last three entries dropped.
fig, ax = plt.subplots()
ax.set_yscale('log')
ax.plot(eco_mass_means[0][:-3],test)
ax.plot(eco_mass_means[0][:-3],(eco_dec_smf[1][0]))
plt.show()

In [54]:
# truth_vals[ii] = {}
# for kk in range(10):
#     truth_list.append(np.unique(halo_id_arr[ii][nn_neigh_idx[ii].T[neigh_dict[jj]]\
#                                   [halo_id_arr[ii][nn_neigh_idx[ii].T[neigh_dict[jj]]].keys()[kk]]])==halo_id_arr[ii][kk])
#     truth_vals[ii][jj] = truth_list

# truth_vals[ii][jj]: element-wise comparison between the halo ids picked
# out through nn_neigh_idx[ii].T[neigh_dict[jj]] and halo_id_arr[ii] itself.
# NOTE(review): presumably True marks an object whose jj-th nearest
# neighbour has the same halo id -- confirm against how nn_neigh_idx is built.
truth_vals = {}
for ii in range(len(halo_id_arr)):
    truth_vals[ii] = {}
    for jj in neigh_vals:
        # .values is used on both sides, so the comparison is positional
        halo_id_neigh = halo_id_arr[ii][nn_neigh_idx[ii].T[neigh_dict[jj]]].values
        truth_vals[ii][jj] = halo_id_neigh==halo_id_arr[ii].values

In [50]:
# len(test_int)

# fig, ax = plt.subplots()
# ax.set_yscale('log')
# ax.plot(all_mock_mass_means[0],test_int)
# plt.show()

ValueError: x and y must have same first dimension

In [55]:
# Panel grid for the halo-fraction plots: nrow x ncol axes, one per
# entry of neigh_vals.
nrow = int(2)
ncol = int(3)

# NOTE(review): figsize is passed through unchanged; matplotlib reads it as
# (width, height) in inches, so (100,200) is very large -- confirm intent.
fig,axes = plt.subplots(nrows=nrow,ncols=ncol,\
                        figsize=(100,200),sharex=True)
axes_flat = axes.flatten()

# One pass over neigh_vals, one panel per value. The previous outer
# `while zz <= 4` wrapper looped forever on an empty neigh_vals and re-ran
# this whole loop whenever neigh_vals had 4 or fewer entries. Indexing
# axes_flat[zz] still raises IndexError if there are more values than panels.
for zz, jj in enumerate(neigh_vals):
    # Individual curves: one per entry of halo_frac
    for kk in range(len(halo_frac)):
        plot_halo_frac(bin_centers,halo_frac[kk][jj],axes_flat[zz],zz)
    # mean_mock_halo_frac is keyed by the string form of the neigh value
    nn_str = '{0}'.format(jj)
    plot_mean_halo_frac(bin_centers,mean_mock_halo_frac[nn_str][0],axes_flat[zz],mean_mock_halo_frac[nn_str][1])

plt.subplots_adjust(top=0.97,bottom=0.1,left=0.03,right=0.99,hspace=0.10,wspace=0.12)
plt.show()