In [1]:
import collections
import bisect
import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib as mpl
import scipy.signal as signal

from sklearn.neighbors import KernelDensity
from sklearn.mixture import GaussianMixture

In [2]:
#plotting things

#%matplotlib qt5 -- I don't know what this is
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

from cycler import cycler


# All of Anandh's customized seaborn/matplotlib settings.
# NOTE: the calls below write into matplotlib's global rc settings one after
# another, so later calls win where they touch the same key -- keep the order.

# "talk" context with enlarged fonts and 1.5-pt lines
sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 1.5})
# "ticks" style, then tick marks pointing into the axes
sns.set_style("ticks")
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

# optional (disabled) notebook magic for SVG inline figures:
#%config InlineBackend.figure_formats=['svg']

# default color cycle for successive plot elements: red, black, blue, green, yellow, magenta, cyan
mpl.rc('axes', prop_cycle=(cycler('color', ['r', 'k', 'b','g','y','m','c']) ))

# font type 42 (TrueType) for PDF/PS output -- per the matplotlib rcParams docs;
# presumably chosen so text stays editable in vector-graphics editors (TODO confirm)
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

# disabled LaTeX text rendering setup, kept for reference:
#mpl.rc('text', usetex=False)
#mpl.rc('text.latex', preamble=r'\usepackage{helvet}
#\renewcommand\familydefault{\sfdefault}\usepackage{sansmath}\sansmath')

# If you want to use a different font
# mpl.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica'], 
#                  'serif': ['Helvetica']})

# tick width shared by major and minor ticks on both axes
tw = 1.5
sns.set_style({"xtick.major.size": 3, "ytick.major.size": 3,
               "xtick.minor.size": 2, "ytick.minor.size": 2,
               'axes.labelsize': 16, 'axes.titlesize': 16,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

# final font sizes, axes frame width, and default figure size (inches)
mpl.rc('xtick', labelsize=14) 
mpl.rc('ytick', labelsize=14)
mpl.rc('axes', linewidth=1.5)
mpl.rc('legend', fontsize=14)
mpl.rc('figure', figsize=(9,8))

In [3]:
def get_values(file_directory, channel='GFP/FITC-A', gate = None):
    '''
    Read one flow CSV file and return the log10 of a single channel.

    Parameters
    ----------
    file_directory : str
        Path of the CSV file to read (handed to ``pd.read_csv``).
    channel : str
        Name of the column to extract. Defaults to 'GFP/FITC-A'.
    gate : str or None
        Name of a column holding 0/1 gate values (assigned by other scripts).
        When given, only the rows where that column equals 1 are used.
        None (the default) uses every row.

    Returns
    -------
    numpy.ndarray
        log10 of the selected channel values, one entry per retained row.
        Readings of 0 come back as -inf and negative readings as NaN
        (numpy emits a RuntimeWarning); they are not filtered here.

    Raises
    ------
    KeyError
        If ``channel`` or ``gate`` is not a column of the file.
    '''
    flow_data = pd.read_csv(file_directory)

    # a gate is optional: only subselect when one was actually requested
    # (identity check, since None is a singleton)
    if gate is not None:
        flow_data = flow_data.loc[flow_data[gate] == 1, :]

    # apply the log transform
    return np.log10(flow_data[channel].values)

In [4]:
def make_df(file_directory, channel='GFP/FITC-A', gate = None):
    '''
    Build one tidy dataframe from every CSV matched by a glob pattern.

    Parameters
    ----------
    file_directory : str
        Glob pattern selecting the CSV files (e.g. 'some/folder/*.csv').
        Files are processed in sorted order.
    channel : str
        Column to extract from each file. Defaults to 'GFP/FITC-A'.
    gate : str or None
        Name of the column with the gate 0/1 values; when given only rows
        inside the gate are used. Defaults to None (no gating).

    Returns
    -------
    pandas.DataFrame
        Columns 'well' and 'log10values', one row per measurement. The well
        label is the file name up to its first '.', so the files must be
        named '(letter)(number).csv' for the labels to be plain well names
        (a prefix such as 'gated_data' ends up inside the label).
    '''
    all_files = sorted(glob.glob(file_directory))

    wells = []
    values = []
    for path in all_files:
        file_values = get_values(path, channel, gate)

        # glob can return '/' and '\' mixed in one path (Windows), so normalise
        # the separators and take the LAST component as the file name; this
        # also works for paths that contain no backslash at all
        file_name = path.replace('\\', '/').split('/')[-1]
        well_label = str(file_name.split('.')[0])

        # one label per measurement keeps the two columns aligned
        wells.extend([well_label] * len(file_values))
        values.extend(file_values)

    return pd.DataFrame(dict(well=wells, log10values=values))

In [5]:
def get_peak_locations_from_KDE_fit(data):
    ''' Fit a gaussian KDE to the data and locate its peaks with
        scipy.signal.find_peaks_cwt.

        data is a 1-D numpy array; the density is evaluated on 1200 evenly
        spaced points between 0 and 6 and the returned list holds the grid
        positions of the detected peaks.

        The KDE bandwidth is the critical parameter, and 0.25 has worked well
        in the past. Decrease it if peaks are being missed, increase it if too
        many peaks are being called -- but don't change it without good reason,
        it took a while to settle on 0.25. '''

    # evaluation grid for the density estimate
    grid = np.linspace(0, 6, 1200)

    density_model = KernelDensity(bandwidth=0.25, kernel='gaussian')
    density_model.fit(data[:, None])

    # score_samples returns log-density, so exponentiate to get the density
    density = np.exp(density_model.score_samples(grid[:, None]))

    # wavelet-based peak search over widths of 30..199 grid points
    peak_indices = signal.find_peaks_cwt(density, np.arange(30, 200), min_snr=1)

    # translate grid indices back into positions along the grid
    return [grid[idx] for idx in peak_indices]

In [6]:
def fit_GMM_KDE(data, peaks, threshold = 0.01, random_state = None): 
    """Fit a Gaussian mixture model seeded with the peaks of a KDE fit and
    summarise the result as an ON and an OFF population.

    The ON population is the above-threshold gaussian with the highest mean;
    every other measurement is counted as OFF. This is only a good assumption
    for uni/bimodal data. If you have multimodal data, do not use this code.

    Parameters
    ----------
    data : 1-D array of measurements (log10 values).
    peaks : sequence of peak locations, one mixture component is fitted per
        peak and the peaks are used as the initial means.
    threshold : a gaussian is only considered if it holds more than this
        fraction of the measurements.
    random_state : forwarded to GaussianMixture; pass an int to make the fit
        reproducible. None (default) keeps the unseeded behaviour.

    Returns
    -------
    (mean_of_highest_peak, fraction_of_highest_peak, mean_of_off_population,
     fraction_off, df)
        The means are in log10 units. mean_of_off_population is NaN when no
        other gaussian is above threshold. df holds every measurement
        ('fluor value') and the index of the gaussian it was assigned to
        ('which_gaussian'), which can be used for gating.

    Raises
    ------
    ValueError
        If ``peaks`` is empty or no gaussian exceeds ``threshold``.
    """
    
    data = np.asarray(data).reshape(len(data), 1)

    peaks = np.array(peaks).reshape(len(peaks), 1)
    n_components = len(peaks)
    if n_components == 0:
        raise ValueError('fit_GMM_KDE needs at least one peak to seed the mixture model')

    opt_gmm = GaussianMixture(n_components = n_components, means_init = peaks,
                              random_state = random_state).fit(data)

    labels = np.ravel(opt_gmm.predict(data))

    #this df contains each measurement value and the gaussian it is associated with.
    #you can use this to pull out the measurements that fall into the desired gaussian for gating.
    df = pd.DataFrame({'fluor value': np.ravel(data), 'which_gaussian': labels})

    #per-gaussian size and mean of the measurements assigned to it
    #(not the mixture's own fitted means)
    counts = []
    means = []
    for i in range(0, n_components):
        members = df.loc[df['which_gaussian'] == i, 'fluor value'].values
        counts.append(len(members))
        #a gaussian can end up with no measurements at all; report NaN for it
        #directly instead of averaging an empty array (which warns)
        means.append(np.mean(members) if len(members) > 0 else np.float64('nan'))

    print('peaks identified in the kde fit of data: ', peaks)
    print('means of the gaussians the GMM optimized: ', means)
    print('counts in each gaussian: ', counts)
    total = len(df)

    fractions = np.array(counts)/total
    print('raw fractions of data in each gaussian: ', fractions)

    #keep only the gaussians that hold more than the threshold fraction of the data
    accepted = [i for i in range(0, len(fractions)) if fractions[i] > threshold]
    GMM_accepted_means = [means[i] for i in accepted]
    GMM_corrected_fractions = [fractions[i] for i in accepted]

    print('means of gaussians that contain fraction of data above thresh ({}): '.format(threshold), GMM_accepted_means)
    print('fraction of data in above-thresh gaussians: ', GMM_corrected_fractions)

    if not accepted:
        raise ValueError('no gaussian holds more than the threshold fraction ({}) of the data'.format(threshold))

    #uses the accepted gaussian with the highest mean as the ON gaussian
    index_of_on = GMM_accepted_means.index(max(GMM_accepted_means))

    #the accepted fractions can sum to less than 1 because sub-threshold gaussians
    #were dropped. That is fine: only the ON fraction is kept and everything else,
    #including the sub-threshold measurements, is counted as OFF.
    fraction_of_highest_peak = GMM_corrected_fractions[index_of_on]
    fraction_off = 1 - fraction_of_highest_peak

    mean_of_highest_peak = GMM_accepted_means[index_of_on]

    #there can be several above-threshold OFF gaussians, so summarise them with
    #their mean weighted by the fraction of data each one holds:
    #sum(fraction * linear mean) / sum(fraction).
    #The weighting is done out of log space because the average of 2 and 3 in
    #log space is 2.5, but the real average value is not 10**2.5.
    off_indices = [i for i in range(0, len(GMM_corrected_fractions)) if i != index_of_on]
    fracs_to_use = [GMM_corrected_fractions[i] for i in off_indices]
    weighted_peak_means = [GMM_corrected_fractions[i] * np.power(10, GMM_accepted_means[i])
                           for i in off_indices]

    if fracs_to_use:
        mean_of_off_population = np.log10(np.sum(weighted_peak_means)/np.sum(fracs_to_use))
    else:
        #unimodal data: there is no OFF gaussian, so there is no OFF mean
        #(avoids computing 0/0)
        mean_of_off_population = np.float64('nan')

    return mean_of_highest_peak, fraction_of_highest_peak, mean_of_off_population, fraction_off, df

In [7]:
def fit_GMM_KDE_wrapper (data):
    """Find the KDE peaks of the data, then fit the Gaussian mixture model
    seeded with those peaks (threshold fixed at 0.01).

    Returns, in order: the mean of the ON (highest) peak, the fraction of
    measurements in it, the mean of the OFF population, the OFF fraction,
    and the dataframe assigning every measurement to a gaussian."""

    kde_peaks = get_peak_locations_from_KDE_fit(data)

    on_mean, on_fraction, off_mean, off_fraction, assignments = fit_GMM_KDE(
        data, kde_peaks, threshold = 0.01)

    return on_mean, on_fraction, off_mean, off_fraction, assignments

In [8]:
def GMM_method(df_, wells, gate = None):
    '''Run the KDE + GMM fit on every requested well and collect the results.

    Parameters
    ----------
    df_ : dataframe with a 'well' column and a 'log10values' column.
    wells : list of the wells you want to perform GMM fitting on.
    gate : name of the gate the input data was selected with; it is only
        recorded in the output (same value for every row), so it should be the
        same variable that was used when reading the data in.

    Returns
    -------
    pandas.DataFrame
        One row per fitted well with the columns 'mean ON', 'fraction ON',
        'mean OFF', 'fraction OFF', 'well' and 'gate'. Wells that have no rows
        in df_ are skipped (with a printed notice) instead of aborting the run.
    '''
    columns = ['mean ON', 'fraction ON', 'mean OFF', 'fraction OFF', 'well']
    rows = []

    for well in wells:
        vals = df_.loc[df_['well'] == well, 'log10values'].values

        # a requested well without any measurements cannot be fitted; the KDE
        # fit would raise on the empty array and take the whole analysis down
        if len(vals) == 0:
            print('no data found for well {}; skipping'.format(well))
            continue

        # the per-measurement gaussian assignments are not needed here (GMM
        # gating is implemented elsewhere), so they are not kept in memory
        mean_of_highest_peak, fraction_of_highest_peak, mean_of_off_population, \
                        fraction_off, _ = fit_GMM_KDE_wrapper(vals)

        rows.append({'mean ON': mean_of_highest_peak,
                     'fraction ON': fraction_of_highest_peak,
                     'mean OFF': mean_of_off_population,
                     'fraction OFF': fraction_off,
                     'well': well})

    plt_df = pd.DataFrame(rows, columns=columns)
    plt_df['gate'] = gate

    return plt_df

In [9]:
def run_GMM_frac_analysis (file_exp, channel, wells, gate = None):
    """
    Runs the whole GMM fractional analysis on a bunch of files, getting you location and fraction ON and OFF
    in a particular channel
    
    Takes:
    
    file_exp  :  glob pattern (e.g. 'folder/*.csv') that selects the files to run
    channel  :  the fluorescence channel data you want to analyze
    wells  :  the list of wells you want to analyze, allows analysis of subset of the files in file_exp
    gate  :  the gate inside which you want to do the analysis, DEF to None
    
    ------------
    Returns
    
    output_df  :  a dataframe that has mean locations ON/OFF, fractions of reads in the ON/OFF gaussians,
                    the corresponding well, the gate in which the analysis was applied to this well
    """
    
    #Read in the tidied data generated by any of your gating scripts
    # The input for make_df is the glob pattern of the files that you want automatic fractions generated for. 
    input_df = make_df(file_exp, channel=channel, gate=gate)
    
    #the log transform turns readings of 0 into -inf and negative readings into NaN.
    #dropna alone only removes the NaN, so map the infinities to NaN first; otherwise
    #a single zero reading makes the density fit fail on non-finite input
    input_df = input_df.replace([np.inf, -np.inf], np.nan).dropna()
    
    #now we have our mature input data, channel selected and gated
    
    #check if it looks right
    print(input_df.head())
    
    ######## now do the analysis #########
    
    # Generate an output_df by running GMM method. The inputs are the input_df and
    # the list of all wells you want generated
    output_df = GMM_method(input_df, wells, gate=gate)
    
    return output_df

In [10]:
# Wells that have data files and should be fitted with GMMs.
# Here: all 96 wells of a plate, row by row (A1..A12, B1..B12, ..., H1..H12).
wells = [row + str(column) for row in 'ABCDEFGH' for column in range(1, 13)]

In [11]:
# Static lookup: short channel name -> column name of that channel in the flow files
channels = dict(
    syto='mKate/APC-A',
    yfp='GFP/FITC-A',
    bfp='CFP/VioBlue-A',
    cfp='VioGreen-A',
)


In [38]:
# Important variables for this run

# folder holding the flow exports, the subfolder to analyze, and the glob
# pattern matching every csv inside that subfolder
upper_dir = 'Z:/Reed/Projects/micro_consortia/DARPA_biocon/Task 1.1/A=B/20190214 A=B mar cfp yfp small screen 1/flow/'
tpt = '18'
f = '{}{}/*.csv'.format(upper_dir, tpt)

# channel to analyze, looked up by its short name
key = 'yfp'
c = channels[key]

# wells to fit
w = wells

# gate column(s) to analyze inside; the analysis is run once per entry
gt = ['mKate/APC-A_GMMgate']

In [40]:
# Run the full fractional analysis once for every gate in gt, collecting one
# summary table per gate.
all_analysis = []

for g in gt:
    add = run_GMM_frac_analysis(f, c, w, g)
    
    all_analysis.append(add)
    
# Stack the per-gate tables row-wise into a single dataframe.
analysis = pd.concat(all_analysis, axis='index')



  well  log10values
0   A1     2.477515
1   A1     3.092553
2   A1     3.054189
3   A1     2.251818
4   A1     3.300649


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.87239366]]
means of the gaussians the GMM optimized:  [2.491311247539339]
counts in each gaussian:  [63830]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.491311247539339]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.1117598 ]
 [3.13261051]]
means of the gaussians the GMM optimized:  [2.1187926029643767, 3.1304154903207824]
counts in each gaussian:  [723, 357]
raw fractions of data in each gaussian:  [0.66944444 0.33055556]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1187926029643767, 3.1304154903207824]
fraction of data in above-thresh gaussians:  [0.6694444444444444, 0.33055555555555555]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.75729775]]
means of the gaussians the GMM optimized:  [2.569425653080447]
counts in each gaussian:  [111260]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.569425653080447]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.19683069]]
means of the gaussians the GMM optimized:  [2.353397523337402]
counts in each gaussian:  [21734]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.353397523337402]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


peaks identified in the kde fit of data:  [[2.19182652]
 [5.76980817]
 [5.79983319]
 [5.98999166]]
means of the gaussians the GMM optimized:  [2.1418315723013044, 1.207607469512837, nan, nan]
counts in each gaussian:  [6644, 539, 0, 0]
raw fractions of data in each gaussian:  [0.92496172 0.07503828 0.         0.        ]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1418315723013044, 1.207607469512837]
fraction of data in above-thresh gaussians:  [0.9249617151607963, 0.07503828483920368]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.18181818]]
means of the gaussians the GMM optimized:  [2.057832867439141]
counts in each gaussian:  [55776]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.057832867439141]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.70725605]]
means of the gaussians the GMM optimized:  [2.7849974399585617]
counts in each gaussian:  [207212]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7849974399585617]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.71226022]]
means of the gaussians the GMM optimized:  [2.7052984505321085]
counts in each gaussian:  [128820]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7052984505321085]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.77231026]]
means of the gaussians the GMM optimized:  [2.7712488218563767]
counts in each gaussian:  [25779]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7712488218563767]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.74228524]]
means of the gaussians the GMM optimized:  [2.7105965872757802]
counts in each gaussian:  [30451]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7105965872757802]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.00166806]
 [3.77814846]]
means of the gaussians the GMM optimized:  [1.8600367905118023, 3.742317890989279]
counts in each gaussian:  [39310, 7668]
raw fractions of data in each gaussian:  [0.83677466 0.16322534]
means of gaussians that contain fraction of data above thresh (0.01):  [1.8600367905118023, 3.742317890989279]
fraction of data in above-thresh gaussians:  [0.8367746604793733, 0.16322533952062668]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.01668057]
 [3.77314429]]
means of the gaussians the GMM optimized:  [1.8912466196979454, 3.755013783377196]
counts in each gaussian:  [17499, 3449]
raw fractions of data in each gaussian:  [0.83535421 0.16464579]
means of gaussians that contain fraction of data above thresh (0.01):  [1.8912466196979454, 3.755013783377196]
fraction of data in above-thresh gaussians:  [0.8353542104258163, 0.1646457895741837]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.90241868]]
means of the gaussians the GMM optimized:  [2.5059528299942064]
counts in each gaussian:  [14486]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.5059528299942064]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.14678899]
 [3.07756464]]
means of the gaussians the GMM optimized:  [2.1231545559996308, 3.136404280574586]
counts in each gaussian:  [1507, 1231]
raw fractions of data in each gaussian:  [0.55040175 0.44959825]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1231545559996308, 3.136404280574586]
fraction of data in above-thresh gaussians:  [0.5504017531044558, 0.4495982468955442]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.89241034]]
means of the gaussians the GMM optimized:  [2.599528697694718]
counts in each gaussian:  [10130]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.599528697694718]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.19182652]]
means of the gaussians the GMM optimized:  [2.4520255490768936]
counts in each gaussian:  [11085]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.4520255490768936]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24687239]
 [4.79899917]]
means of the gaussians the GMM optimized:  [2.219407636682343, 1.1156351484487133]
counts in each gaussian:  [10586, 348]
raw fractions of data in each gaussian:  [0.96817267 0.03182733]
means of gaussians that contain fraction of data above thresh (0.01):  [2.219407636682343, 1.1156351484487133]
fraction of data in above-thresh gaussians:  [0.9681726723980245, 0.03182732760197549]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.27189324]]
means of the gaussians the GMM optimized:  [2.239031870046253]
counts in each gaussian:  [18928]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.239031870046253]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.74228524]]
means of the gaussians the GMM optimized:  [2.769644780587153]
counts in each gaussian:  [63029]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.769644780587153]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.77231026]]
means of the gaussians the GMM optimized:  [2.8095689326777853]
counts in each gaussian:  [22046]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8095689326777853]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.80233528]]
means of the gaussians the GMM optimized:  [2.9117384589762243]
counts in each gaussian:  [22882]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.9117384589762243]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.80233528]]
means of the gaussians the GMM optimized:  [2.7989399544842537]
counts in each gaussian:  [9653]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7989399544842537]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.00667223]
 [3.87823186]]
means of the gaussians the GMM optimized:  [1.8759433985755234, 3.871579917541624]
counts in each gaussian:  [15838, 5817]
raw fractions of data in each gaussian:  [0.73137843 0.26862157]
means of gaussians that contain fraction of data above thresh (0.01):  [1.8759433985755234, 3.871579917541624]
fraction of data in above-thresh gaussians:  [0.7313784345416763, 0.2686215654583237]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.01668057]
 [3.82819016]]
means of the gaussians the GMM optimized:  [1.9171598633647664, 3.678006427464023]
counts in each gaussian:  [31927, 25662]
raw fractions of data in each gaussian:  [0.55439407 0.44560593]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9171598633647664, 3.678006427464023]
fraction of data in above-thresh gaussians:  [0.5543940683116567, 0.4456059316883433]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.19683069]]
means of the gaussians the GMM optimized:  [2.480387400548761]
counts in each gaussian:  [8101]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.480387400548761]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.21684737]
 [3.19266055]]
means of the gaussians the GMM optimized:  [2.1733270105662847, 3.204017871570997]
counts in each gaussian:  [6629, 1521]
raw fractions of data in each gaussian:  [0.81337423 0.18662577]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1733270105662847, 3.204017871570997]
fraction of data in above-thresh gaussians:  [0.8133742331288344, 0.18662576687116564]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.91242702]]
means of the gaussians the GMM optimized:  [2.6747060836850873]
counts in each gaussian:  [13365]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6747060836850873]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.22685571]]
means of the gaussians the GMM optimized:  [2.370932367812545]
counts in each gaussian:  [8118]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.370932367812545]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24687239]]
means of the gaussians the GMM optimized:  [2.1870683815324714]
counts in each gaussian:  [5845]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1870683815324714]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.27189324]]
means of the gaussians the GMM optimized:  [2.224075835785167]
counts in each gaussian:  [12352]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.224075835785167]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.72727273]]
means of the gaussians the GMM optimized:  [2.9492740309084997]
counts in each gaussian:  [99391]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.9492740309084997]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.75229358]]
means of the gaussians the GMM optimized:  [2.7401911232922984]
counts in each gaussian:  [18631]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7401911232922984]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.79232694]]
means of the gaussians the GMM optimized:  [2.8156493762127357]
counts in each gaussian:  [12040]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8156493762127357]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.79733111]]
means of the gaussians the GMM optimized:  [2.8445059942424233]
counts in each gaussian:  [11481]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8445059942424233]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.0116764 ]
 [4.08840701]]
means of the gaussians the GMM optimized:  [1.9301115059891358, 3.9410254620876937]
counts in each gaussian:  [24219, 44943]
raw fractions of data in each gaussian:  [0.35017784 0.64982216]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9301115059891358, 3.9410254620876937]
fraction of data in above-thresh gaussians:  [0.35017784332436885, 0.6498221566756311]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.0617181 ]
 [4.06338616]]
means of the gaussians the GMM optimized:  [1.9467993305099458, 3.970708432660387]
counts in each gaussian:  [58550, 69806]
raw fractions of data in each gaussian:  [0.4561532 0.5438468]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9467993305099458, 3.970708432660387]
fraction of data in above-thresh gaussians:  [0.4561531989155162, 0.5438468010844838]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.92243536]]
means of the gaussians the GMM optimized:  [2.6138875891324385]
counts in each gaussian:  [9695]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6138875891324385]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.23686405]]
means of the gaussians the GMM optimized:  [2.372302722526963]
counts in each gaussian:  [24458]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.372302722526963]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.97247706]]
means of the gaussians the GMM optimized:  [2.657494329968979]
counts in each gaussian:  [14712]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.657494329968979]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24186822]]
means of the gaussians the GMM optimized:  [2.390088274539584]
counts in each gaussian:  [10222]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.390088274539584]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.27689741]]
means of the gaussians the GMM optimized:  [2.220257269568114]
counts in each gaussian:  [20006]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.220257269568114]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.28190158]]
means of the gaussians the GMM optimized:  [2.2398640995061365]
counts in each gaussian:  [12531]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.2398640995061365]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.74228524]]
means of the gaussians the GMM optimized:  [2.6951662557765323]
counts in each gaussian:  [12427]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6951662557765323]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.75729775]]
means of the gaussians the GMM optimized:  [2.7596092954792666]
counts in each gaussian:  [15847]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7596092954792666]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.80733945]]
means of the gaussians the GMM optimized:  [2.8434798134030412]
counts in each gaussian:  [11280]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8434798134030412]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.7823186]]
means of the gaussians the GMM optimized:  [2.8189620928251222]
counts in each gaussian:  [15397]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8189620928251222]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.07673061]
 [4.1234362 ]]
means of the gaussians the GMM optimized:  [1.9891554897448973, 3.9761754687741773]
counts in each gaussian:  [22448, 22179]
raw fractions of data in each gaussian:  [0.50301387 0.49698613]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9891554897448973, 3.9761754687741773]
fraction of data in above-thresh gaussians:  [0.5030138705268111, 0.4969861294731889]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.05671393]
 [4.12844037]]
means of the gaussians the GMM optimized:  [1.9669579726926059, 4.093081670675345]
counts in each gaussian:  [56301, 72888]
raw fractions of data in each gaussian:  [0.43580336 0.56419664]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9669579726926059, 4.093081670675345]
fraction of data in above-thresh gaussians:  [0.4358033578710262, 0.5641966421289738]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.82235196]]
means of the gaussians the GMM optimized:  [2.599383136078015]
counts in each gaussian:  [3057]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.599383136078015]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.19683069]]
means of the gaussians the GMM optimized:  [2.484426363858926]
counts in each gaussian:  [29877]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.484426363858926]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.87739783]]
means of the gaussians the GMM optimized:  [2.6346332769523673]
counts in each gaussian:  [12950]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6346332769523673]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.2118432]]
means of the gaussians the GMM optimized:  [2.4849787819324516]
counts in each gaussian:  [15992]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.4849787819324516]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.22685571]
 [5.33944954]]
means of the gaussians the GMM optimized:  [2.1439500440360795, 5.337236406061013]
counts in each gaussian:  [47970, 1]
raw fractions of data in each gaussian:  [9.99979154e-01 2.08459277e-05]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1439500440360795]
fraction of data in above-thresh gaussians:  [0.999979154072252]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.23185988]
 [4.83903253]]
means of the gaussians the GMM optimized:  [2.150638087250772, 4.837976100237882]
counts in each gaussian:  [23698, 1]
raw fractions of data in each gaussian:  [9.99957804e-01 4.21958732e-05]
means of gaussians that contain fraction of data above thresh (0.01):  [2.150638087250772]
fraction of data in above-thresh gaussians:  [0.9999578041267564]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.79733111]]
means of the gaussians the GMM optimized:  [2.7918940452765213]
counts in each gaussian:  [11098]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7918940452765213]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.81734779]]
means of the gaussians the GMM optimized:  [2.7752082248627215]
counts in each gaussian:  [5796]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7752082248627215]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.882402]]
means of the gaussians the GMM optimized:  [2.8848249872236766]
counts in each gaussian:  [4447]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.8848249872236766]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.85237698]]
means of the gaussians the GMM optimized:  [2.909632447415046]
counts in each gaussian:  [7335]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.909632447415046]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[1.99666389]
 [4.10842369]]
means of the gaussians the GMM optimized:  [1.920772748637801, 3.6707557189954123]
counts in each gaussian:  [174, 37]
raw fractions of data in each gaussian:  [0.82464455 0.17535545]
means of gaussians that contain fraction of data above thresh (0.01):  [1.920772748637801, 3.6707557189954123]
fraction of data in above-thresh gaussians:  [0.8246445497630331, 0.17535545023696683]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[0.35529608]
 [2.02168474]
 [4.11342786]]
means of the gaussians the GMM optimized:  [0.5092054543093593, 1.9705030849345841, 3.780222046918343]
counts in each gaussian:  [7, 186, 34]
raw fractions of data in each gaussian:  [0.030837   0.81938326 0.14977974]
means of gaussians that contain fraction of data above thresh (0.01):  [0.5092054543093593, 1.9705030849345841, 3.780222046918343]
fraction of data in above-thresh gaussians:  [0.030837004405286344, 0.8193832599118943, 0.14977973568281938]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.89741451]]
means of the gaussians the GMM optimized:  [2.5856615813767037]
counts in each gaussian:  [3298]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.5856615813767037]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.17681401]
 [3.17264387]]
means of the gaussians the GMM optimized:  [2.163717844037311, 3.214191285832004]
counts in each gaussian:  [10277, 4148]
raw fractions of data in each gaussian:  [0.71244367 0.28755633]
means of gaussians that contain fraction of data above thresh (0.01):  [2.163717844037311, 3.214191285832004]
fraction of data in above-thresh gaussians:  [0.7124436741767765, 0.28755632582322355]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.9324437]]
means of the gaussians the GMM optimized:  [2.674886579193932]
counts in each gaussian:  [22399]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.674886579193932]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.18682235]]
means of the gaussians the GMM optimized:  [2.5613345196265143]
counts in each gaussian:  [13451]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.5613345196265143]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24687239]]
means of the gaussians the GMM optimized:  [2.1814221688161077]
counts in each gaussian:  [17425]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1814221688161077]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24186822]]
means of the gaussians the GMM optimized:  [2.173061736007165]
counts in each gaussian:  [20915]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.173061736007165]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.78732277]]
means of the gaussians the GMM optimized:  [2.7886293101205015]
counts in each gaussian:  [15351]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7886293101205015]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.80733945]]
means of the gaussians the GMM optimized:  [2.810573923335876]
counts in each gaussian:  [11104]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.810573923335876]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.82235196]
 [5.4295246 ]]
means of the gaussians the GMM optimized:  [2.900327343778035, 1.5918803897096852]
counts in each gaussian:  [9924, 273]
raw fractions of data in each gaussian:  [0.97322742 0.02677258]
means of gaussians that contain fraction of data above thresh (0.01):  [2.900327343778035, 1.5918803897096852]
fraction of data in above-thresh gaussians:  [0.9732274198293616, 0.026772580170638424]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.8323603]]
means of the gaussians the GMM optimized:  [2.9146184216460265]
counts in each gaussian:  [11708]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.9146184216460265]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.00166806]
 [3.60800667]]
means of the gaussians the GMM optimized:  [1.8983396580593292, 3.9178745816896283]
counts in each gaussian:  [173, 21]
raw fractions of data in each gaussian:  [0.89175258 0.10824742]
means of gaussians that contain fraction of data above thresh (0.01):  [1.8983396580593292, 3.9178745816896283]
fraction of data in above-thresh gaussians:  [0.8917525773195877, 0.10824742268041238]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.08673895]
 [3.9883236 ]]
means of the gaussians the GMM optimized:  [1.9431081717839294, 3.74899479527123]
counts in each gaussian:  [170, 18]
raw fractions of data in each gaussian:  [0.90425532 0.09574468]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9431081717839294, 3.74899479527123]
fraction of data in above-thresh gaussians:  [0.9042553191489362, 0.09574468085106383]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.97748123]]
means of the gaussians the GMM optimized:  [2.6240616203774834]
counts in each gaussian:  [5253]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6240616203774834]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.21684737]
 [3.49791493]]
means of the gaussians the GMM optimized:  [2.1940661211711556, 3.3759691952783286]
counts in each gaussian:  [16359, 3023]
raw fractions of data in each gaussian:  [0.84403054 0.15596946]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1940661211711556, 3.3759691952783286]
fraction of data in above-thresh gaussians:  [0.844030543803529, 0.15596945619647096]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.04753962]]
means of the gaussians the GMM optimized:  [2.6997768518124365]
counts in each gaussian:  [6575]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.6997768518124365]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.19683069]
 [3.49791493]]
means of the gaussians the GMM optimized:  [2.1801574374265678, 3.3554468668591007]
counts in each gaussian:  [8262, 1769]
raw fractions of data in each gaussian:  [0.8236467 0.1763533]
means of gaussians that contain fraction of data above thresh (0.01):  [2.1801574374265678, 3.3554468668591007]
fraction of data in above-thresh gaussians:  [0.8236466952447413, 0.1763533047552587]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24687239]]
means of the gaussians the GMM optimized:  [2.176735582085221]
counts in each gaussian:  [11646]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.176735582085221]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.2618849]]
means of the gaussians the GMM optimized:  [2.199734853263026]
counts in each gaussian:  [25117]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.199734853263026]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.75229358]]
means of the gaussians the GMM optimized:  [2.7526924121784706]
counts in each gaussian:  [8513]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7526924121784706]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.72727273]]
means of the gaussians the GMM optimized:  [2.851897766934982]
counts in each gaussian:  [15534]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.851897766934982]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.80233528]]
means of the gaussians the GMM optimized:  [3.0191602222533755]
counts in each gaussian:  [16475]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [3.0191602222533755]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.36780651]]
means of the gaussians the GMM optimized:  [3.019915327098238]
counts in each gaussian:  [15170]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [3.019915327098238]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.24270225]]
means of the gaussians the GMM optimized:  [3.1598929144100607]
counts in each gaussian:  [7]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [3.1598929144100607]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.06672227]
 [4.17848207]]
means of the gaussians the GMM optimized:  [1.8841971184013329, 4.134485873348768]
counts in each gaussian:  [168, 9]
raw fractions of data in each gaussian:  [0.94915254 0.05084746]
means of gaussians that contain fraction of data above thresh (0.01):  [1.8841971184013329, 4.134485873348768]
fraction of data in above-thresh gaussians:  [0.9491525423728814, 0.05084745762711865]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.15679733]
 [3.17764804]]
means of the gaussians the GMM optimized:  [2.140680575992739, 3.1331551280459284]
counts in each gaussian:  [3817, 3722]
raw fractions of data in each gaussian:  [0.50630057 0.49369943]
means of gaussians that contain fraction of data above thresh (0.01):  [2.140680575992739, 3.1331551280459284]
fraction of data in above-thresh gaussians:  [0.5063005703674227, 0.4936994296325773]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.2618849 ]
 [3.53294412]]
means of the gaussians the GMM optimized:  [2.25492765293528, 3.437570279696865]
counts in each gaussian:  [10176, 2881]
raw fractions of data in each gaussian:  [0.77935207 0.22064793]
means of gaussians that contain fraction of data above thresh (0.01):  [2.25492765293528, 3.437570279696865]
fraction of data in above-thresh gaussians:  [0.7793520716856859, 0.22064792831431415]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.18682235]
 [3.39783153]]
means of the gaussians the GMM optimized:  [2.2106518338647785, 3.25309676409173]
counts in each gaussian:  [19493, 14875]
raw fractions of data in each gaussian:  [0.56718459 0.43281541]
means of gaussians that contain fraction of data above thresh (0.01):  [2.2106518338647785, 3.25309676409173]
fraction of data in above-thresh gaussians:  [0.5671845903165735, 0.43281540968342647]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.23686405]
 [3.53794829]]
means of the gaussians the GMM optimized:  [2.2107943076200853, 3.3873696165463296]
counts in each gaussian:  [7522, 1592]
raw fractions of data in each gaussian:  [0.82532368 0.17467632]
means of gaussians that contain fraction of data above thresh (0.01):  [2.2107943076200853, 3.3873696165463296]
fraction of data in above-thresh gaussians:  [0.8253236778582401, 0.17467632214175993]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.27189324]
 [5.30942452]]
means of the gaussians the GMM optimized:  [2.245176247007544, 1.1708917793072358]
counts in each gaussian:  [12723, 420]
raw fractions of data in each gaussian:  [0.96804383 0.03195617]
means of gaussians that contain fraction of data above thresh (0.01):  [2.245176247007544, 1.1708917793072358]
fraction of data in above-thresh gaussians:  [0.9680438256105912, 0.03195617438940881]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.24186822]
 [4.77397832]]
means of the gaussians the GMM optimized:  [2.2136428370979186, 1.2655944884962114]
counts in each gaussian:  [4569, 154]
raw fractions of data in each gaussian:  [0.96739361 0.03260639]
means of gaussians that contain fraction of data above thresh (0.01):  [2.2136428370979186, 1.2655944884962114]
fraction of data in above-thresh gaussians:  [0.9673936057590514, 0.03260639424094855]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.72226856]]
means of the gaussians the GMM optimized:  [2.7779052144792367]
counts in each gaussian:  [12357]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.7779052144792367]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.35279399]]
means of the gaussians the GMM optimized:  [2.907745653380726]
counts in each gaussian:  [8916]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.907745653380726]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.36280234]]
means of the gaussians the GMM optimized:  [2.9909954818747044]
counts in each gaussian:  [12346]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [2.9909954818747044]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[3.34778982]]
means of the gaussians the GMM optimized:  [3.0246343359083787]
counts in each gaussian:  [10075]
raw fractions of data in each gaussian:  [1.]
means of gaussians that contain fraction of data above thresh (0.01):  [3.0246343359083787]
fraction of data in above-thresh gaussians:  [1.0]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


peaks identified in the kde fit of data:  [[2.04170142]
 [3.89824854]]
means of the gaussians the GMM optimized:  [1.877306402924504, 3.7646019480092754]
counts in each gaussian:  [178, 6]
raw fractions of data in each gaussian:  [0.9673913 0.0326087]
means of gaussians that contain fraction of data above thresh (0.01):  [1.877306402924504, 3.7646019480092754]
fraction of data in above-thresh gaussians:  [0.967391304347826, 0.03260869565217391]
peaks identified in the kde fit of data:  [[2.07172644]
 [3.94328607]]
means of the gaussians the GMM optimized:  [1.9265822993642225, 3.8321278020858482]
counts in each gaussian:  [145, 11]
raw fractions of data in each gaussian:  [0.92948718 0.07051282]
means of gaussians that contain fraction of data above thresh (0.01):  [1.9265822993642225, 3.8321278020858482]
fraction of data in above-thresh gaussians:  [0.9294871794871795, 0.07051282051282051]


  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval


In [41]:
# Show the `analysis` table as this cell's output
# (columns: mean ON, fraction ON, mean OFF, fraction OFF, well, gate).
analysis

Unnamed: 0,mean ON,fraction ON,mean OFF,fraction OFF,well,gate
0,2.491311,1.000000,,0.000000,A1,mKate/APC-A_GMMgate
1,3.130415,0.330556,2.118793,0.669444,A2,mKate/APC-A_GMMgate
2,2.569426,1.000000,,0.000000,A3,mKate/APC-A_GMMgate
3,2.353398,1.000000,,0.000000,A4,mKate/APC-A_GMMgate
4,2.141832,0.924962,1.207607,0.075038,A5,mKate/APC-A_GMMgate
5,2.057833,1.000000,,0.000000,A6,mKate/APC-A_GMMgate
6,2.784997,1.000000,,0.000000,A7,mKate/APC-A_GMMgate
7,2.705298,1.000000,,0.000000,A8,mKate/APC-A_GMMgate
8,2.771249,1.000000,,0.000000,A9,mKate/APC-A_GMMgate
9,2.710597,1.000000,,0.000000,A10,mKate/APC-A_GMMgate


In [42]:
# Save the GMM analysis table as "<tpt>_<key>_GMManalysis.csv" inside `upper_dir`.
# `upper_dir`, `tpt` and `key` are not set in this cell; they are expected to
# come from earlier cells of the notebook.
# os.path.join replaces manual '/' concatenation so that a trailing separator on
# `upper_dir` (or an empty `upper_dir`) still yields a sensible path.
# NOTE(review): channel/gate names in this notebook contain '/' (e.g.
# 'mKate/APC-A_GMMgate'); if `key` does too, the file will land in a
# subdirectory that must already exist -- confirm `key` is filename-safe.
# index=False keeps the DataFrame's row index out of the written file.
analysis.to_csv(os.path.join(upper_dir, tpt + '_' + key + '_GMManalysis.csv'), index=False)