<img src="https://electronaut.info/wp-content/uploads/2017/05/Tubescape_jupyter2.jpg">

<h1><font color="#b30000">TUBE DATA MATCHER for uTRACER FILES</font></h1><h5>Anode current for a sweep of grid voltages while anode voltage is constant</h5>


# STEP 1 — Import and process tube data, build dataframe.

In [None]:
from ElectronauTracer_Importer_restruct import *

### Start the Matched Set Finder

In [None]:
# STEP 7:  Build a dataframe of the best matched sets in order

# math lib used for generating ordinal strings; i.e. ['1st', '2nd', '3rd'] etc.
import math

# Work on a private copy of df_dif: the matching loop further down repeatedly
# filters rows out of df_dif_copy, so df_dif itself is left untouched.
df_dif_copy = df_dif.copy()

def ask_tube_set_size():
    """Prompt for the matched-set size and derive the per-tube column names.

    Returns:
        A ``(tube_set_size, tube_sets_columns)`` tuple. ``tube_set_size`` is
        the raw string typed by the user (callers convert it with ``int``);
        ``tube_sets_columns`` is ``['Tube_1', ..., 'Tube_N']``.

    Raises:
        ValueError: If the entered text cannot be parsed as an integer.
    """
    tube_set_size = input("Please enter the number of tubes per matched set:")

    # One column name per tube position, numbered from 1
    tube_count = int(tube_set_size)
    tube_sets_columns = ['Tube_%d' % position for position in range(1, tube_count + 1)]

    return tube_set_size, tube_sets_columns


def find_best_set_for_chosen_tube_ID(df_dif_copy, tube_to_match, tube_set_size, tube_sets_columns):
    """Build the candidate set anchored on one reference tube.

    Args:
        df_dif_copy: DataFrame with ``ref_tube_ID``, ``match_tube_ID`` and
            ``error_sum`` columns.
        tube_to_match: Reference tube ID; converted to ``str`` before it is
            compared with ``ref_tube_ID``.
        tube_set_size: Number of rows to keep (int or numeric string).
        tube_sets_columns: Keys under which the matched tube IDs are stored,
            in ascending ``error_sum`` order.

    Returns:
        A dict mapping each name in ``tube_sets_columns`` to a matched tube ID,
        plus ``'tube_set_score'`` holding the sum of the kept ``error_sum``
        values.
    """
    reference_id = str(tube_to_match)

    # Rows for this reference tube, closest matches first, cut to the set size
    rows_for_reference = df_dif_copy[df_dif_copy.ref_tube_ID == reference_id]
    nearest_rows = rows_for_reference.sort_values('error_sum').head(int(tube_set_size))

    # Pair the column names with the matched tube IDs, then attach the score
    set_summary = dict(zip(tube_sets_columns, list(nearest_rows.match_tube_ID)))
    set_summary['tube_set_score'] = nearest_rows.error_sum.sum()

    return set_summary


def find_best_set(df_dif_copy, tube_set_size, tube_sets_columns):
    """Return the single lowest-scoring tube set available in ``df_dif_copy``.

    Every distinct ``ref_tube_ID`` is tried as the anchor of a candidate set
    (via ``find_best_set_for_chosen_tube_ID``); the candidate with the
    smallest ``tube_set_score`` wins.

    Args:
        df_dif_copy: DataFrame with a ``ref_tube_ID`` column, plus whatever
            columns ``find_best_set_for_chosen_tube_ID`` reads.
        tube_set_size: Number of tubes per set (int or numeric string).
        tube_sets_columns: Column names for the tubes of a set. Any
            ``'tube_set_score'`` entry is ignored. The list is NOT modified,
            so the same list can be passed on every call.

    Returns:
        A DataFrame of at most one row whose columns are ``'tube_set_score'``
        followed by the tube columns. It is empty when ``df_dif_copy`` holds
        no reference tubes.
    """
    # Work on a local list so the caller's column list is left untouched
    tube_columns = [name for name in tube_sets_columns if name != 'tube_set_score']

    # One candidate set (dict of tubes + score) per reference tube
    candidate_sets = [
        find_best_set_for_chosen_tube_ID(df_dif_copy, ref_tube, tube_set_size, tube_columns)
        for ref_tube in df_dif_copy.ref_tube_ID.unique()
    ]

    # Score column first, then the tubes
    df_tube_sets = pd.DataFrame(candidate_sets, columns=['tube_set_score'] + tube_columns)

    # Lowest score is the best match; keep only that row
    return df_tube_sets.sort_values('tube_set_score').head(1)


# Build a line plot with one trace per tube in a list of tube numbers
def plot_tubes(list_of_tubes, df):
    """Return a Bokeh figure with one line per tube in ``list_of_tubes``.

    Args:
        list_of_tubes: Tube identifiers; only the first three characters of
            each are compared with ``df['tube_ID']``.
        df: DataFrame with a ``tube_ID`` column and ``Bias_<int>`` columns.
            The x axis is built from the integers in the ``Bias_`` column
            names; the y values are taken from the fourth column onward of
            the first row matching each tube.

    Returns:
        The configured figure. Output is directed to
        ``Tube_Set_Statistics.html``; the caller is expected to ``show`` it.
    """
    output_file("Tube_Set_Statistics.html")

    # Size and title
    plot = figure(plot_width=1000, plot_height=750, title=str(list_of_tubes))
    plot.title.text_color = "black"

    # Axis labels
    plot.xaxis.axis_label = "Grid Voltage (V)"
    plot.xaxis.axis_label_text_color = "#aa6666"
    plot.yaxis.axis_label = "Plate Current (mA)"

    # Minor grid lines, same styling on both axes
    for grid in (plot.ygrid, plot.xgrid):
        grid.minor_grid_line_color = 'navy'
        grid.minor_grid_line_alpha = 0.2

    # Background
    plot.background_fill_color = "beige"
    plot.background_fill_alpha = 0.5

    # x values: the integer that follows the 'Bias_' prefix of each such column
    x_values = [int(name[5:]) for name in df.columns if name.startswith('Bias_')]

    for tube in list_of_tubes:
        matching_rows = df.loc[df['tube_ID'] == tube[0:3]]
        first_row = matching_rows.values.tolist()[0]
        # Skip the first three columns; everything after is plotted as a float
        currents = [float(value) for value in first_row[3:]]
        plot.line(x_values, currents, line_width=2)

    return plot


    
# Start a timer
start_time = time.time()

# Get the tube_set_size and the tube_sets_columns from the ask_tube_set_size function
tube_set_size, tube_sets_columns = ask_tube_set_size()

# Display a progress bar with one step per complete set the batch can yield
progress_bar_range = int(len(df) / int(tube_set_size))
best_matched_progress_bar = FloatProgress(min=0, max=progress_bar_range)
display(best_matched_progress_bar)


def ordinal(n):
    """Return ``n`` with its English ordinal suffix, e.g. 1 -> '1st', 12 -> '12th'."""
    if n % 100 in (11, 12, 13):
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return "%d%s" % (n, suffix)


# Kept for any later cell that indexes it; the loop below calls ordinal()
# directly so it cannot run past the end of this list.
ordinal_list = [ordinal(n) for n in range(1, int(len(df)))]

# One single-row dataframe per matched set, best set first
best_set_frames = []
tube_matches_master_plot_list = []
set_count = 0

# The first set is always extracted; after that, keep going for as long as
# enough reference tubes remain to fill another set.
while True:

    # Get the best remaining tube set from the batch
    df_best_set_temp = find_best_set(df_dif_copy, tube_set_size, tube_sets_columns)
    best_set_frames.append(df_best_set_temp)

    # The first column is the score; the rest are the tubes of the set
    tube_delete_list = list(df_best_set_temp.iloc[0])[1:]

    # Remove the chosen tubes from df_dif_copy, both as reference and as match
    df_dif_copy = df_dif_copy[~df_dif_copy.ref_tube_ID.isin(tube_delete_list)]
    df_dif_copy = df_dif_copy[~df_dif_copy.match_tube_ID.isin(tube_delete_list)]

    print()
    if set_count == 0:
        print("*** BEST! ***")
    print(ordinal(set_count + 1), "best tube set SCORE:", str(df_best_set_temp.tube_set_score.iloc[0]))
    print("TUBES:", tube_delete_list)

    # NOTE(review): as before, the very first set is not recorded here and
    # later sets are inserted at position 1 rather than appended, so the list
    # is not in rank order. Confirm the intended ordering before relying on it.
    if set_count > 0:
        tube_matches_master_plot_list.insert(1, list(tube_delete_list))

    set_count += 1

    # Update the progress bar
    best_matched_progress_bar.value += 1

    if len(df_dif_copy.ref_tube_ID.unique()) < int(tube_set_size):
        break

# DataFrame.append was removed in pandas 2.0; build the result with one concat.
# Row labels are preserved exactly as append() preserved them.
df_best_tube_sets = pd.concat(best_set_frames)

stop_time = time.time()
elapsed_time = stop_time - start_time
print()
print("It took", round(elapsed_time, 2), "seconds to produce", set_count, "tube sets.")

# Tubes of the first (best) and last (worst) rows, without the score column
best_set_list = list(df_best_tube_sets.iloc[0])[1:]
worst_set = df_best_tube_sets.tail(1)
worst_set_list = list(worst_set.iloc[0])[1:]

# To plot the best set right here instead of in a later cell:
# show(plot_tubes(best_set_list, df))






# MATCHED SETS (lower score is better)

In [None]:
# Show the first rows of the matched-set table as this cell's output
df_best_tube_sets.head()

# Plot any tube set by entering the row number 

In [None]:
# PLOT by row number
# row_number is a positional index into df_best_tube_sets
row_number = 0

# Make the list of tubes to plot: the whole row minus its first value,
# which is the tube_set_score column
tubes_to_plot = df_best_tube_sets.iloc[row_number].tolist()[1:]

# Build the plot object
tube_plot = plot_tubes(tubes_to_plot, df)

# Render the figure
show(tube_plot)

# Find all possible complementary pairs of 4-tube sets from an 8-tube list

In [None]:
# Take (at most) the first eight tubes of the set selected in the plotting cell
tubelist = tubes_to_plot.copy()[0:8]

# Echo the list as this cell's output
tubelist


In [None]:
# Make a temporary working dataframe containing only the tubes in tubelist
tubelist_df = df[df['tube_ID'].isin(tubelist)]

In [None]:
# Make a list of all possible 4-tube sets from a list of 8 tubes
from itertools import combinations

def find_all_possible_combinations(tubelist, set_size=4):
    """Return every ``set_size``-tube combination that can be drawn from ``tubelist``.

    Args:
        tubelist: Iterable of tube identifiers.
        set_size: Number of tubes per combination. Defaults to 4, the size
            used throughout this notebook.

    Returns:
        A list of tuples in the order produced by ``itertools.combinations``;
        empty when ``tubelist`` has fewer than ``set_size`` items.
    """
    return list(combinations(tubelist, set_size))

# Find the complement of a tube set: the combination that shares no tube with it
def find_compliment(combination, all_possible_combinations):
    """Return the first combination that has no tube in common with ``combination``.

    Args:
        combination: The tube set whose complement is wanted.
        all_possible_combinations: Candidate combinations, searched in order.
            The sequence is not modified.

    Returns:
        The first candidate containing none of the tubes in ``combination``.

    Raises:
        IndexError: If every candidate shares at least one tube with
            ``combination``.
    """
    disjoint_candidates = [
        candidate
        for candidate in all_possible_combinations
        if not any(tube in candidate for tube in combination)
    ]
    return disjoint_candidates[0]

# Pair every combination with its complement, listing each pairing only once
def make_list_of_pairs(tubelist):
    """Return all ``(combination, complement)`` pairs for ``tubelist``.

    Combinations are visited in the order produced by
    ``find_all_possible_combinations``. Once a combination has been paired,
    both it and its complement are struck off, so the mirrored pairing is
    never emitted.

    Args:
        tubelist: Tube identifiers to split into two sets.

    Returns:
        A list of ``(combination, complement)`` tuples.
    """
    every_combination = find_all_possible_combinations(tubelist)
    still_unpaired = every_combination.copy()
    pairs = []

    for candidate in every_combination:
        # Already used as the complement of an earlier combination
        if candidate not in still_unpaired:
            continue

        partner = find_compliment(candidate, every_combination)
        still_unpaired.remove(partner)
        still_unpaired.remove(candidate)
        pairs.append((candidate, partner))

    return pairs

all_possible_pairs = make_list_of_pairs(tubelist)

In [None]:
# Mechanism for totalling up all the currents for combinations and complimentary pairs

# Bias points behind the 'pair_bias_<N>_error' keys (2, 6, ..., 50). They line
# up, in order, with the first 13 values returned by get_currents.
_PAIR_BIAS_POINTS = tuple(range(2, 51, 4))


# Look up all the current readings of one tube by passing the tube number
def get_currents(tube_ID, tubelist_df):
    """Return the readings of one tube as a tuple of floats.

    Args:
        tube_ID: Value to look for in ``tubelist_df['tube_ID']``.
        tubelist_df: DataFrame with a ``tube_ID`` column. The first three
            columns are skipped; every later column is treated as a reading.

    Returns:
        The readings of the first matching row, converted with ``float``.

    Raises:
        IndexError: If no row matches ``tube_ID``.
    """
    matching_rows = tubelist_df.loc[tubelist_df['tube_ID'] == tube_ID]
    readings = matching_rows.iloc[0].iloc[3:]
    return tuple(float(value) for value in readings.to_list())


def _sum_set_currents(tube_set, tubelist_df):
    """Return the per-bias-point current totals over the tubes in ``tube_set``."""
    totals = [0.0] * len(_PAIR_BIAS_POINTS)
    for tube in tube_set:
        currents = get_currents(tube, tubelist_df)
        for position in range(len(totals)):
            totals[position] = totals[position] + currents[position]
    return totals


# Total up the currents of a combination and of its complement, then compare them
def total_up_currents(combination_pair, tubelist_df=None):
    """Compare the summed currents of a combination and its complement.

    Args:
        combination_pair: ``(combination, compliment)``, each an iterable of
            tube IDs.
        tubelist_df: DataFrame passed on to ``get_currents``. When omitted,
            the module-level ``tubelist_df`` is used, which is what this
            function always did before the parameter existed.

    Returns:
        A dict with keys ``'combination'``, ``'compliment'``, one
        ``'pair_bias_<N>_error'`` per bias point (N = 2, 6, ..., 50) and
        ``'pair_total_error'``, the sum of ALL thirteen per-bias errors.

    Raises:
        IndexError: If a tube has fewer than thirteen readings.
    """
    if tubelist_df is None:
        # Backward compatibility with the old one-argument call
        tubelist_df = globals()['tubelist_df']

    combination = combination_pair[0]
    compliment = combination_pair[1]

    combination_totals = _sum_set_currents(combination, tubelist_df)
    compliment_totals = _sum_set_currents(compliment, tubelist_df)

    combo_pair_dict = {'combination': combination, 'compliment': compliment}
    pair_total_error = 0.0
    for bias, combination_total, compliment_total in zip(
            _PAIR_BIAS_POINTS, combination_totals, compliment_totals):
        # NOTE(review): this is a signed difference of squares, so errors of
        # opposite sign cancel in the total. Confirm that is the intended metric.
        pair_error = combination_total**2 - compliment_total**2
        combo_pair_dict['pair_bias_%d_error' % bias] = pair_error
        # Every bias point counts, including 18, which the total used to skip
        pair_total_error = pair_total_error + pair_error
    combo_pair_dict['pair_total_error'] = pair_total_error

    return combo_pair_dict


# Iterate through the all_possible_pairs list and total up all the currents,
# building a master list of dictionaries
def total_up_everything(all_possible_pairs, tubelist_df):
    """Return one ``total_up_currents`` dict per pair in ``all_possible_pairs``.

    Args:
        all_possible_pairs: Iterable of ``(combination, compliment)`` pairs.
        tubelist_df: DataFrame holding the tubes' readings; forwarded to
            ``total_up_currents``.

    Returns:
        A list of result dicts, in the order of ``all_possible_pairs``.
    """
    return [total_up_currents(pair, tubelist_df) for pair in all_possible_pairs]

# Total up every pair and collect the results in a dataframe, one row per pair
list_of_combination_dicts = total_up_everything(all_possible_pairs, tubelist_df)
combinations_df = pd.DataFrame(list_of_combination_dicts)

# pair_total_error is signed, so the best-balanced pair is the one whose error
# is closest to zero, not the most negative one. This is the same ranking the
# table below uses.
absolute_total_error = combinations_df.pair_total_error.abs()
winner = combinations_df.loc[[absolute_total_error.idxmin()]]

# Show the five best-balanced pairs as this cell's output
combinations_df.reindex(absolute_total_error.sort_values().index).head()



In [None]:
# Collapse the one-row winner frame to a Series, then pull the two tube sets
# out by column label. Integer keys on a label-indexed Series are positional
# only through a deprecated fallback, so the labels are used instead.
winner = winner.squeeze()
winning_combination = list(winner['combination'])
winning_compliment = list(winner['compliment'])

In [None]:
# Total up the currents of a combination and of its complement
# Returns the two lists of per-position sums (not a dictionary)

def other_total_up_currents(combination_pair, temp_columns):
    """Sum the readings of each tube set of a pair, position by position.

    Args:
        combination_pair: ``(combination_set, compliment_set)``, each an
            iterable of tube IDs looked up in the module-level ``tubelist_df``
            through ``get_currents``.
        temp_columns: Unused; kept so existing calls keep working.

    Returns:
        ``(combination_sum, compliment_sum)``: two lists of per-position
        totals. Each list has at most 14 entries and is shortened to the
        number of readings per tube; a set with no tubes yields 14 zeros.
    """
    def sum_tube_set(tube_set):
        totals = [0 for _ in range(14)]
        for tube in tube_set:
            # zip stops at the shorter input, so totals shrinks to the tube's
            # reading count on the first pass
            totals = [x + y for x, y in zip(totals, get_currents(tube, tubelist_df))]
        return totals

    combination_sum = sum_tube_set(combination_pair[0])
    compliment_sum = sum_tube_set(combination_pair[1])

    return combination_sum, compliment_sum
        
# NOTE(review): this sums the FIRST pair in all_possible_pairs, while the plot
# cell below titles its figure with the winning sets. Confirm which pair is meant.
# temp_columns is only assigned in that plot cell, so it must have been run
# once before this line can work.
combination_sum, compliment_sum = other_total_up_currents(all_possible_pairs[0], temp_columns)    

In [None]:
# Make a function to plot the combination and the compliment on the same plot to compare

from bokeh.plotting import figure, output_file, show

# Direct the Bokeh output to its own HTML file
output_file("Combination and Complimentary Pairs.html")

# Configure the size, title, etc.
# The title is the winning combination followed by its complement
p = figure(plot_width=1000, plot_height=750, title=str(winning_combination + winning_compliment))
p.title.text_color = "black"

# Make a list for the x_values by chopping off all the "Bias_" column name prefixes and converting to ints
temp_columns = list(df_stats.columns)
# NOTE(review): stats_columns is not used anywhere in the visible code
stats_columns = []
x_values = [int(i[5:]) for i in temp_columns if i.startswith('Bias_') == True]        

# Draw one line for the combination sums and one for the complement sums
# (both come from other_total_up_currents in the previous cell)
row = combination_sum
row2 = compliment_sum
p.line(x_values, row, line_width=2)
p.line(x_values, row2, line_width=2)

# Set axis labels
p.xaxis.axis_label = "Grid Voltage (V)"
p.xaxis.axis_label_text_color = "#aa6666"
p.yaxis.axis_label = "Plate Current (mA)"

# Set grid lines
p.ygrid.minor_grid_line_color = 'navy'
p.ygrid.minor_grid_line_alpha = 0.2
p.xgrid.minor_grid_line_color = 'navy'
p.xgrid.minor_grid_line_alpha = 0.2

# Set background color
p.background_fill_color = "beige"
p.background_fill_alpha = 0.5

# Show the plot
show(p)

# STEP 6: Find the best matches for a particular tube_ID


In [None]:
def find_best_set_for_chosen_tube_ID():
    """Interactively list the closest matches for one tube.

    Prompts for a tube number and a match count, then returns the rows of the
    module-level ``df_dif`` whose ``ref_tube_ID`` equals that tube, sorted by
    ascending ``error_sum`` and cut to ``count + 1`` rows.

    NOTE(review): this zero-argument definition reuses the name of the
    four-argument helper that ``find_best_set`` calls, so running this cell
    rebinds that name. Confirm this is intended.

    Returns:
        The selected rows as a DataFrame.

    Raises:
        ValueError: If the match count is not an integer.
    """
    tube_to_match = input("What tube number would you like to match? ")
    requested_matches = input("How many matches would you like? ")
    # One row more than requested; presumably the tube's own row is among
    # the results. TODO confirm
    row_limit = int(requested_matches) + 1

    # Rows for the chosen reference tube, nearest matches first
    is_chosen_tube = df_dif.ref_tube_ID == str(tube_to_match)
    ranked_matches = df_dif[is_chosen_tube].sort_values('error_sum')

    return ranked_matches.head(row_limit)

df_tube_set = find_best_set_for_chosen_tube_ID()

# Show every returned row. The function has already cut the result to the
# number of matches the user asked for; the module-level tube_set_size belongs
# to the matched-set finder and must not truncate this table.
df_tube_set

# STEP 4: Show the distribution of a single bias voltage


In [None]:
# Histogram of a single column

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np

# Presumably the standard-deviation and mean rows of df_stats for the chosen
# bias column. TODO confirm against how df_stats is built.
sigma = df_stats.iat[2, 5]
mu = df_stats.iat[1, 5]

# NOTE(review): x is 437 random samples drawn from a normal distribution with
# that mean and sigma, not the measured currents themselves.
x = mu + sigma * np.random.randn(437)
num_bins = 50

fig, ax = plt.subplots()

# the histogram of the data, normalised to a probability density
# (hist's `normed` argument was removed in Matplotlib 3.1; `density` replaces it)
n, bins, patches = ax.hist(x, num_bins, density=True)

# add a 'best fit' line: the normal pdf evaluated at the bin edges
# (mlab.normpdf was removed in Matplotlib 3.1, so it is computed directly)
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
ax.plot(bins, y, '--')
ax.set_xlabel('Current (mA)')
# ax.set_ylabel('Probability density')
ax.set_title(r'Histogram of -10V grid voltage')

# Tweak spacing to prevent clipping of ylabel
fig.tight_layout()
plt.show()