# Overview
This notebook assumes that you have already dimensionally reduced your data and is intended to create visualizations or subset data in interesting ways.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import collections
import gzip
import numpy as np
import os
import time

import pandas as pd

from collections import defaultdict
import itertools

# Interactive HTML tools
from ipywidgets import interact
import bokeh
import bokeh.io
from bokeh.io import push_notebook
from bokeh.plotting import figure, show, save, output_notebook, output_file
from bokeh.palettes import Category20b
from bokeh.palettes import Category10

# Notes

* QC reports on genotype data available here: https://hrs.isr.umich.edu/data-products/genetic-data/products#gdv1
* PCs were generated in PLINK

PLINK command used to create cleaned HRS data:

```
plink --bfile HRS --maf 0.05 --mind 0.1 --geno 0.1 --hwe 1e-6 --make-bed --out HRS_CLEAR
```

Relevant parameters: Minor Allele Freq, Missing genotype rates (per-sample) Missing genotype rates (per-variant), 
Hardy-Weinberg equilibrium

PLINK command used to create PCs:
```
plink --bfile HRS_CLEAR --pca [n]
```

PLINK command used to create PCs for white-identified individuals in HRS:
```
plink --bfile HRS_CLEAR --pca [n] --keep hrs_subset_white.txt
```

This code holds for the hispanic- and black-identified populations as well.

# Importing data

In [71]:
# Directories
hrs_data_dir = '/Volumes/Stockage/alex/hrs/projections'
hrs_aux_dir = '/Volumes/Stockage/alex/hrs/aux'

# Auxiliary data
aux_file = 'allIndivs_filtered.txt'
# Specify the principal component files
pc_files = ['plink.eigenvec_200','HRS_PCA_black.eigenvec','HRS_PCA_hispanic.eigenvec','HRS_PCA_white.eigenvec']

# Specify which one we want to use
pc_file = pc_files[0]

aux_path = os.path.join(hrs_aux_dir, aux_file)
pc_path = os.path.join(hrs_data_dir, pc_file)

# Import auxiliary data. Contains IDs and demographic information.
# NOTE: The auxiliary data is sorted in an order different from the PC data.
aux_data = []
with open(aux_path) as input_file:
    for line in input_file:
        aux_data.append(line.strip().split(','))

# Import PC data. This data must be converted to an array.
with open(pc_path) as h:
    hrs_contents = h.readlines()

hrs_data = []

for h in hrs_contents:
    hrs_data.append(h.split()[2:len(h)])

hrs_data_array = np.array(hrs_data).astype(np.float)

In [72]:
# Set up a dict containing subsets and their related files.
# This dict contains an array for each of the population's PC data
hrs_subsets_dict = {'All':'plink.eigenvec_200',
                    'Black':'HRS_PCA_black.eigenvec',
                   'Hispanic':'HRS_PCA_hispanic.eigenvec',
                    'White':'HRS_PCA_white.eigenvec'}

aux_path = os.path.join(hrs_aux_dir, aux_file)

aux_data = []
with open(aux_path) as input_file:
    for line in input_file:
        aux_data.append(line.strip().split(','))

hrs_data_dict = defaultdict(np.array)

for p in hrs_subsets_dict:
    pc_path = os.path.join(hrs_data_dir, hrs_subsets_dict[p])
    
    # Import PC data. This data must be converted to an array.
    with open(pc_path) as h:
        hrs_contents = h.readlines()

    hrs_data = []
        
    for h in hrs_contents:
        hrs_data.append(h.split()[2:len(h)])

    hrs_data_dict[p] = np.array(hrs_data).astype(np.float)

In [73]:
# Import admixture proportion estimates
admix_dir = hrs_aux_dir

# Separated into two datasets (roughly speaking, African Americans and European Americans)
admix_data = []

# Import data from first file
with open(os.path.join(admix_dir,'HRS.txt')) as input_file:
    for line in input_file:
        admix_data.append(line.strip().split())

# Import data from second file
with open(os.path.join(admix_dir,'HRS_EurAm.txt')) as input_file:
    for line in input_file:
        admix_data.append(line.strip().split())

In [74]:
# Transform this into a Pandas data frame and convert the numeric values to... numeric
# ID is stored as numeric as well. This is to match with HRS data, which is sorted numerically by IDs
admix_df = pd.DataFrame.from_records(admix_data, columns=['ID','ADMIX1','ADMIX2','ADMIX3'])
admix_df[['ADMIX1','ADMIX2','ADMIX3']] = admix_df[['ADMIX1','ADMIX2','ADMIX3']].apply(pd.to_numeric)
admix_df['ID'] = admix_df.ID.astype(np.int64)

# Data preparation
Data from the HRS is not as neatly organized as in the UKBB or 1KGP. There is no variable for ethnicity, though we can create a proxy by combining variables for self-identified race, Hispanic status, and Mexican-American status. We need to create a variety of dictionaries to deal with these combinations as well. Since we're working with subsets a bit we need to prepare for that too.

There are multiple variables that need to be selected:
1. Which population to use
2. How we want to colour the points (e.g. by birth region, by ethnicity proxy, etc)

In [122]:
# Import the data and convert it to a pandas dataset for easier multivariable sorting.
# Rename columns (to deal with special characters)
hrs_labels = ['IndID','FamID','BirthYear','HispanicStatus','DetailedHispanicStatus','Race_HRS','BirthRegionNum',
             'BirthRegionName','AgeRange','Gender','Race_dbGaP']
hrs_df = pd.DataFrame.from_records(aux_data[1:],columns = hrs_labels)
hrs_df[['FamID','IndID']] = hrs_df[['FamID','IndID']].apply(pd.to_numeric)

# Create a new dataset and reset the index
hrs_df_sorted = hrs_df.sort_values(by=['FamID','IndID'])
hrs_df_sorted = hrs_df_sorted.reset_index(drop=True)

In [123]:
# Join the admixture data - this gets used at the end for different colourings.
hrs_joined = hrs_df_sorted.merge(admix_df, left_on='IndID', right_on='ID', how='left')

In [124]:
# Create an auxiliary dataset for each subset
# This is necessary so the indices match up when we want to attach a category/label to our points
aux_data_dict = defaultdict(list)

# Create a subset of our population and add it to the dict
for hrs_pop in hrs_subsets_dict:
    if hrs_pop in ['White','Black']:
        hrs_df_subset = hrs_df_sorted.loc[(hrs_df_sorted['Race_HRS'] == hrs_pop)]
    elif hrs_pop in ['Hispanic']:
        hrs_df_subset = hrs_df_sorted.loc[(hrs_df_sorted['HispanicStatus'] == 'Hispanic')]
    elif hrs_pop == 'All':
        hrs_df_subset = hrs_df_sorted
    
    hrs_df_subset_list = hrs_df_subset.values.tolist()
    aux_data_dict[hrs_pop] = hrs_df_subset_list 

In [125]:
# Define our dictionaries.
# Create dicts for these values.
hisp_dict = {'Hispanic':'H', 'Not_Hispanic':'N'}
mex_dict = {'Mexican-American':'M', 'N/A':'N', 'Other':'O', 'Type_Unknown':'U'}
race_dict = {'Black':'B', 'Other':'O', 'White':'W'}
brn_dict = {'East_North_Central':'ENC', 'East_South_Central':'ESC', 'Middle_Atlantic':'MAT',
           'Mountain':'MNT', 'New_England':'ENG', 'Not_In_Contiguous_US':'NIC','Pacific':'PAC',
            'South_Atlantic':'SAT', 'West_North_Central':'WNC', 'West_South_Central':'WSC'}
racedb_dict = {'AfrAm':'AA', 'Not_AfrAm':'NAA'}

# Create reverse lookups. We don't have to use defaultdicts as this is 1-1
hisp_dict_rev = dict()
mex_dict_rev = dict()
race_dict_rev = dict()
brn_dict_rev = dict()
racedb_dict_rev = dict()

for key, value in hisp_dict.items():
    hisp_dict_rev.update({value: key})
    
for key, value in mex_dict.items():
    mex_dict_rev.update({value: key})

for key, value in race_dict.items():
    race_dict_rev.update({value: key})

for key, value in brn_dict.items():
    brn_dict_rev.update({value: key})
    
for key, value in racedb_dict.items():
    racedb_dict_rev.update({value: key})

Select a set of data here:
* All
* Black
* Hispanic
* White

In [140]:
subset = 'All'

In [141]:
try:
    subset
except NameError:
    print('Subset not defined. Proceeding with full set of observations.')
else:
    if subset == 'White':
        hrs_subset_indices = hrs_df_sorted.loc[(hrs_df_sorted['Race_HRS'] == subset)].index
        hrs_df_sorted = hrs_df_sorted.loc[(hrs_df_sorted['Race_HRS'] == 'White')]
        print('Subsetting HRS data: White')
    elif subset == 'Black':
        hrs_subset_indices = hrs_df_sorted.loc[(hrs_df_sorted['Race_HRS'] == subset)].index
        hrs_df_sorted = hrs_df_sorted.loc[(hrs_df_sorted['Race_HRS'] == 'Black')]
        print('Subsetting HRS data: Black')
    elif subset == 'Hispanic':
        hrs_subset_indices = hrs_df_sorted.loc[(hrs_df_sorted['HispanicStatus'] == subset)].index
        hrs_df_sorted = hrs_df_sorted.loc[(hrs_df_sorted['HispanicStatus'] == 'Hispanic')]
        print('Subsetting HRS data: Hispanic')
    elif subset == 'All':
        hrs_subset_indices = hrs_df_sorted.index
        print('Using all entries - no subsetting carried out.')
    else:
        print('Subset "' + str(subset) + '" not recognized. Using full HRS dataset.')
    
# Use the sorted dataframe for auxiliary data
aux_data_sorted = hrs_df_sorted.values.tolist()
aux_data_sorted.insert(0,aux_data[0])

aux_data = aux_data_sorted

Using all entries - no subsetting carried out.


In [142]:
# These auxiliary data sets will define how we label the observations

# Columns are:
# 0 = ID, 1 = Family ID, 2 = Birth Year
# 3 = Hispanic, 4 = Detailed Hispanic, 5 = Race, 6 = Birth Region, 7 = Birth region name
# 10= dbGaP race (Note: Black != AfrAm and White != Not_AfrAm)

# Create multiple types of categorization based on variables to include
aux_data_1 = [] # 1 - Birth region, race, Hispanic status, Mexican status
aux_data_2 = [] # 2 - Race, Hispanic status, Mexican status
aux_data_3 = [] # 3 - Birth region, race
aux_data_4 = [] # 4 - Race, Hispanic status
aux_data_5 = [] # 5 - Birth region
aux_data_6 = [] # 6 - Birth region, Hispanic status, Mexican status

individuals_hrs = []

# Get the lists (skip the first row as it's a header)
#for a in aux_data[1:]:
for a in aux_data_dict[subset][0:]:
    individuals_hrs.append(a[0])
    
    temp_element = [a[0], '_'.join([brn_dict[a[7]], race_dict[a[5]], hisp_dict[a[3]], mex_dict[a[4]]])]    
    aux_data_1.append(temp_element)
    
    temp_element = [a[0], '_'.join([race_dict[a[5]], hisp_dict[a[3]], mex_dict[a[4]]])]
    aux_data_2.append(temp_element)
    
    temp_element = [a[0], '_'.join([brn_dict[a[7]], race_dict[a[5]]])]
    aux_data_3.append(temp_element)
    
    temp_element = [a[0], '_'.join([race_dict[a[5]], hisp_dict[a[3]]])]
    aux_data_4.append(temp_element)
    
    temp_element = [a[0], '_'.join([brn_dict[a[7]]])]
    aux_data_5.append(temp_element)
    
    temp_element = [a[0], '_'.join([brn_dict[a[7]], hisp_dict[a[3]], mex_dict[a[4]]])]
    aux_data_6.append(temp_element)

Select which of the auxiliary datasets we will use to label observations

In [143]:
aux_to_use = aux_data_5

In [144]:
# We must define the population dictionary we wish to use
# The following gives us a collection of all categories of some population and/or proxy for ethnicity:
eth_proxy_set = set([a[1] for a in aux_to_use])
pop_dict = dict()

for e in eth_proxy_set:
    el = e.split('_')
    
    if aux_to_use == aux_data_1:    
        temp_brn = brn_dict_rev[el[0]]
        temp_race = race_dict_rev[el[1]]
        temp_hisp = hisp_dict_rev[el[2]]
        temp_mex = mex_dict_rev[el[3]]
        
        pop_dict.update({e:temp_brn + ' ' + temp_race + ' ' + temp_hisp + ' ' + temp_mex})
    elif aux_to_use == aux_data_2:
        temp_race = race_dict_rev[el[0]]
        temp_hisp = hisp_dict_rev[el[1]]
        temp_mex = mex_dict_rev[el[2]]
        temp_list = [temp_race, temp_hisp, temp_mex]
        
        pop_dict.update({e:temp_race + ' ' + temp_hisp + ' ' + temp_mex})
    elif aux_to_use == aux_data_3:
        temp_brn = brn_dict_rev[el[0]]
        temp_race = race_dict_rev[el[1]]
        
        pop_dict.update({e: temp_brn + ' ' + temp_race})
    elif aux_to_use == aux_data_4:
        temp_race = race_dict_rev[el[0]]
        temp_hisp = hisp_dict_rev[el[1]]
        
        pop_dict.update({e: temp_race + ' ' + temp_hisp})
    elif aux_to_use == aux_data_5:
        temp_brn = brn_dict_rev[el[0]]
        
        pop_dict.update({e: temp_brn})
    elif aux_to_use == aux_data_6:
        temp_brn = brn_dict_rev[el[0]]
        temp_hisp = hisp_dict_rev[el[1]]
        temp_mex = mex_dict_rev[el[2]]
        
        pop_dict.update({e: temp_brn + ' ' + temp_hisp + ' ' + temp_mex})

In [145]:
# Define a colour set - this supports up to 30 colours
from bokeh.palettes import Category20, Category20b, Category20c, PRGn, Set1

color_dict_hrs = {}

for j, pop in enumerate(eth_proxy_set):
    if j < 20:
        color_dict_hrs[pop] = Category20[20][j]
    elif j < 30:
        color_dict_hrs[pop] = PRGn[10][j%20]
    else:
        color_dict_hrs[pop] = Set1[9][j%30]

# The code from above works okay but sometimes generates different colours for same groups (i.e. inconsistent colours
# between our plots)
# Define some more specific colour dictionaries for the following three categories:
# 1 - Birth region (10)
# 2 - Race + Hispanic status (6)
# 3 - Race + Hispanic status + Mexican status (8)

# Birth regions are US census regions. Five divisions:
# WEST: Pacfic (PAC), Mountain (MNT)
# MIDWEST: West North Central (WNC), East North Central (ENC)
# SOUTH: West South Central (WSC), East South Central (ESC), South Atlantic (SAT)
# NORTHEAST: Middle Atlantic (MAT), New England (ENG)
# Not in Contiguous US (NIC)

color_dict_born = {}

color_dict_born['ENG']=Category20b[20][1] # New England (purple)
color_dict_born['MAT']=Category20b[20][3] # Mid Atlantic (light purple)
color_dict_born['SAT']=Category20b[20][-2] # South Atlantic (pinkish)
color_dict_born['ESC']=Category20b[20][-4] # East South Central (purplish-pink)
color_dict_born['WSC']=Category20b[20][-6] # West South Central (rose-ish)
color_dict_born['ENC']=Category20c[20][3] # East North Central (light blue)
color_dict_born['WNC']=Category20c[20][0] # West North Central (blue)
color_dict_born['MNT']=Category20b[20][5] # Mountain (Green)
color_dict_born['PAC']=Category20b[20][7] # Pacific (lighter green)
color_dict_born['NIC']=Category20c[20][-3] # Not in contiguous US (grey)

color_dict_race_hisp = {}

color_dict_race_hisp['B_H']=Category20b[20][2] # Black and Hispanic
color_dict_race_hisp['B_N']=Category20b[20][3] # Black and not Hispanic
color_dict_race_hisp['O_H']=Category20b[20][4] # Other and Hispanic
color_dict_race_hisp['O_N']=Category20b[20][6] # Other and not Hispanic
color_dict_race_hisp['W_H']=Category20c[4][-3] # White and Hispanic
color_dict_race_hisp['W_N']=Category20c[4][-1] # White and not Hispanic

color_dict_race_hisp_mex = {}

color_dict_race_hisp_mex['B_H_O']=Category20c[5][-1]# Black Hispanic non-Mexican (orange)
color_dict_race_hisp_mex['B_N_N']=Category20c[7][-1] # Black not Hispanic (light orange)
color_dict_race_hisp_mex['O_H_M']=Category20b[20][4] # Other Hispanic Mexican (dark green)
color_dict_race_hisp_mex['O_H_O']=Category20b[20][6] # Other Hispanic non-Mexican (green)
color_dict_race_hisp_mex['O_N_N']=Category20c[13][-1] # Other not Hispanic non-Mexican (poiple)
color_dict_race_hisp_mex['W_H_M']=Category20c[9][-1] # White Hispanic Mexican (green)
color_dict_race_hisp_mex['W_H_O']=Category20c[11][-1] # White Hispanic non-Mexican (lighter green)
color_dict_race_hisp_mex['W_H_U']=Category20c[12][-1] # White Hispanic Unknown (even lighter green)
color_dict_race_hisp_mex['W_N_N']=Category20c[4][0] # White not Hispanic non-Mexican (blue!)

In [146]:
# Set up an index of each population member and vice versa
# We want to quickly access a given individual's population and a given population's individuals
population_by_individual_hrs = defaultdict(int)
individuals_by_population_hrs = defaultdict(list)

for a in aux_to_use:
    population_by_individual_hrs[a[0]] = a[1]
    individuals_by_population_hrs[a[1]].append(a[0])
    
indices_of_population_members_hrs = defaultdict(list)

for index, indiv in enumerate(individuals_hrs):
    try:
        indices_of_population_members_hrs[population_by_individual_hrs[indiv]].append(index)
    except KeyError:
        continue

In [147]:
# Define a colour dictionary based on individual admixture proportions.
# We have three values - this works nicely with an RGB tuple.
hrs_joined_sorted = hrs_joined.values.tolist()

temp_list = [h[-3:] for h in hrs_joined_sorted]
hrs_joined_sorted_array = np.array(temp_list)
hrs_joined_sorted_array = (255*hrs_joined_sorted_array).astype(np.int64)
hrs_joined_sorted_array.shape

(12454, 3)

In [148]:
color_dict_admix = defaultdict(int)
for i in range(0, len(hrs_joined_sorted)):
    color_dict_admix[i] = '#%02x%02x%02x' % (hrs_joined_sorted_array[i][0],
                                             hrs_joined_sorted_array[i][1],
                                             hrs_joined_sorted_array[i][2])

color_list_admix = list()
for i in range(0, len(hrs_joined_sorted)):
    color_list_admix.append('#%02x%02x%02x' % (hrs_joined_sorted_array[i][0],
                                             hrs_joined_sorted_array[i][1],
                                             hrs_joined_sorted_array[i][2]))

In [117]:
sorted(list(set(hrs_subset_indices) & set(indices_of_population_members_hrs['NIC'])))

[51,
 92,
 101,
 102,
 106,
 109,
 110,
 116,
 162,
 163,
 170,
 185,
 193,
 207,
 216,
 232,
 259,
 269,
 287,
 330,
 355,
 358,
 366,
 398,
 414,
 438,
 453,
 462,
 466,
 497,
 539,
 584,
 598,
 609,
 614,
 644,
 649,
 657,
 673,
 700,
 712,
 720,
 775,
 786,
 821,
 829,
 835,
 847,
 870,
 901,
 920,
 940,
 956,
 963,
 968,
 976,
 989,
 994,
 1009,
 1028,
 1038,
 1079,
 1087,
 1095,
 1102,
 1123,
 1131,
 1137,
 1155,
 1173,
 1181,
 1190]

# Generate interactive HTML

In [158]:
# Define a function to generate interactive HTML files
def hrs_create_int_html(proj, fig_title, fname, page_title):
    # Import TSNE PC projections from file and export HTML
    temp_array = proj

    component_1_id = 0
    component_2_id = 1

    p = figure(plot_width=1500, plot_height=800)
    p.title.text = fig_title

    for pop in sorted(eth_proxy_set):
        proj_pop = temp_array[indices_of_population_members_hrs[pop]]
        p.circle(proj_pop[:,component_1_id], proj_pop[:,component_2_id], legend=pop_dict[pop],
                 color = color_dict[pop])

    p.legend.location = "top_left"

    p.legend.click_policy="hide"

    output_file(fname + '.html',title=page_title)

    save(p)
    print('Saved interactive HTML for ' + fig_title)

In [159]:
# Same function as above, but coloured in by estimated admixture proportions
def hrs_create_int_html_admix(proj, ptitle, fname, ftitle):
    p = figure(plot_width=1500, plot_height=800)
    p.title.text = ptitle

    for pop in sorted(eth_proxy_set):
        proj_pop = proj[indices_of_population_members_hrs[pop]]
        p.circle(proj_pop[:,0], proj_pop[:,1], legend=pop_dict[pop],
                 color = [color_list_admix[i] for i in indices_of_population_members_hrs[pop]])

    p.legend.location = "top_left"

    p.legend.click_policy="hide"

    output_file(fname + '.html',title=ftitle)

    save(p)
    print('Saved interactive admixture HTML for ' + fig_title)

In [160]:
def hrs_create_image(proj, ptitle, fname, ftitle):
    proj = np.loadtxt(proj)

    fig = plt.figure(figsize=(20,20))
    ax = fig.add_subplot(111, aspect=1)

    for pop in sorted(eth_proxy_set):
        temp_proj = proj[indices_of_population_members_hrs[pop]]
        ax.scatter(temp_proj[:,0], temp_proj[:,1], label=pop_dict[pop], alpha=0.6, color=color_dict[pop])

    ax.legend(ncol=3,loc='lower center', bbox_to_anchor=(0.55,-0.15), fontsize=12,markerscale=3)
    fig.savefig(fname + '.jpeg',format='jpeg')
    plt.close()
    print('Saved image for ' + fig_title)

In [161]:
proj_dir = '/Volumes/Stockage/alex/hrs/projections'
out_dir = '/Volumes/Stockage/alex/hrs/sandbox'

file = 'HRS_UMAP_PC10_NN15_MD0.5_2018330153123'

if aux_to_use == aux_data_1:
    # Not used
    aux_label = 'BORN_RACE_HISP_MEX'
elif aux_to_use == aux_data_2:
    aux_label = 'RACE_HISP_MEX'
    color_dict = color_dict_race_hisp_mex
elif aux_to_use == aux_data_3:
    # Not used
    aux_label = 'BORN_RACE'
elif aux_to_use == aux_data_4:
    aux_label = 'RACE_HISP'
    color_dict = color_dict_race_hisp
elif aux_to_use == aux_data_5:
    aux_label = 'BORN'
    color_dict = color_dict_born
elif aux_to_use == aux_data_6:
    # Not used
    aux_label = 'BORN_HISP_MEX'

if subset=='All':
    try:
        temp_proj = np.loadtxt(os.path.join(proj_dir, file))
        out_fig_title = file
        out_file = os.path.join(out_dir, file + '_' + aux_label)
        out_page_title = file
        hrs_create_int_html(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_image(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_int_html_admix(temp_proj, out_fig_title, out_file+'_ADMIX', out_page_title)
    except Exception as e:
        print(e)
        print(file)
elif subset=='Hispanic':
    # Work only with the Hispanic subset
    try:
        temp_proj = np.loadtxt(os.path.join(proj_dir, file))
        out_fig_title = file
        out_file = os.path.join(out_dir, file + '_' + aux_label)
        out_page_title = file
        hrs_create_int_html(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_image(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_int_html_admix(temp_proj, out_fig_title, out_file+'_ADMIX', out_page_title)
    except Exception as e:
        print(e)
        print(file)
elif subset=='Black':
    try:
        temp_proj = np.loadtxt(os.path.join(proj_dir, file))
        out_fig_title = file
        out_file = os.path.join(out_dir, file + '_' + aux_label)
        out_page_title = file
        hrs_create_int_html(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_image(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_int_html_admix(temp_proj, out_fig_title, out_file+'_ADMIX', out_page_title)
    except Exception as e:
        print(e)
        print(file)
elif subset=='White':
    try:
        temp_proj = np.loadtxt(os.path.join(proj_dir, file))
        out_fig_title = file
        out_file = os.path.join(out_dir, file + '_' + aux_label)
        out_page_title = file
        hrs_create_int_html(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_image(temp_proj, out_fig_title, out_file, out_page_title)
        hrs_create_int_html_admix(temp_proj, out_fig_title, out_file+'_ADMIX', out_page_title)
    except Exception as e:
        print(e)
        print(file)

Saved interactive HTML for HRS_UMAP_PC10_NN15_MD0.5_2018330153123
could not convert string to float: b'['
HRS_UMAP_PC10_NN15_MD0.5_2018330153123
