In [19]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.axes_grid1 import make_axes_locatable

import collections
from collections import defaultdict

import itertools
import matplotlib
import numpy as np
import os
import pickle
import sys

In [20]:
# Define save/load functions from pickle
# https://stackoverflow.com/questions/4529815/saving-an-object-data-persistence
def save_object(obj, filename):
    """Pickle ``obj`` into the file at ``filename``.

    The file is opened in binary write mode, so any existing file at that
    path is overwritten. The highest pickle protocol available to the
    running interpreter is used.
    """
    with open(filename, mode='wb') as sink:
        writer = pickle.Pickler(sink, pickle.HIGHEST_PROTOCOL)
        writer.dump(obj)

# sample usage
#save_object(company1, 'company1.pkl')

def load_object(filename):
    """Unpickle and return the object stored in the file at ``filename``.

    If unpickling fails, the exception class is printed and the original
    exception is re-raised to the caller.

    NOTE: pickle can execute arbitrary code while loading; only use this on
    files from a trusted source.
    """
    with open(filename, "rb") as inp:
        try:
            return pickle.load(inp)
        except BaseException as failure:
            # Report the failure type, then let the error propagate unchanged.
            print("Error loading object:", type(failure))
            raise

In [21]:
# Directory layout used by the rest of the notebook (relative paths).

# Pickled auxiliary data (colours, dictionaries, etc)
aux_dir = "files"

# Projection coordinate files
proj_dir = "proj"

In [22]:
# Load in the auxiliary variables (pickled lookup tables stored under aux_dir).
# Later cells index these as follows:
#   color_dict_ukbb[label]               -> colour passed to ax.scatter
#   indices_of_population_members[label] -> row selector into a projection array
#   markers_dict[parent_label]           -> marker passed to ax.scatter
color_dict_ukbb = load_object(os.path.join(aux_dir, "color_dict_ukbb"))
indices_of_population_members = load_object(os.path.join(aux_dir, "indices_of_population_members"))
markers_dict = load_object(os.path.join(aux_dir, "markers_dict"))

In [23]:
# Put together a dict of values
# Taken from: http://biobank.ctsu.ox.ac.uk/crystal/coding.cgi?id=1001
# This dictionary covers all given ethnicities and their UKBB codes
ukbb_eth_dict = {
    '1':'White',
    '1001':'British',
    '1002':'Irish',
    '1003':'Any other white background',
    '2':'Mixed',
    '2001':'White and Black Caribbean',
    '2002':'White and Black African',
    '2003':'White and Asian',
    '2004':'Any other mixed background',
    '3':'Asian or Asian British',
    '3001':'Indian',
    '3002':'Pakistani',
    '3003':'Bangladeshi',
    '3004':'Any other Asian background',
    '4':'Black or Black British',
    '4001':'Caribbean',
    '4002':'African',
    '4003':'Any other Black background',
    '5':'Chinese',
    '6':'Other ethnic group',
    '-1':'Do not know',
    '-3':'Prefer not to answer',
    '-9':'Not available'
}

# Codes that only name a parent category; every other code is treated as a
# child category below.
_parent_only_codes = ('1', '2', '3', '4')

# Child categories of ethnicities.
# Derived from ukbb_eth_dict (insertion order preserved) so the two tables
# cannot drift apart.
ukbb_dict_child = {
    code: label
    for code, label in ukbb_eth_dict.items()
    if code not in _parent_only_codes
}

# Parent categories of ethnicities.
# Keys are the FIRST CHARACTER of a UKBB code; the negative codes
# ('-1', '-3', '-9') therefore all fall under '-'.
ukbb_dict_parent = {
    '1':'White',
    '2':'Mixed',
    '3':'Asian or Asian British',
    '4':'Black or Black British',
    '5':'Chinese',
    '6':'Other ethnic group',
    '-':'NA'
}

# Secondary relationship between parent-child ethnicities
# Keys are parent ethnicities (e.g. 'Asian or Asian British') and value is a list of child ethnicities ('Indian', etc)
# 'Chinese' and 'Other ethnic group' have no sub-codes, so each ends up as its
# own single child.
ukbb_eth_dict_parent = defaultdict(list)

for key, value in ukbb_eth_dict.items():
    if key in _parent_only_codes:
        continue

    parent = key[0]
    # defaultdict(list) creates the list on first access, so no KeyError
    # fallback is needed for the append. An unknown parent prefix raises
    # KeyError from the ukbb_dict_parent lookup, which is the desired signal.
    ukbb_eth_dict_parent[ukbb_dict_parent[parent]].append(value)

# Reversed dictionaries: child label -> UKBB code
ukbb_dict_child_rev = {value: key for key, value in ukbb_dict_child.items()}

In [38]:
# Recreate the PCA image: scatter columns 0 and 1 of the PCA projection
# (labelled PC1/PC2), one scatter call per ethnicity label, and save as PNG.

out_dir = "images"
fname = "ukbb_pca_coords"

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

proj_pca = np.loadtxt(os.path.join(proj_dir, fname))

proj = proj_pca

size = 5
alp = 0.3

fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, aspect=1)

for pop in ukbb_eth_dict_parent:
    # Only these four parent labels are plotted as populations in their own
    # right; the remaining parents are drawn through their children only.
    if pop in ['White','Mixed','Asian or Asian British','Black or Black British']:
        temp_proj = proj[indices_of_population_members[pop],:]
        ax.scatter(temp_proj[:,0],
                   temp_proj[:,1],
                   marker = ".",
                   s = size,
                   alpha = alp,
                   label = pop,
                   color = color_dict_ukbb[pop])

    # Children share the marker of their parent but have their own colour.
    for subpop in ukbb_eth_dict_parent[pop]:
        temp_proj = proj[indices_of_population_members[subpop],:]
        ax.scatter(temp_proj[:,0],
                   temp_proj[:,1],
                   marker = markers_dict[pop],
                   s = size,
                   alpha = alp,
                   label = subpop,
                   color = color_dict_ukbb[subpop])

# Hide all ticks and tick labels on both axes (major and minor); the axis
# labels set below are kept.
ax.tick_params(
    axis='both',
    which='both',
    bottom=False,
    top=False,
    left=False,
    right=False,
    labelbottom=False,
    labelleft=False)

# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

ax.set_xlabel("PC1")
ax.set_ylabel("PC2")

fig.tight_layout()

fpath = os.path.join(out_dir, fname + "_eth.png")

fig.savefig(fpath, bbox_inches = "tight", format="png", dpi = 300)
# Close this specific figure so it is neither displayed inline nor kept in memory.
plt.close(fig)

In [37]:
# Recreate a UMAP figure: scatter columns 0 and 1 of the UMAP projection
# (labelled UMAP1/UMAP2), one scatter call per ethnicity label, and save as PNG.
# Uses out_dir as defined in an earlier cell.
fname = "UKBB_UMAP_PC20_NC2_NN15_MD0.5_2018454111"

# savefig does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)

proj = np.loadtxt(os.path.join(proj_dir, fname))

size = 5
alp = 0.3

fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111, aspect=1)

for pop in ukbb_eth_dict_parent:
    # Only these four parent labels are plotted as populations in their own
    # right; the remaining parents are drawn through their children only.
    if pop in ['White','Mixed','Asian or Asian British','Black or Black British']:
        temp_proj = proj[indices_of_population_members[pop],:]
        ax.scatter(temp_proj[:,0],
                   temp_proj[:,1],
                   marker = ".",
                   s = size,
                   alpha = alp,
                   label = pop,
                   color = color_dict_ukbb[pop])

    # Children share the marker of their parent but have their own colour.
    for subpop in ukbb_eth_dict_parent[pop]:
        temp_proj = proj[indices_of_population_members[subpop],:]
        ax.scatter(temp_proj[:,0],
                   temp_proj[:,1],
                   marker = markers_dict[pop],
                   s = size,
                   alpha = alp,
                   label = subpop,
                   color = color_dict_ukbb[subpop])

# Hide all ticks and tick labels on both axes (major and minor); the axis
# labels set below are kept.
ax.tick_params(
    axis='both',
    which='both',
    bottom=False,
    top=False,
    left=False,
    right=False,
    labelbottom=False,
    labelleft=False)

# Hide the right and top spines
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

ax.set_xlabel("UMAP1")
ax.set_ylabel("UMAP2")

fig.tight_layout()

# The projection file name contains a '.' (from "MD0.5"); strip it so the
# only dot in the output name is the one before the "png" extension.
fpath = os.path.join(out_dir, fname.replace(".","") + "_eth.png")

fig.savefig(fpath, bbox_inches = "tight", format="png", dpi = 300)
# Close this specific figure so it is neither displayed inline nor kept in memory.
plt.close(fig)

In [32]:
# NOTE(review): leftover interactive help lookup. The leading `?` is IPython-only
# syntax (not plain Python); consider removing this cell before sharing.
?plt.tick_params