# Test animation code
Plot a thousand arrays and SEE COOL THINGS

In [19]:
import matplotlib.pyplot as plt

import collections
import imageio
import numpy as np
import os
import time

from collections import defaultdict

General format for the code below:
* Loop through files (skipping directories)
* Create images from projections
* Create movies from images

# 1000G animation

In [20]:
# Reload variables previously saved with IPython's %store magic
# (`%store -r NAME` restores NAME into this notebook's namespace).
# NOTE(review): presumably these were stored by the 1000G notebook - confirm.
%store -r continents
%store -r pop_by_continent
%store -r pop
%store -r indices_of_population_members
%store -r name_by_code
%store -r continent_by_population
%store -r individuals
%store -r population_by_individual
%store -r individuals_by_population
%store -r populations
%store -r color_dict

In [11]:
# Folder layout used by the 1000G image loop.
# NOTE(review): parent_dir points at the 'ukbb_anim' folder although this
# section is headed "1000G animation" - confirm this is the intended root.
parent_dir = '/Volumes/Stockage/alex/ukbb_anim'
data_dir = os.path.join(parent_dir, 'Data')
movie_dir = os.path.join(parent_dir, 'Movie')
# The sub-path must not start with a slash: os.path.join drops every earlier
# component when a later one is absolute, which would put the images in
# '/Images/...' at the filesystem root instead of under parent_dir.
img_dir = os.path.join(parent_dir, 'Images/Labelled_20_by_20_2PCs')

In [10]:
# Create images (specific to 1000G)
# Writes one JPEG per projection file found in data_dir. Each population is
# drawn in its own colour, using the first two columns of the projection.
for file in os.listdir(data_dir):

    # Sub-directories are not projection files
    if os.path.isdir(os.path.join(data_dir, file)):
        continue

    # Load before creating the figure so a file that fails to parse does not
    # leave an open figure behind.
    temp_array = np.loadtxt(os.path.join(data_dir, file))

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, aspect=1)

    for cont in continents:
        # The loop variable is deliberately not named `pop`: that name holds a
        # variable restored with `%store -r pop` and would be overwritten.
        for pop_code in pop_by_continent[cont]:
            proj = temp_array[indices_of_population_members[pop_code]]
            ax.plot(proj[:, 0], proj[:, 1], '.', color=color_dict[pop_code])

    ax.set_title('Iteration: ' + file)
    # Save through the figure object instead of pyplot's "current figure"
    fig.savefig(os.path.join(img_dir, file) + '.jpeg', format='jpeg')

    plt.close(fig)

NameError: name 'continents' is not defined

# UKBB animation

In [1]:
# Import auxiliary data from the UKBB notebook
%store -r ukbb_eth_dict_parent
%store -r indices_of_population_members
%store -r color_dict_ukbb
%store -r markers_dict

In [7]:
# Root folder of the UKBB animation; the data, movie and image folders are
# all resolved relative to it.
ukbb_parent_dir = '/Volumes/Stockage/alex/ukbb_anim'
ukbb_data_dir, ukbb_movie_dir, ukbb_img_dir = (
    os.path.join(ukbb_parent_dir, sub_path)
    for sub_path in (
        'Data/10_pcs_1500_iterations',
        'Movie',
        'Images/Labelled_17_by_17_10PCs_1500_iterations',
    )
)

In [9]:
# Render one labelled JPEG per projection file in ukbb_data_dir.
# NOTE(review): nothing below checks whether an image already exists, so
# re-running this cell writes every image again.

# Parent groups that are plotted under their own name in addition to their
# sub-groups; every other parent contributes only its sub-groups.
parents_plotted_directly = (
    'White',
    'Mixed',
    'Asian or Asian British',
    'Black or Black British',
)

for file in os.listdir(ukbb_data_dir):

    projection_path = os.path.join(ukbb_data_dir, file)
    if os.path.isdir(projection_path):
        continue

    temp_array = np.loadtxt(projection_path)

    fig, ax = plt.subplots(figsize=(17, 17), subplot_kw={'aspect': 1})

    for pop in ukbb_eth_dict_parent:
        # Parent first (when it is plotted directly), then its sub-groups
        groups_to_draw = [pop] if pop in parents_plotted_directly else []
        groups_to_draw.extend(ukbb_eth_dict_parent[pop])

        for subpop in groups_to_draw:
            temp_proj = temp_array[indices_of_population_members[subpop], :]
            ax.plot(
                temp_proj[:, 0],
                temp_proj[:, 1],
                '.',
                label=subpop,
                color=color_dict_ukbb[subpop],
            )

    # Legend is anchored just below the axes
    ax.legend(ncol=4, loc='center left', bbox_to_anchor=(0, -0.1), fontsize=20)
    ax.set_title('Iteration: ' + file)
    fig.savefig(os.path.join(ukbb_img_dir, file) + '.jpeg', format='jpeg')

    plt.close(fig)

In [10]:
# Create a movie from the images
import re

ukbb_movie_name = 'Labelled_17_by_17_10PCs_1500_iterations.gif'


def _natural_sort_key(name):
    """Return a sort key that orders the digit runs in *name* numerically.

    ``re.split`` with a capturing group always yields text, digits, text, ...
    so the odd positions are the digit runs. Keys built from different names
    therefore compare str with str and int with int.
    """
    parts = re.split(r'([0-9]+)', name)
    return [int(part) if i % 2 else part for i, part in enumerate(parts)]


# os.listdir returns entries in arbitrary order; the frames must be appended
# in iteration order or the animation plays out of sequence.
# NOTE(review): assumes the iteration number is embedded in each file name
# (e.g. '2.jpeg' before '10.jpeg') - confirm against the image folder.
frame_names = sorted(
    (
        name
        for name in os.listdir(ukbb_img_dir)
        if not os.path.isdir(os.path.join(ukbb_img_dir, name))
    ),
    key=_natural_sort_key,
)

# Every frame is held in memory at once before the GIF is written
images = [imageio.imread(os.path.join(ukbb_img_dir, name)) for name in frame_names]

imageio.mimsave(os.path.join(ukbb_movie_dir, ukbb_movie_name), images)

# HRS animation

In [11]:
# Root folder of the HRS animation and the sub-folders for the per-iteration
# projections, the rendered images and the movies.
hrs_parent_dir = '/Volumes/Stockage/alex/hrs'
hrs_data_dir, hrs_img_dir, hrs_movie_dir = (
    os.path.join(hrs_parent_dir, leaf)
    for leaf in ('iterations', 'images', 'movies')
)

In [22]:
# Sharing variables between notebooks
# (`%store -r NAME` reloads a variable saved earlier with IPython's %store).

# Lookup tables; the *_rev dictionaries are the ones used below to turn the
# '_'-separated fields of a category key into a readable label.
%store -r aux_data_hrs
%store -r hisp_dict
%store -r hisp_dict_rev
%store -r mex_dict
%store -r mex_dict_rev
%store -r race_dict
%store -r race_dict_rev
%store -r brn_dict
%store -r brn_dict_rev
%store -r racedb_dict
%store -r racedb_dict_rev

# One auxiliary data set per categorization scheme (1-6)
%store -r aux_data_1
%store -r aux_data_2
%store -r aux_data_3
%store -r aux_data_4
%store -r aux_data_5
%store -r aux_data_6

# Individuals, enumerated below to obtain each one's position (row index)
%store -r individuals_hrs

In [24]:
# Select the auxiliary data set (categorization scheme) used to label the HRS
# observations; the following cell lists scheme 4 as "Race, Hispanic status".
aux_to_use = aux_data_4

In [25]:
# Define a colour set - the three palettes below give 39 distinct colours;
# with more categories than that the colours repeat.
from bokeh.palettes import Category20
from bokeh.palettes import PRGn
from bokeh.palettes import Set1

# These auxiliary data sets will define how we label the observations

# Columns are:
# 0 = ID, 1 = Family ID, 2 = Birth Year
# 3 = Hispanic, 4 = Detailed Hispanic, 5 = Race, 6 = Birth Region, 7 = Birth region name
# 10= dbGaP race (Note: Black != AfrAm and White != Not_AfrAm)
# NOTE(review): the code below reads entry[0] as the individual and entry[1]
# as the category key of each auxiliary record - confirm how that relates to
# the column list above.

# Create multiple types of categorization based on variables to include
# 1 - Birth region, race, Hispanic status, Mexican status
# 2 - Race, Hispanic status, Mexican status
# 3 - Birth region, race
# 4 - Race, Hispanic status
# 5 - Birth region
# 6 - Birth region, Hispanic status, Mexican status

# Each scheme pairs an auxiliary data set with the reverse lookup tables that
# decode, in order, the '_'-separated fields of its category keys.
label_schemes = (
    (aux_data_1, (brn_dict_rev, race_dict_rev, hisp_dict_rev, mex_dict_rev)),
    (aux_data_2, (race_dict_rev, hisp_dict_rev, mex_dict_rev)),
    (aux_data_3, (brn_dict_rev, race_dict_rev)),
    (aux_data_4, (race_dict_rev, hisp_dict_rev)),
    (aux_data_5, (brn_dict_rev,)),
    (aux_data_6, (brn_dict_rev, hisp_dict_rev, mex_dict_rev)),
)

# Resolve the scheme once instead of re-comparing whole data sets for every
# category. The first matching scheme wins.
field_decoders = None
for candidate, decoders in label_schemes:
    if aux_to_use is candidate or aux_to_use == candidate:
        field_decoders = decoders
        break

if field_decoders is None:
    # Fail here with a clear message rather than leaving pop_dict empty and
    # hitting a KeyError later when the labels are looked up for plotting.
    raise ValueError('aux_to_use does not match any of aux_data_1 ... aux_data_6')

# We must define the population dictionary we wish to use
# The following gives us a collection of all categories of some population and/or proxy for ethnicity:
eth_proxy_set = {entry[1] for entry in aux_to_use}

# Human-readable label for every category key: each field is decoded with its
# lookup table and the decoded parts are joined by single spaces.
pop_dict = {}
for key in eth_proxy_set:
    fields = key.split('_')
    pop_dict[key] = ' '.join(
        decoder[fields[position]] for position, decoder in enumerate(field_decoders)
    )

# Colours are assigned in sorted key order so a category gets the same colour
# in every session (set iteration order is not stable across runs). The
# modulo wraps around the combined palette instead of raising IndexError
# when there are more than 39 categories.
palette = list(Category20[20]) + list(PRGn[10]) + list(Set1[9])
color_dict_hrs = {
    key: palette[rank % len(palette)]
    for rank, key in enumerate(sorted(eth_proxy_set))
}

# Set up an index of each population member and vice versa
# We want to quickly access a given individual's population and a given population's individuals
population_by_individual_hrs = defaultdict(int)
individuals_by_population_hrs = defaultdict(list)

for entry in aux_to_use:
    population_by_individual_hrs[entry[0]] = entry[1]
    individuals_by_population_hrs[entry[1]].append(entry[0])

indices_of_population_members_hrs = defaultdict(list)

for index, indiv in enumerate(individuals_hrs):
    # Test membership explicitly: indexing a defaultdict never raises
    # KeyError, it inserts the default 0, which would file individuals without
    # auxiliary data under a bogus population 0.
    if indiv not in population_by_individual_hrs:
        continue
    indices_of_population_members_hrs[population_by_individual_hrs[indiv]].append(index)

In [46]:
# Create images from the HRS projections: one labelled JPEG per file in
# hrs_data_dir, using the first two columns of each projection.

# The plotting order is the same for every file, so sort once up front
ordered_populations = sorted(eth_proxy_set)

for file in os.listdir(hrs_data_dir):
    # The entries come from hrs_data_dir, so the directory test must join
    # against that folder (not hrs_parent_dir) to skip its sub-directories.
    if os.path.isdir(os.path.join(hrs_data_dir, file)):
        continue

    temp_array = np.loadtxt(os.path.join(hrs_data_dir, file))

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, aspect=1)

    for pop in ordered_populations:
        temp_proj = temp_array[indices_of_population_members_hrs[pop]]
        ax.plot(
            temp_proj[:, 0],
            temp_proj[:, 1],
            '.',
            label=pop_dict[pop],
            color=color_dict_hrs[pop],
        )

    # Legend is centred just below the axes
    ax.legend(ncol=2, loc='center', bbox_to_anchor=(0.5, -0.1), fontsize=20)
    ax.set_title('Iteration: ' + file)
    # Save through the figure object instead of pyplot's "current figure"
    fig.savefig(os.path.join(hrs_img_dir, file) + '.jpeg', format='jpeg')

    plt.close(fig)