In [1]:
import pandas as pd
import pickle

# Methods for saving Python objects.
def save_obj(obj, saveFile):
    """Pickle ``obj`` to the file ``saveFile`` + '.pkl'.

    Parameters
    ----------
    obj : object
        Any picklable Python object.
    saveFile : str or os.PathLike
        Destination path *without* extension; '.pkl' is appended to it.

    Raises
    ------
    TypeError
        If ``saveFile`` cannot be turned into a text path.
    """
    import os  # local import so the helper does not depend on other cells

    # os.fspath returns a plain str unchanged and unwraps pathlib.Path (or any
    # other os.PathLike), so existing string callers behave exactly as before
    # while Path arguments no longer fail on the `+ '.pkl'` concatenation.
    target = os.fspath(saveFile) + '.pkl'
    with open(target, 'wb') as fid:
        pickle.dump(obj, fid, pickle.HIGHEST_PROTOCOL)

# Root of the project; every data path below is built by appending to it.
path = "/home/xander/Projects/plotly/"

# Sample panel: read it, drop every column that contains a missing value,
# and write the cleaned table back out as CSV.
df = pd.read_csv(
    path + "data/sampleData/integrated_call_samples_v3.20130502.ALL.panel",
    sep='\t',
    index_col=0,
).dropna(axis=1)
df.to_csv(path + "data/cleaned/sampleData.csv")

# Population table: keep the three descriptive columns, drop incomplete rows,
# index by population code, attach the columns of the lat/long table, and
# order the rows by population description.
ll_df = pd.read_csv(path + "data/sampleData/latsLongs.tsv", sep='\t', index_col=0)
pop_df = (
    pd.read_csv(path + "data/sampleData/populations.tsv", sep='\t')
    [["Population Code", "Population Description", "Super Population"]]
    .dropna()
    .set_index("Population Code")
    .join(ll_df)
    .sort_values(by=["Population Description"])
)

# Population codes, in the sorted row order of pop_df.
pops = pop_df.index.tolist()
save_obj(pops, path + "data/cleaned/pops")

# Population code -> population description.
pop2descrip = {
    code: descrip
    for code, descrip in zip(pops, pop_df["Population Description"].tolist())
}
save_obj(pop2descrip, path + "data/cleaned/pop2descrip")

# Super population code -> description.
# NOTE(review): this reads "Super Population" from `df` (the sample panel),
# whereas the column is explicitly selected for `pop_df` above -- confirm the
# panel file really carries a column with this name.
spops = sorted(set(df["Super Population"].tolist()))
# The labels are paired with the sorted codes purely by position; zip stops at
# the shorter sequence, so a different number of codes is silently truncated.
# NOTE(review): assumes exactly five codes whose sorted order matches this list.
spop2descrip = dict(zip(spops, ["African", "Admixed American", "East Asian", "European", "South Asian"]))
save_obj(spop2descrip, path + "data/cleaned/spop2descrip")

In [4]:
# Mapping into LAB space.

from colormath.color_objects import LabColor, sRGBColor
from colormath.color_conversions import convert_color


# Scale latitude onto the Lab a* axis and longitude onto the b* axis
# (about [-128, 128] each, assuming both columns hold degrees).
# The products are NEW arrays: an in-place `*=` on `.values` would rescale the
# Latitude/Longitude columns of pop_df itself, so re-running this cell -- or
# running any other cell that reads pop_df afterwards -- would silently work
# on already-scaled coordinates.
lats = pop_df["Latitude"].values * (128 / 90)
longs = pop_df["Longitude"].values * (128 / 180)

popColors = list()
for lat, long in zip(lats, longs):
    # Fixed lightness of 75; position on the globe picks the chroma.
    lab = LabColor(75, lat, long)
    rgb = convert_color(lab, sRGBColor).get_upscaled_value_tuple()
    # Lab points far from the neutral axis can lie outside the sRGB gamut and
    # the upscaled tuple is not clamped, so limit each channel to 0-255 to
    # always emit a valid "rgb(r, g, b)" string.
    rgb = tuple(min(255, max(0, channel)) for channel in rgb)
    colorStr = "rgb(" + ", ".join(map(str, rgb)) + ")"
    popColors.append(colorStr)

# Dictionary mapping populations to colors.
# Note: this writes the same "pop2rgb" pickle as the other colour-mapping
# cells in this notebook; whichever cell runs last determines its content.
pop2rgb = dict(zip(pops, popColors))
save_obj(pop2rgb, path + "data/cleaned/pop2rgb")

In [3]:
# Mapping into HSV space.

from colormath.color_objects import HSVColor, sRGBColor
from colormath.color_conversions import convert_color


# Shift both coordinates to non-negative ranges: latitude -> [0, 180] and
# longitude -> [0, 360] (assuming the columns hold degrees). The additions
# allocate new arrays, so the in-place scaling below does not touch pop_df.
lats, longs = pop_df["Latitude"].values + 90, pop_df["Longitude"].values + 180
# Brightness on a 0-250 scale.
lats *= 250 / 180
print(lats.max(), lats.min())
print(longs.max(), longs.min())


popColors = list()
for lat, long in zip(lats, longs):
    # Hue is given in degrees (0-360), but colormath's HSV -> RGB conversion
    # expects saturation and value as fractions in [0, 1]. Passing the 0-250
    # value directly produced channel values far above 255 after upscaling.
    # NOTE(review): dividing by 255 assumes the 0-250 scale was meant as an
    # 8-bit brightness -- confirm the intended brightness range.
    hsv = HSVColor(long, 1, lat / 255)
    rgb = convert_color(hsv, sRGBColor).get_upscaled_value_tuple()
    colorStr = "rgb(" + ", ".join(map(str, rgb)) + ")"
    popColors.append(colorStr)

# Dictionary mapping populations to colors.
# Note: this writes the same "pop2rgb" pickle as the other colour-mapping
# cells in this notebook; whichever cell runs last determines its content.
pop2rgb = dict(zip(pops, popColors))
save_obj(pop2rgb, path + "data/cleaned/pop2rgb")

247.3192098765432 101.20464197530863
279.33632 95.9155911111111


In [21]:
from matplotlib.cm import get_cmap
from matplotlib.colors import to_rgb
import numpy as np

# Sample the 'rainbow' colormap at evenly spaced positions in [0, 1], one per
# population, and keep only the first three channels of each returned row.
cmap = get_cmap('rainbow')
numPops = len(pops)
colors = cmap(np.arange(numPops) / (numPops - 1))[:, :3]

# Scale to 0-255 and render every row as an "rgb(r, g, b)" string.
color = np.round(colors * 255)
popColors = [
    "rgb(" + ", ".join(str(int(channel)) for channel in row) + ")"
    for row in color.tolist()
]

# Dictionary mapping populations to colors.
pop2rgb = {pop: rgbStr for pop, rgbStr in zip(pops, popColors)}
save_obj(pop2rgb, path + "data/cleaned/pop2rgb")