In [30]:
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import matplotlib.cm

# Root directory of the project; every data file below is addressed relative to it.
path = "/home/ubuntu/onekgenomes/"

# Read the population table (tab-separated, indexed by its second column)
# and discard every row that has a missing value.
sampleDataFile = "data/sampleData/populations.tsv"
df = pd.read_csv(path + sampleDataFile, sep='\t', index_col=1).dropna()

# Parallel lists: index labels, their super populations and their descriptions.
pops = df.index.tolist()
superPops = df.loc[:, "Super Population"].tolist()
descriptions = df.loc[:, "Population Description"].tolist()

# Lookup tables keyed by the population index label.
pop2descripDict = {pop: text for pop, text in zip(pops, descriptions)}
pop2superPopDict = {pop: superPop for pop, superPop in zip(pops, superPops)}

'African Ancestry in Southwest US'

In [31]:
# Load the per-sample table (tab-separated, indexed by its first column).
sampleDataFile = "data/sampleData/sampleData.tsv"
df = pd.read_csv(path + sampleDataFile, sep='\t', index_col=0)

# Populations: encode every sample's population label as the integer position
# of that label in the sorted list of distinct labels.
pops = df["Population"].tolist()
uniquePops = sorted(set(pops))
# Number of DISTINCT populations (not the number of samples).
numPops = len(uniquePops)
targetDict = dict(zip(uniquePops, range(numPops)))
popArray = np.array([targetDict[p] for p in pops])

# Super Populations: same integer encoding, one code per sample.
superPops = df["Super Population"].tolist()
uniqueSuperPops = sorted(set(superPops))
numSuperPops = len(uniqueSuperPops)
targetDict = dict(zip(uniqueSuperPops, range(numSuperPops)))
superPopArray = np.array([targetDict[p] for p in superPops])

# Map super populations to colors.
cmap = matplotlib.cm.get_cmap('Spectral')
# Not used in the visible cells; kept in case a later cell references it.
uniqueSuperPopArray = []
# Normalise the integer codes to [0, 1] so they span the whole colormap.
# float() avoids integer (floor) division, and max(..., 1) avoids dividing by
# zero when only one super population is present.
temp = superPopArray / float(max(numSuperPops - 1, 1))
rgba = cmap(temp)
# One RGB row per sample (alpha channel dropped), so the array can be indexed
# with a per-sample boolean mask.
superPopRgb = rgba[:, :3]

In [32]:
# Display the sorted list of distinct super population labels.
uniqueSuperPops

['AFR', 'AMR', 'EAS', 'EUR', 'SAS']

In [7]:
# Gender: boolean array that is True where the "Gender" column equals "male".
genders = df["Gender"]
G = (genders == "male").values

# Load data.
# NOTE(review): assumes the rows of the embeddings are in the same order as the
# rows of df, and that embeddedAll has at least three columns -- confirm.
embeddedAll = np.load(path + "data/dimReduc/dualPCA/embeddedAll.npy")
embeddedNum = np.load(path + "data/dimReduc/dualPCA/embeddedNum.npy")

# Plot: one 3-D scatter trace per population (so each gets a legend entry),
# with every marker colored by the super population of its sample.
# float() avoids integer (floor) division; max(..., 1) avoids dividing by zero
# when only one super population is present.
colorDenom = float(max(numSuperPops - 1, 1))
data = list()
for count, pop in enumerate(uniquePops):
    # Boolean mask selecting the samples that belong to this population.
    ind = popArray == count
    # First three embedding columns as 1-D coordinate vectors.
    x, y, z = embeddedAll[ind, :3].T
    # Colormap lookup on the normalised super population codes; keep RGB only
    # and scale to 0-255 integers.
    temp = cmap(superPopArray[ind] / colorDenom)[:, :3]
    rgb255 = np.rint(temp * 255).astype(int).tolist()
    # Explicit CSS color strings, one per marker, so no colorscale is needed.
    colors = ['rgb({:d},{:d},{:d})'.format(r, g, b) for r, g, b in rgb255]
    trace = go.Scatter3d(
        x=x,
        y=y,
        z=z,
        name=pop,
        mode='markers',
        marker=dict(
            size=5,
            color=colors,
            opacity=0.5
        )
    )
    data.append(trace)


# Remove all margins so the 3-D scene fills the output area.
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='3d-scatter-colorscale')

In [5]:
# Display the sorted list of distinct super population labels.
uniqueSuperPops

['AFR', 'AMR', 'EAS', 'EUR', 'SAS']