In [1]:
# Load libraries
import os
# The input file paths defined below are relative, so they resolve against this directory.
# NOTE(review): absolute, machine-specific path - the notebook only runs where this directory exists.
os.chdir('/Users/denis/Documents/Projects/cmap/cmap-l1000-analysis')
import pandas as pd
# NOTE(review): download_plotlyjs is imported but not referenced in the cells visible here.
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
# Wildcard import: presumably the source of the Scatter3d and Layout names used by plot3dScatter.
from plotly.graph_objs import *
# Initialise plotly's offline notebook mode before any iplot() call
# (presumably required for figures to render inline - confirm for the installed plotly version).
init_notebook_mode()

In [2]:
# Input tables, relative to the working directory:
# index 0 is the overall PCA, index 1 is the per-cell-type PCA
infiles = [
    'f3-plotly_data.dir/cmap_l1000-pca.txt',
    'f3-plotly_data.dir/cmap_l1000-celltype_pca.txt',
]

# Define function
def getVarLabelDict(row):
    """Map each principal-component name found in `row` to its full label.

    Parameters
    ----------
    row : iterable
        Values to scan (e.g. one DataFrame row). Only string entries that
        contain 'PC' are used; everything else (numbers, NaN, other strings)
        is ignored.

    Returns
    -------
    dict
        Keyed by the text before the first space of each matching entry,
        with the whole entry as value, e.g. 'PC1 (30% var)' gives
        {'PC1': 'PC1 (30% var)'}. If two entries share a key, the later
        one wins.
    """
    # isinstance (rather than an exact type comparison) also accepts str subclasses
    return {
        label.split(' ')[0]: label
        for label in row
        if isinstance(label, str) and 'PC' in label
    }

# Read the overall PCA table (infiles[0]) and then the cell-type PCA table
# (infiles[1]), using the sample_id column as the row index of each
pcaDataframe, celltypePcaDataframe = [
    pd.read_table(infiles[position], index_col='sample_id')
    for position in (0, 1)
]

In [3]:
def plot3dScatter(dataframe, categoricalColumn, varLabelDict, annotationColumn=False, title='', PCs=['PC1', 'PC2', 'PC3'], colors=['#e41a1c','#377eb8','#4daf4a','#984ea3','#ff7f00']):
    """Build a plotly figure dict with one 3D scatter trace per category.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One row per point. Must contain `categoricalColumn`, the three
        columns named in `PCs` and, when given, `annotationColumn`.
    categoricalColumn : str
        Column whose distinct values define the traces. Rows with a missing
        value in this column are not plotted.
    varLabelDict : dict
        Maps a column name from `PCs` to its axis title. A column without
        an entry is labelled with its own name.
    annotationColumn : str or False, optional
        Column passed as per-point `text`; an empty string is used when falsy.
    title : str, optional
        Figure title.
    PCs : sequence of three str, optional
        Columns plotted on the x, y and z axes, in that order.
    colors : sequence of str, optional
        Marker colours, assigned to categories in order of first appearance
        and reused cyclically when there are more categories than colours.

    Returns
    -------
    dict
        ``{'data': [Scatter3d, ...], 'layout': Layout}``.

    Raises
    ------
    ValueError
        If there is at least one category but `colors` is empty.
    """
    # NOTE: the list defaults in the signature are shared between calls;
    # they are only read here and must never be mutated.

    # Distinct categories in order of first appearance, so the category ->
    # colour mapping and the legend order are identical on every run (the
    # iteration order of a set of strings is not). Missing values are
    # dropped: an equality filter can never match NaN, so they could only
    # produce an empty trace.
    categories = list(dataframe[categoricalColumn].dropna().unique())

    if categories and len(colors) == 0:
        raise ValueError('colors must contain at least one colour')

    xColumn, yColumn, zColumn = PCs[0], PCs[1], PCs[2]

    # One trace per category
    traces = []
    for i, category in enumerate(categories):

        # Rows belonging to this category
        plotDataframe = dataframe[dataframe[categoricalColumn] == category]

        # Get annotation, if specified
        annotation = plotDataframe[annotationColumn] if annotationColumn else ''

        traces.append(
            Scatter3d(
                x=plotDataframe[xColumn],
                y=plotDataframe[yColumn],
                z=plotDataframe[zColumn],
                mode='markers',
                text=annotation,
                name=category,
                marker=dict(
                    size=5,
                    # Wrap around instead of raising IndexError once the
                    # palette is exhausted
                    color=colors[i % len(colors)],
                    opacity=0.9
                ),
            )
        )

    # Add layout; fall back to the bare column name when no label is known
    layout = Layout(
        title=title,
        hovermode='closest',
        width=1000,
        height=1000,
        scene=dict(
            xaxis=dict(title=varLabelDict.get(xColumn, xColumn)),
            yaxis=dict(title=varLabelDict.get(yColumn, yColumn)),
            zaxis=dict(title=varLabelDict.get(zColumn, zColumn)),
        ),
        margin=dict(
            l=50,
            r=50,
            b=50,
            t=50
        )
    )

    # Return figure
    return dict(data=traces, layout=layout)

In [4]:
# Get var labels
varLabelDict = getVarLabelDict(pcaDataframe.loc['varLabels'])

# Plot PCA
# The 'varLabels' row is the one the axis labels are read from above, so it
# is removed before plotting. `axis` is passed by keyword because pandas >= 2.0
# no longer accepts it positionally.
# NOTE(review): since that row shares columns with the PC values, those columns
# are presumably read as object dtype - confirm whether a numeric cast is wanted.
fig = plot3dScatter(
    pcaDataframe.drop('varLabels', axis=0),
    'cell_id',
    varLabelDict,
    annotationColumn='perturbationLabel',
    title='PCA Analysis of CMAP L1000 data<br>grouped by cell line',
)
iplot(fig)

In [5]:
# Loop through cell types
# unique() yields the cell types in order of first appearance, so the figures
# come out in the same order on every run (iterating a set of strings does
# not guarantee that). Missing cell_id values are skipped because an equality
# filter can never match them.
for cellType in celltypePcaDataframe['cell_id'].dropna().unique():

    # Get subset
    celltypePcaDataframeSubset = celltypePcaDataframe[celltypePcaDataframe['cell_id'] == cellType]

    # Get the axis labels from this subset's own 'varLabels' row
    celltypeVarLabelDict = getVarLabelDict(celltypePcaDataframeSubset.loc['varLabels'])

    # Plot PCA, using the labels computed for this cell type (not the ones
    # from the overall PCA). `axis` is passed by keyword because pandas >= 2.0
    # no longer accepts it positionally.
    fig = plot3dScatter(
        celltypePcaDataframeSubset.drop('varLabels', axis=0),
        'plate_name',
        celltypeVarLabelDict,
        annotationColumn='perturbationLabel',
        title='PCA Analysis of CMAP L1000 data<br>%(cellType)s cells, grouped by plate' % {'cellType': cellType},
    )
    iplot(fig)
