In [2]:
import pandas as pd
import numpy as np

import os

import plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

# Enable inline rendering of plotly figures inside the notebook.
init_notebook_mode(connected=True)

# SECURITY: credentials must never be hardcoded in a notebook. They are read
# from the environment instead (PLOTLY_USERNAME / PLOTLY_API_KEY). Any key
# that was previously committed in this file should be treated as leaked and
# regenerated.
_plotly_username = os.environ.get('PLOTLY_USERNAME')
_plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if _plotly_username and _plotly_api_key:
    plotly.plotly.sign_in(_plotly_username, _plotly_api_key)
# The figures below are drawn with the offline `iplot`, so sign-in is skipped
# when no credentials are configured.

from IPython.core.display import display, HTML

In [3]:

# Load the copy-number table and reshape it so that rows are tumour samples.
# Method chaining (instead of inplace=True) keeps this cell idempotent: it can
# be re-run without failing on already-dropped columns.
data = (
    pd.read_csv('data_CNA.xls.txt', sep='\t')
    .set_index('Hugo_Symbol')
    .drop(['Entrez_Gene_Id'], axis=1)
    .transpose()  # Convert data to matrix, rows are tumour samples
)

# Per-sample clinical annotations, indexed by sample id.
clinical = (
    pd.read_csv('OLD_DATA/data_clinical_sample_clean.tsv', sep='\t')
    .set_index('SAMPLE_ID')
    .drop(['Unnamed: 0'], axis=1)
)

# Combined frame: every column of `data` plus the ONCOTREE_CODE label column.
# NOTE: concat along axis=1 is an outer join on the index, so `result` may
# contain samples that are missing from `data` and may be ordered differently.
result = pd.concat([data, clinical['ONCOTREE_CODE']], axis=1)

# Select labels in exactly the row order of `data`, so that positional use
# (e.g. data.values paired with labels) stays aligned sample-for-sample.
labels = result.loc[data.index, 'ONCOTREE_CODE']
assert len(labels) == len(data), 'labels must align one-to-one with data rows'

# Preview as the last expression so the notebook actually renders it.
result.head()

In [3]:
# DataFrame.as_matrix() is deprecated and was removed in pandas 1.0; .values
# returns the same underlying NumPy array on old and new versions.
data.values

array([[ 0,  0,  0, ..., -1, -1, -1],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       ..., 
       [ 0,  0,  0, ..., -1, -1, -1],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]], dtype=int64)

In [99]:
def performPCA(X, Y, num_examples=2000, metric='euclidean'):
    """Embed X into three principal components and draw a 3D scatter plot.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix handed to sklearn's ``PCA.fit_transform``.
    Y : array-like, length n_samples
        One label per row of X, used to colour the markers. Non-numeric
        labels are converted to integer codes because a plotly colorscale
        can only map numbers.
    num_examples : int, optional
        Currently unused; kept so existing calls remain valid.
    metric : str, optional
        Currently unused (PCA takes no distance metric); kept so the
        signature matches ``performTSNE``.

    Returns
    -------
    None
        The figure is rendered inline through plotly's offline ``iplot``.
    """
    from sklearn.decomposition import PCA
    model = PCA(n_components=3)  # fit into 3D space
    print('Defined model')
    print('Performing PCA on data with shape {0}'.format(X.shape))
    embeddedX = model.fit_transform(X)
    print('Created embedding')

    # A colorscale needs numeric values: keep numeric labels as they are and
    # map anything else (e.g. string class codes) to integer category codes.
    label_values = np.asarray(Y)
    if np.issubdtype(label_values.dtype, np.number):
        marker_colors = label_values
    else:
        marker_colors, _ = pd.factorize(label_values)
    # Show the original label when hovering over a point.
    hover_text = [str(label) for label in label_values]

    # Scatter plot to visualize embedded data
    trace = go.Scatter3d(
        x=embeddedX[:, 0],
        y=embeddedX[:, 1],
        z=embeddedX[:, 2],
        mode='markers',
        marker=dict(
            size=5,  # marker size is a number, not a string
            color=marker_colors,  # color points by label they belong to
            colorscale=[[0, '#dd2c4f'], [1, '#3d6fcc']],
        ),
        text=hover_text,
    )

    # Named `traces` so the notebook-level `data` frame is not shadowed.
    traces = [trace]

    layout = go.Layout(
        title='PCA Embedding of Clusters in 3D Space',
    )

    fig = go.Figure(data=traces, layout=layout)
    iplot(fig, filename='PCA-embedding')

    print('Plotted data')
    

In [101]:
# Run PCA on the first 70 samples only. `.values` replaces the deprecated
# DataFrame.as_matrix(), which was removed in pandas 1.0.
performPCA(data.values[:70], labels[:70])

Defined model
Performing PCA on data with shape (70, 23109)
Created embedding


Plotted data


In [104]:
def performTSNE(X, Y, num_examples=2000, perp=30, metric='euclidean'):
    """Embed X into three dimensions with t-SNE and draw a 3D scatter plot.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix handed to sklearn's ``TSNE.fit_transform``.
    Y : array-like, length n_samples
        One label per row of X, used to colour the markers. Non-numeric
        labels are converted to integer codes because a plotly colorscale
        can only map numbers.
    num_examples : int, optional
        Currently unused; kept so existing calls remain valid.
    perp : float, optional
        Perplexity passed to ``TSNE``.
    metric : str, optional
        Distance metric passed to ``TSNE``.

    Returns
    -------
    None
        The figure is rendered inline through plotly's offline ``iplot``.
    """
    from sklearn.manifold import TSNE
    # random_state is fixed so repeated runs give the same embedding.
    model = TSNE(n_components=3, perplexity=perp, random_state=0, metric=metric)  # fit into 3D space
    print('Defined model')
    print('Performing t-SNE on data with shape {0}'.format(X.shape))
    embeddedX = model.fit_transform(X)
    print('Created embedding')

    # A colorscale needs numeric values: keep numeric labels as they are and
    # map anything else (e.g. string class codes) to integer category codes.
    label_values = np.asarray(Y)
    if np.issubdtype(label_values.dtype, np.number):
        marker_colors = label_values
    else:
        marker_colors, _ = pd.factorize(label_values)
    # Show the original label when hovering over a point.
    hover_text = [str(label) for label in label_values]

    # Scatter plot to visualize embedded data
    trace = go.Scatter3d(
        x=embeddedX[:, 0],
        y=embeddedX[:, 1],
        z=embeddedX[:, 2],
        mode='markers',
        marker=dict(
            size=5,  # marker size is a number, not a string
            color=marker_colors,  # color points by label they belong to
            colorscale=[[0, '#dd2c4f'], [1, '#3d6fcc']],
        ),
        text=hover_text,
    )

    # Named `traces` so the notebook-level `data` frame is not shadowed.
    traces = [trace]

    layout = go.Layout(
        title='t-SNE Embedding of Clusters in 3D Space',
    )

    fig = go.Figure(data=traces, layout=layout)
    iplot(fig, filename='t-SNE-embedding')

    print('Plotted data')

In [105]:
# Run t-SNE on all samples. `.values` replaces the deprecated
# DataFrame.as_matrix(), which was removed in pandas 1.0.
performTSNE(data.values, labels)

Defined model
Performing PCA on data with shape (103, 23109)
Created embedding


Plotted data


In [4]:
# DataFrame.corr() correlates columns pairwise, so the frame is transposed
# first to obtain correlations between the rows of `data`.
data.T.corr()

Unnamed: 0,MBC-MBCProject_GvHkH2Hk-Tumor-SM-AZ5H9,MBC-MBCProject_GvHkH2Hk-Tumor-SM-AZ5HV,MBC-MBCProject_N4srsKsr-Tumor-SM-CGLTK,MBC-MBCProject_N4srsKsr-Tumor-SM-CGMGK,MBC-MBCProject_4MF1FlFQ-Tumor-SM-AZ5CU,MBC-MBCProject_4MF1FlFQ-Tumor-SM-CGM4M,MBC-MBCProject_wAiri7fp-Tumor-SM-AZ5DH,MBC-MBCProject_wAiri7fp-Tumor-SM-AZ5E4,MBC-MBCProject_K7f6fdUz-Tumor-SM-AZ5MA,MBC-MBCProject_99CdCOHm-Tumor-SM-AZ5MW,...,MBC-MBCProject_ErfKfJt0-Tumor-SM-AZ5GM,MBC-MBCProject_K0UDUnFE-Tumor-SM-AZ5FD,MBC-MBCProject_RKf1frsr-Tumor-SM-AZ5JR,MBC-MBCProject_RKf1frsr-Tumor-SM-AZ5L1,MBC-MBCProject_RKf1frsr-Tumor-SM-CGLJI,MBC-MBCProject_gjhMuoha-Tumor-SM-AXGGJ,MBC-MBCProject_gotjfgf0-Tumor-SM-AXGPS,MBC-MBCProject_gotjfgf0-Tumor-SM-CGL7K,MBC-MBCProject_1qhlIasw-Tumor-SM-AXGNH,MBC-MBCProject_3jhES9fq-Tumor-SM-AXGIU
MBC-MBCProject_GvHkH2Hk-Tumor-SM-AZ5H9,1.000000,0.555957,0.350301,0.344324,0.315269,0.289621,0.042001,0.055013,0.044545,0.032887,...,0.044617,0.435604,0.206070,0.131342,0.289310,0.405010,0.146046,0.227976,0.274226,0.502588
MBC-MBCProject_GvHkH2Hk-Tumor-SM-AZ5HV,0.555957,1.000000,0.404550,0.382044,0.324226,0.279553,0.033155,0.145012,0.147951,0.014733,...,0.107947,0.316072,0.190579,0.169786,0.237703,0.378749,0.095843,0.095072,0.314336,0.653725
MBC-MBCProject_N4srsKsr-Tumor-SM-CGLTK,0.350301,0.404550,1.000000,0.542054,0.241239,0.249490,0.368208,0.134285,0.096991,0.155998,...,0.058184,0.249961,0.270822,0.246570,0.317125,0.278639,0.125851,0.108043,0.303576,0.542622
MBC-MBCProject_N4srsKsr-Tumor-SM-CGMGK,0.344324,0.382044,0.542054,1.000000,0.287227,0.317821,0.243709,0.058861,0.264147,0.245513,...,0.079709,0.242616,0.239488,0.218771,0.290545,0.280837,0.200905,0.221114,0.272352,0.411031
MBC-MBCProject_4MF1FlFQ-Tumor-SM-AZ5CU,0.315269,0.324226,0.241239,0.287227,1.000000,0.780511,0.085392,0.100562,0.216823,0.113928,...,0.084579,0.298087,0.136105,0.087299,0.211760,0.319699,0.249880,0.227928,0.268644,0.345598
MBC-MBCProject_4MF1FlFQ-Tumor-SM-CGM4M,0.289621,0.279553,0.249490,0.317821,0.780511,1.000000,0.116476,0.041727,0.190472,0.169137,...,0.042298,0.281324,0.120611,0.065096,0.202247,0.278099,0.258044,0.237427,0.226568,0.268999
MBC-MBCProject_wAiri7fp-Tumor-SM-AZ5DH,0.042001,0.033155,0.368208,0.243709,0.085392,0.116476,1.000000,0.267064,0.035830,0.235185,...,0.092324,0.022879,0.138320,0.215145,0.123446,0.073917,0.141299,0.091693,0.154652,0.061343
MBC-MBCProject_wAiri7fp-Tumor-SM-AZ5E4,0.055013,0.145012,0.134285,0.058861,0.100562,0.041727,0.267064,1.000000,0.033274,0.090001,...,0.164214,0.029490,0.071682,0.163860,0.062022,0.043756,0.069705,0.011792,0.086201,0.206916
MBC-MBCProject_K7f6fdUz-Tumor-SM-AZ5MA,0.044545,0.147951,0.096991,0.264147,0.216823,0.190472,0.035830,0.033274,1.000000,0.087951,...,0.046550,0.130179,0.184872,0.190644,0.184765,0.284990,0.250790,0.196568,0.246007,0.151086
MBC-MBCProject_99CdCOHm-Tumor-SM-AZ5MW,0.032887,0.014733,0.155998,0.245513,0.113928,0.169137,0.235185,0.090001,0.087951,1.000000,...,0.025770,0.113807,0.050754,0.036067,0.101637,0.078360,0.148574,0.096113,0.088423,0.034830


In [4]:
import seaborn as sns

In [5]:
# Clustered heatmap of the transposed frame, using correlation as the
# distance metric; the returned grid object is kept in `g`.
g = sns.clustermap(data.T, metric="correlation")


The axisbg attribute was deprecated in version 2.0. Use facecolor instead.



In [11]:
# Recent seaborn releases no longer expose a `sns.plt` alias, so
# `sns.plt.show()` raises AttributeError there. Ending the cell with the
# clustermap's figure renders it through the notebook's rich display instead.
g.fig