In [1]:
import numpy as np
from scipy.sparse import coo_matrix
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from numpy import linalg as LA
from scipy.sparse.linalg import svds, eigs
from sklearn.decomposition import PCA
import pickle
import pandas as pd
from random import randint

import kmapper as km
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN
import networkx as nx

def convertNansToZeros(ma):
    """Replace every NaN stored in ``ma.data`` with 0, in place.

    Parameters
    ----------
    ma : object with a writable NumPy array in its ``.data`` attribute
        Presumably a scipy.sparse matrix (verify against callers); only the
        explicitly stored values in ``ma.data`` are inspected.

    Returns
    -------
    The same object ``ma`` (mutated in place), to allow call chaining.
    """
    # A boolean mask has the same shape as ma.data, so it addresses the right
    # elements whatever the dimensionality of the storage array. Flat indices
    # (np.flatnonzero) are only correct when ma.data is 1-D.
    nan_mask = np.isnan(ma.data)
    if nan_mask.any():
        ma.data[nan_mask] = 0
    return ma


def convertInfsToZeros(ma):
    """Replace every +inf / -inf stored in ``ma.data`` with 0, in place.

    Parameters
    ----------
    ma : object with a writable NumPy array in its ``.data`` attribute
        Presumably a scipy.sparse matrix (verify against callers); only the
        explicitly stored values in ``ma.data`` are inspected.

    Returns
    -------
    The same object ``ma`` (mutated in place), to allow call chaining.
    """
    # A boolean mask has the same shape as ma.data, so it addresses the right
    # elements whatever the dimensionality of the storage array. Flat indices
    # (np.flatnonzero) are only correct when ma.data is 1-D.
    inf_mask = np.isinf(ma.data)
    if inf_mask.any():
        ma.data[inf_mask] = 0
    return ma

In [44]:
%matplotlib
datadir = '/home/garner1/Work/dataset/SSF/prostate-twelve/'
section = 'P1.2'
# section = 'sample1.2_deconvolution'
filename = datadir+section+'.tsv'

df = pd.read_csv(filename, sep='\t', header=0, index_col=0) # read the gene X position count-matrix

spatialCoord = [w.split('x') for w in list(df.columns.values)]
spatialCoord = [map(int,pairs) for pairs in spatialCoord]
i = [l[0] for l in spatialCoord]
j = [l[1] for l in spatialCoord]

dfnorm = df.div(df.sum(axis=0),axis=1) #normalize to probabilities the transcriptome at each position

data = dfnorm.values

# Initialize
mapper = km.KeplerMapper(verbose=1)

# Fit to and transform the data
dim = 3
projection = PCA(n_components=dim)
projected_data = mapper.fit_transform(data, projection=projection) # X-Y axis

# Create dictionary called 'graph' with nodes, edges and meta-information
nr_cubes = 100
graph = mapper.map(projected_data, data, nr_cubes=nr_cubes)

# Visualize it
mapper.visualize(graph, path_html=section+".PCA."+str(nr_cubes)+"."+str(dim)+"_mapper_output.html",title=filename)

Using matplotlib backend: Qt5Agg
KeplerMapper()
..Composing projection pipeline of length 1:
	Projections: PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
	Distance matrices: False
	Scalers: MinMaxScaler(copy=True, feature_range=(0, 1))
..Projecting on data shaped (17678, 406)

..Projecting data using: 
	PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)


..Scaling with: MinMaxScaler(copy=True, feature_range=(0, 1))

Mapping on data shaped (17678, 406) using lens shaped (17678, 3)

Creating 1000000 hypercubes.

Created 77 edges and 35 nodes in 0:02:50.976858.
Wrote visualization to: P1.2.PCA.100.3_mapper_output.html


u'<!DOCTYPE html>\n<html>\n\n<head>\n  <meta charset="utf-8">\n  <meta name="generator" content="KeplerMapper">\n  <title>/home/garner1/Work/dataset/SSF/prostate-twelve/P1.2.tsv | KeplerMapper</title>\n\n  <link rel="icon" type="image/png" href="http://i.imgur.com/axOG6GJ.jpg" />\n\n  <link href=\'https://fonts.googleapis.com/css?family=Roboto+Mono:700,300\' rel=\'stylesheet\' type=\'text/css\'>\n  <style>* {\n  margin: 0;\n  padding: 0;\n}\n\nhtml, body {\n  height: 100%;\n}\n\nbody {\n  font-family: "Roboto Mono", "Helvetica", sans-serif;\n  font-size: 14px;\n}\n\n#logo {\n  position: absolute;\n  right: 00px;\n  top: 0px;\n  width: 90px;\n  height: 90px;\n  z-index: 999999;\n}\n\n#display {\n  color: #95A5A6;\n  background: #212121;\n}\n\n#print {\n  color: #000;\n  background: #FFF;\n}\n\nh1 {\n  font-size: 21px;\n  font-weight: 300;\n  font-weight: 300;\n}\n\nh2 {\n  font-size: 18px;\n  padding-bottom: 20px;\n  font-weight: 300;\n}\n\nh3 {\n  font-size: 14px;\n  font-weight: 700;\

In [30]:
# Pick one gene (a ROW of the count matrix) uniformly at random.
# randint() includes both endpoints, and genes are indexed along axis 0, so
# the valid range is 0 .. df.shape[0] - 1.
gene = randint(0, df.shape[0] - 1)

# Normalized values of that gene at every position (one per column).
# Only the selected row is converted to a list, not the whole matrix.
data = dfnorm.values[gene].tolist()

# Scatter the values onto the spatial grid using the (i, j) coordinates
# parsed from the column labels; grid cells without a measurement stay 0.
mat = coo_matrix((data, (i, j)), shape=(max(i)+1, max(j)+1)).todense()

cmap = sns.cubehelix_palette(light=1, as_cmap=True)
# NOTE: sns.heatmap returns a matplotlib Axes; the name `fig` is kept as-is
# in case other cells refer to it.
fig = sns.heatmap(mat, annot=False, cmap=cmap)
fig.set_title(df.index[gene])

Text(0.5,1,'IST1 ENSG00000182149')