# Notebook to convert the Baysor output (poly.json) and the spots file (read below as spots_all.csv) to TSV files that can be used in the viewer

The three generated files (cellCoords_0.tsv, geneData_0.tsv and cellData_0.tsv) should be
copied into the relevant folder under dashboard/data/tsv/

In [1]:
import pandas as pd
import numpy as np

## Create cellCoords_0.tsv

In [2]:
# Load poly.json and number its rows: cell_id counts from 0, label from 1.
cellCoords = pd.read_json('poly.json')
cell_ids = np.arange(len(cellCoords))
cellCoords['cell_id'] = cell_ids
cellCoords['label'] = cell_ids + 1

In [3]:
# The column that read_json labelled 0 is given the working name '_coords'.
cellCoords = cellCoords.rename(columns={0: '_coords'})

In [4]:
# Keep only the id, label and raw-coordinate columns, in that order.
ordered_columns = ['cell_id', 'label', '_coords']
cellCoords = cellCoords[ordered_columns]

In [5]:
# Cast every entry of '_coords' to int64 (fractional parts are truncated by
# the cast) and turn it back into plain nested Python lists.
coords = [
    np.array(raw_coords).astype(np.int64).tolist()
    for raw_coords in cellCoords['_coords']
]

In [6]:
# Add the integer coordinates as 'coords', drop the raw '_coords' column and
# write the result as a tab-separated file without the index.
cellCoords = cellCoords.assign(coords=coords)[['cell_id', 'label', 'coords']]
cellCoords.to_csv('cellCoords_0.tsv', sep='\t', index=False)

In [7]:
# Preview the first five rows of the table that was just written.
cellCoords.head(5)

Unnamed: 0,cell_id,label,coords
0,0,1,"[[36, 164], [36, 169], [36, 174], [31, 174], [..."
1,1,2,"[[145, 160], [150, 160], [155, 160], [160, 160..."
2,2,3,"[[17, 283], [12, 283], [7, 283], [7, 279], [7,..."
3,3,4,"[[160, 231], [160, 236], [164, 236], [169, 236..."
4,4,5,"[[112, 255], [117, 255], [121, 255], [126, 255..."


## Create geneData_0.tsv

In [8]:
# Load the spots table and cast both coordinate columns to int64.
geneData = pd.read_csv('spots_all.csv')
for coord_col in ('x', 'y'):
    geneData[coord_col] = geneData[coord_col].astype(np.int64)


In [9]:
# Rename 'gene' to 'Gene', then give every spot an integer code in 'Expt'.
# np.unique sorts the distinct gene names; the inverse index is each row's
# position within that sorted list.
geneData = geneData.rename(columns={'gene': 'Gene'})
_, Expt = np.unique(geneData['Gene'], return_inverse=True)
geneData['Expt'] = Expt

In [10]:
# One past the largest cell_id present in cellCoords.
background_cell_id = 1 + cellCoords['cell_id'].max()
background_cell_id

6544

In [11]:
# geneData

In [12]:
# Every spot gets background_cell_id as its single neighbour, with
# probability 1.0. 'neighbour' is a float array with one entry per row of
# geneData; the two list columns hold one single-element list per spot.
n_spots = geneData.shape[0]
neighbour = np.full(n_spots, int(background_cell_id), dtype=float)
neighbour_array = [[cell] for cell in neighbour.astype(np.int64).tolist()]
neighbour_prob = [[1.0] for _ in range(n_spots)]

In [13]:
# Attach the per-spot neighbour columns to geneData.
# np.int was only an alias of the builtin int; it was deprecated in NumPy 1.20
# and removed in 1.24, where it raises AttributeError. The cast therefore names
# np.int64 explicitly, matching the x/y casts used elsewhere in this notebook.
geneData['neighbour'] = neighbour.astype(np.int64)
geneData['neighbour_array'] = neighbour_array
geneData['neighbour_prob'] = neighbour_prob

In [14]:
# Write the spots table as a tab-separated file without the index.
geneData.to_csv('geneData_0.tsv', sep='\t', index=False)

In [15]:
# Preview the first five rows of the table that was just written.
geneData.head(5)

Unnamed: 0,Gene,y,x,Expt,neighbour,neighbour_array,neighbour_prob
0,Aldoc,5630,8473,0,6544,[6544],[1.0]
1,Aldoc,5638,9208,0,6544,[6544],[1.0]
2,Aldoc,5648,9035,0,6544,[6544],[1.0]
3,Aldoc,5653,7982,0,6544,[6544],[1.0]
4,Aldoc,5659,8767,0,6544,[6544],[1.0]


## Create cellData_0.tsv

In [16]:
# Build a per-cell table with fixed values: X and Y are 0, the gene name and
# gene count lists are empty, and each cell has the single class 'Generic'
# with probability 1.0.
cellData = pd.DataFrame({'Cell_Num': cellCoords['cell_id']})
cellData['X'] = 0
cellData['Y'] = 0
n_cells = len(cellData)
list_columns = (
    ('Genenames', []),
    ('CellGeneCount', []),
    ('ClassName', ['Generic']),
    ('Prob', [1.0]),
)
for column, template in list_columns:
    # A fresh list per row, so that no two cells share the same list object.
    cellData[column] = [list(template) for _ in range(n_cells)]

In [17]:
# Write the cell table as a tab-separated file without the index, then
# preview its first five rows.
cellData.to_csv('cellData_0.tsv', sep='\t', index=False)
cellData.head(5)

Unnamed: 0,Cell_Num,X,Y,Genenames,CellGeneCount,ClassName,Prob
0,0,0,0,[],[],[Generic],[1.0]
1,1,0,0,[],[],[Generic],[1.0]
2,2,0,0,[],[],[Generic],[1.0]
3,3,0,0,[],[],[Generic],[1.0]
4,4,0,0,[],[],[Generic],[1.0]
