# Custom segmentation Nanostring data

Here we provide the commands used to segment the Nanostring data with the different segmentation methods. Raw data can be accessed via Nanostring. If the data are no longer available there, raw data can be requested from the corresponding authors.
This notebook is not intended to be executed end to end; rather, it is a collection of the batch commands we used to run the segmentations.

In [2]:
import pandas as pd
import numpy as np

## Baysor

In [6]:
# Load only the columns that the cells below actually use.
transcript_columns = [
    'slideID',
    'fov',
    'z_FOV_slice',
    'target',
    'CellId',
    'cell_id',
    'CellComp',
    'x',
    'y',
]
all_file = pd.read_parquet('all_coordinates_sub.parquet', columns=transcript_columns)

In [3]:
# Integer label per distinct cell_id string. Category codes start at 0 (and are
# -1 for missing values), so adding 1 leaves 0 free as the "no cell" label.
cell_codes = all_file['cell_id'].astype('category').cat.codes + 1

# z is the slice index scaled by a fixed step of 0.0015
# (presumably the slice spacing in the same unit as x/y — TODO confirm).
all_file['z'] = all_file['z_FOV_slice'] * 0.0015
all_file['cell_id_int'] = cell_codes

# Transcripts whose CellId is 0 get the integer label 0.
all_file.loc[all_file['CellId'] == 0, 'cell_id_int'] = 0

all_file = all_file.rename(columns={'cell_id': 'cell_ID'})

In [None]:
# Persist the current state of `all_file`; a later cell reloads this same file.
all_file.to_parquet('all_coordinates_processed.parquet')

In [8]:
import os

# Output folders for the per-FOV Baysor transcript CSVs, one per tissue type.
# makedirs(..., exist_ok=True) creates the parent directory as needed and does
# not raise FileExistsError when this cell is run a second time.
for tissue_folder in ('normal', 'cancer'):
    os.makedirs(os.path.join('baysor_transcripts', tissue_folder), exist_ok=True)

In [3]:
# Reload the processed table from the parquet file written by this notebook.
all_file = pd.read_parquet('all_coordinates_processed.parquet')

In [4]:
# Scale all three spatial coordinates by 1000
# (presumably a unit conversion, e.g. mm -> µm — TODO confirm).
# NOTE: this cell is not idempotent; every execution multiplies by 1000 again.
coord_cols = ['x', 'y', 'z']
all_file[coord_cols] = all_file[coord_cols] * 1000
all_file.head()

Unnamed: 0,slideID,fov,z_FOV_slice,target,CellId,cell_ID,CellComp,x,y,z,cell_id_int
0,1,12,6,PTK2,19,c_1_12_19,Cytoplasm,17872.479992,-455.92,9.0,34546
1,1,4,7,APOA1,3,c_1_4_3,Cytoplasm,21908.919968,-455.92,10.5,268176
2,1,4,6,TPT1,3,c_1_4_3,Cytoplasm,21896.919964,-455.92,9.0,268176
3,1,8,7,APOA1,26,c_1_8_26,Nuclear,19820.279952,-455.92,10.5,317649
4,1,6,3,TPT1,7,c_1_6_7,Cytoplasm,20828.519946,-455.92,4.5,292932


In [5]:
# Write one transcript CSV per field of view (FOV), grouped by tissue folder.
# slideID 1 goes to 'normal' and slideID 2 to 'cancer'; any other slide is
# reported and skipped.
#
# Fix: the slide == 1 branch previously ended in a stray `continue`, so the
# normal slide was never exported and fov_dict['normal'] was never filled,
# although a later cell reads it.
slide_folders = {1: 'normal', 2: 'cancer'}
fov_dict = {}  # folder name -> array of the FOV ids found on that slide
for slide in all_file['slideID'].unique():
    folder = slide_folders.get(slide)
    if folder is None:
        print(f'{slide} unknown continuing with next slide')
        continue
    current = all_file[all_file['slideID']==slide]
    fov_dict[folder] = current['fov'].unique()
    for fov in fov_dict[folder]:
        # The target directory baysor_transcripts/<folder>/ must already exist.
        current[current['fov']==fov].to_csv(f'baysor_transcripts/{folder}/nanostring_liver_{folder}_{fov}.csv')

In [None]:
' '.join(fov_dict['normal'].astype(str))

'12 4 8 6 3 15 10 7 14 11 9 2 1 13 5 18 19 23 20 25 26 28 30 16 29 17 27 24 31 22 21 38 41 34 40 37 39 36 35 46 44 32 45 43 33 47 42 58 51 62 59 56 53 52 50 57 54 49 61 48 55 60 76 66 71 75 78 74 65 69 77 67 70 68 73 72 64 87 81 91 90 79 89 92 86 80 83 82 85 88 93 84 100 97 104 99 96 101 94 95 98 107 102 105 103 106 118 108 122 123 110 111 117 112 115 119 120 114 109 121 113 116 131 133 124 135 125 132 139 134 130 140 128 136 141 129 126 127 138 137 147 156 155 142 157 149 151 146 153 143 144 152 150 154 145 148 167 160 164 163 158 166 168 170 159 162 171 165 172 161 169 184 175 178 176 181 189 177 179 174 185 188 173 180 183 182 187 186 205 193 198 197 204 199 200 196 191 202 201 206 194 195 207 203 192 190 224 218 220 223 209 213 216 219 222 215 210 214 221 208 225 212 211 217 236 234 231 233 226 232 237 235 230 238 241 227 228 242 239 240 229 258 251 247 252 246 245 253 257 256 243 248 244 249 255 250 254 261 265 273 259 264 269 260 262 272 270 267 268 274 271 263 266 285 286 281 28

In [None]:
' '.join(fov_dict['cancer'].astype(str))

'4 5 3 1 2 7 9 6 10 12 11 8 13 17 18 16 19 14 20 15 21 24 23 22 25 26 29 28 30 33 27 32 31 35 38 39 40 34 36 37 44 42 45 43 46 47 41 48 55 54 53 52 51 50 49 60 58 59 56 57 61 62 68 65 64 63 69 66 67 70 73 75 72 74 71 79 76 78 77 82 81 80 86 85 89 83 88 84 87 92 96 93 94 91 95 90 99 98 102 101 100 97 103 107 108 106 105 104 109 110 114 115 117 113 111 112 116 121 122 119 123 118 120 124 125 127 128 126 129 131 130 134 137 133 136 135 138 132 143 139 140 141 145 144 142 147 146 151 148 149 150 152 157 154 158 153 156 155 159 163 160 161 165 162 166 164 167 169 168 172 173 170 174 171 179 178 180 177 176 175 190 181 189 188 191 182 184 183 185 187 186 198 194 196 192 193 197 195 202 200 204 205 201 199 203 206 212 211 207 210 213 208 209 214 215 216 221 219 218 220 217 222 223 230 227 226 228 224 225 229 232 234 238 233 237 236 231 235 239 245 241 240 244 242 243 247 248 246 250 252 249 251 259 257 254 260 253 258 256 255 265 262 263 267 261 266 264 270 269 274 273 268 272 271 275 277 281

In [None]:
# NOTE(review): this overwrites the same parquet file that an earlier cell
# reloads before multiplying x/y/z by 1000. If that scaling has already been
# applied to `all_file`, reloading and re-running the scaling cell afterwards
# would scale the coordinates a second time — confirm the intended order or
# write the scaled table to a separate file.
all_file.to_parquet('all_coordinates_processed.parquet')

In [8]:
import os

# DataFrame.to_csv does not create missing parent directories, and no cell
# shown in this notebook creates these two, so make sure they exist first.
for proseg_dir in ('proseg_healthy', 'proseg_cancer'):
    os.makedirs(proseg_dir, exist_ok=True)

# One coordinates CSV per slide for ProSeg: slideID 1 -> healthy, slideID 2 -> cancer.
all_file[all_file['slideID']==1].to_csv('proseg_healthy/coordinates_healthy.csv')
all_file[all_file['slideID']==2].to_csv('proseg_cancer/coordinates_cancer.csv')

In [None]:
%%bash

# Run Baysor on every normal-tissue FOV CSV, at most N jobs at a time.
#
# NOTE(review): `docker run -it` starts an interactive container. Inside a
# script the lines below only run after that command returns, and they run in
# the calling shell, not in the container. Presumably the loop was pasted into
# the container's shell by hand — confirm before executing this cell as-is.
docker run -it -v /home/cane:/home/cane --rm vpetukhov/baysor:master
N=8
(
for VARIABLE in 12 4 8 6 3 15 10 7 14 11 9 2 1 13 5 18 19 23 20 25 26 28 30 16 29 17 27 24 31 22 21 38 41 34 40 37 39 36 35 46 44 32 45 43 33 47 42 58 51 62 59 56 53 52 50 57 54 49 61 48 55 60 76 66 71 75 78 74 65 69 77 67 70 68 73 72 64 87 81 91 90 79 89 92 86 80 83 82 85 88 93 84 100 97 104 99 96 101 94 95 98 107 102 105 103 106 118 108 122 123 110 111 117 112 115 119 120 114 109 121 113 116 131 133 124 135 125 132 139 134 130 140 128 136 141 129 126 127 138 137 147 156 155 142 157 149 151 146 153 143 144 152 150 154 145 148 167 160 164 163 158 166 168 170 159 162 171 165 172 161 169 184 175 178 176 181 189 177 179 174 185 188 173 180 183 182 187 186 205 193 198 197 204 199 200 196 191 202 201 206 194 195 207 203 192 190 224 218 220 223 209 213 216 219 222 215 210 214 221 208 225 212 211 217 236 234 231 233 226 232 237 235 230 238 241 227 228 242 239 240 229 258 251 247 252 246 245 253 257 256 243 248 244 249 255 250 254 261 265 273 259 264 269 260 262 272 270 267 268 274 271 263 266 285 286 281 288 275 277 283 276 280 282 279 278 284 287 293 304 298 294 302 289 295 291 292 296 297 299 290 301; do
    # Throttle: i cycles through 0..N-1; each time it wraps to 0, wait for the
    # previous batch of background jobs to finish before starting more.
    ((i=i%N)); ((i++==0)) && wait
    baysor run -x x -y y -z z --gene target nanostring_liver_normal_$VARIABLE.csv :cell_id_int -o segmentation_m50_fov_$VARIABLE.csv -p -c config.toml --plot --save-polygons=geojson &
done
# Wait for the last batch as well; without this the subshell ends while the
# final background jobs are still running.
wait
)

## ProSeg

In [None]:
%%bash

# Run proseg with the --cosmx flag on the cancer transcript file, move up one
# directory, then run it on the normal transcript file.
# NOTE(review): these input names differ from the CSVs this notebook writes
# (proseg_cancer/coordinates_cancer.csv, proseg_healthy/coordinates_healthy.csv)
# — confirm which files and working directories are intended.
proseg cancer_tx_file.csv --cosmx
cd ..
proseg normal_tx_file.csv --cosmx