## Generating the test files

In [1]:
import pandas as pd
import numpy as np
import fastremap
from scipy.sparse import coo_matrix, save_npz, load_npz

### Load the data after the algorithm has converged

In [2]:
# Read the three tab-separated tables (cells, genes, cell boundaries) from the
# current working directory, in that order.
cellData, geneData, cellBoundaries = (
    pd.read_csv(tsv_name, sep='\t')
    for tsv_name in ('cellData.tsv', 'geneData.tsv', 'cellBoundaries.tsv')
)

### Select the cells whose centroids lie inside the bounding box (4238, 364) and (5160, 933)

In [3]:
# Boolean masks, one per axis: True where the cell's coordinate lies strictly
# between the bounding-box limits (the limits themselves are excluded).
idx_X = cellData.X.gt(4238) & cellData.X.lt(5160)
idx_Y = cellData.Y.gt(364) & cellData.Y.lt(933)

In [4]:
# A cell is selected only when both its X and its Y mask are True.
idx_cells = idx_X & idx_Y

In [5]:
# Keep only the rows flagged by the combined mask, then display the result.
# Note that this rebinds `cellData`; the unfiltered table is only recoverable
# by re-reading 'cellData.tsv'.
cellData = cellData.loc[idx_cells]
cellData

Unnamed: 0,Cell_Num,X,Y,Genenames,CellGeneCount,ClassName,Prob
2171,2172,4239.491575,674.214652,"['3110035E14Rik', 'Aldoc', 'Arpp21', 'Cck', 'C...","[4.019805376912221, 0.7554797956565317, 1.0581...","['PC.CA1.2', 'PC.CA1.3', 'PC.Other1', 'PC.Othe...","[0.20653652973723954, 0.7761425952346995, 0.00..."
2179,2180,4252.174352,600.033481,"['3110035E14Rik', 'Adgrl2', 'Bcl11b', 'Cadps2'...","[2.293317222681646, 0.9480891559980522, 0.9654...","['PC.CA1.1', 'PC.CA1.2', 'PC.CA1.3']","[0.0017751618780825643, 0.06439220376280755, 0..."
2183,2184,4266.819492,540.482203,"['3110035E14Rik', 'Adgrl2', 'Arpp21', 'Cadps2'...","[4.000073206227558, 0.097958531838332, 1.01240...","['PC.CA1.1', 'PC.CA1.2', 'PC.CA1.3']","[0.01663743827097375, 0.747343854419005, 0.235..."
2184,2185,4276.054143,463.436423,"['Arpp21', 'Cplx2', 'Enc1', 'Neurod6', 'Prkca'...","[0.9089520066067369, 2.1663923738942428, 0.003...","['Astro.1', 'Astro.2', 'Astro.3', 'Astro.4', '...","[0.018709089035663554, 0.004005459948713244, 0..."
2186,2187,4280.926591,632.393148,"['3110035E14Rik', 'Adgrl2', 'Aldoc', 'Arpp21',...","[0.011796176587809393, 0.04682568952824123, 0....","['PC.CA1.1', 'PC.CA1.2', 'PC.CA1.3', 'PC.Other1']","[0.2783508985912167, 0.6623040455101729, 0.032..."
...,...,...,...,...,...,...,...
2494,2495,5129.608841,653.718098,"['3110035E14Rik', 'Adgrl2', 'Arpp21', 'Cadps2'...","[1.1101842871099195, 0.0010356793887828584, 0....","['PC.CA1.1', 'PC.CA1.2', 'PC.CA1.3']","[0.04891412778748087, 0.6073077909756837, 0.34..."
2497,2498,5132.462146,565.264648,"['3110035E14Rik', 'Arpp21', 'Cadps2', 'Cck', '...","[2.799377214476771, 0.001188136840469617, 0.09...","['PC.CA1.2', 'PC.CA1.3', 'PC.Other1', 'PC.Othe...","[0.09911427105527923, 0.8762133956724187, 0.00..."
2498,2499,5134.048417,524.040348,"['3110035E14Rik', '6330403K07Rik', 'Adgrl2', '...","[2.0132114372772496, 0.9894090522352587, 1.979...","['PC.CA1.2', 'PC.CA1.3', 'PC.Other2']","[0.28519612883965345, 0.6714476801314132, 0.04..."
2502,2503,5150.656743,593.819615,"['3110035E14Rik', 'Cadps2', 'Calb2', 'Cck', 'C...","[0.8950727856708891, 0.8739435731017965, 0.008...","['PC.CA1.1', 'PC.CA1.2', 'PC.CA1.3']","[0.1686261832935773, 0.6516667043301301, 0.179..."


### Now load the label image

In [6]:
# NOTE(review): this is an absolute, machine-specific path; adjust it to your
# own checkout before running the notebook.
label_image_npz = r'/home/dimitris/dev/python/pciSeq/pciSeq/data/mouse/ca1/segmentation/label_image.coo.npz'
coo = load_npz(label_image_npz)

In [7]:
# Expand the sparse matrix into a dense 2-D array so it can be sliced below.
image_label = coo.toarray()

In [8]:
# Sorted distinct label values present in the full label image.
np.unique(image_label)

array([   0,    1,    2, ..., 3479, 3480, 3481], dtype=uint32)

### Clip the label image to the bounding box (4238, 364) and (5160, 933)

In [9]:
# Clip to the bounding box. Rows are indexed by y (364..933) and columns by x
# (4238..5160); both end points are kept, hence the +1 on each stop.
# The slice is taken from a fresh dense copy of `coo` rather than from
# `image_label` itself so that the cell is idempotent: slicing the
# already-clipped array by these absolute coordinates a second time would
# produce an empty column range.
image_label = coo.toarray()[364:933+1, 4238:5160+1]

In [10]:
# Sorted distinct label values that remain inside the clipped image.
np.unique(image_label)

array([   0, 2164, 2170, 2171, 2172, 2173, 2174, 2180, 2184, 2185, 2187,
       2188, 2191, 2192, 2193, 2194, 2195, 2199, 2200, 2202, 2208, 2209,
       2210, 2216, 2218, 2223, 2224, 2226, 2230, 2233, 2234, 2239, 2240,
       2241, 2242, 2243, 2245, 2247, 2248, 2251, 2254, 2255, 2256, 2259,
       2260, 2262, 2265, 2266, 2269, 2271, 2273, 2274, 2276, 2279, 2281,
       2286, 2289, 2290, 2294, 2296, 2298, 2300, 2304, 2308, 2312, 2313,
       2314, 2316, 2318, 2320, 2322, 2323, 2326, 2327, 2332, 2333, 2334,
       2337, 2341, 2343, 2344, 2345, 2346, 2347, 2350, 2353, 2355, 2356,
       2360, 2361, 2363, 2367, 2368, 2370, 2372, 2374, 2375, 2379, 2380,
       2381, 2382, 2384, 2387, 2388, 2390, 2392, 2393, 2394, 2396, 2400,
       2406, 2408, 2412, 2414, 2415, 2421, 2422, 2423, 2424, 2425, 2428,
       2430, 2432, 2434, 2438, 2440, 2441, 2446, 2447, 2451, 2454, 2456,
       2459, 2463, 2464, 2466, 2468, 2469, 2470, 2471, 2473, 2475, 2478,
       2479, 2481, 2483, 2485, 2488, 2491, 2492, 24

### Remove cells whose centroids are outside the bounding box (4238, 364) and (5160, 933)

In [11]:
# Labels of the cells retained in `cellData`; every other label in the image
# is masked out by fastremap.
labels_to_keep = cellData.Cell_Num.tolist()
image_label = fastremap.mask_except(image_label, labels_to_keep)

In [12]:
# Dimensions of the clipped label image, as (rows, columns).
image_label.shape

(570, 923)

### Save the image_label as coo_matrix

In [13]:
# Convert the dense label image back to COO form and write it next to the
# notebook as a .npz file.
test_label_coo = coo_matrix(image_label)
save_npz('test_label_image_coo.npz', test_label_coo)

### Now load the spots and select those inside the bounding box (4238, 364) and (5160, 933)

In [14]:
# NOTE(review): this is an absolute, machine-specific path; adjust it to your
# own checkout before running the notebook.
spots_csv = r'/home/dimitris/dev/python/pciSeq/pciSeq/data/mouse/ca1/iss/spots.csv'
spots = pd.read_csv(spots_csv)

In [15]:
# Per-axis masks: True where the spot's coordinate lies strictly between the
# bounding-box limits.
idx_x = spots.x.gt(4238) & spots.x.lt(5160)
idx_y = spots.y.gt(364) & spots.y.lt(933)

# A spot is selected only when it passes on both axes.
idx_spots = idx_x & idx_y

### Shift the selected spots to the clipped image's origin and save them

In [16]:
# Build the test subset without touching `spots`. Shifting `spots.x` and
# `spots.y` in place would make this cell non-idempotent (a second run would
# subtract the offsets again) and would leave `idx_spots`, which was computed
# on the unshifted coordinates, out of step with the frame.
# The offsets are the top-left corner of the bounding box, so the saved
# coordinates are relative to the clipped label image.
test_spots = spots[idx_spots].assign(
    x=lambda selected: selected.x - 4238,
    y=lambda selected: selected.y - 364,
)
test_spots.to_csv('test_spots.csv', index=False)