In [1]:
import ast
import os
import random
import subprocess

import laspy
import numpy as np
import pandas as pd

# Seed Python's `random` module so that any random draws are repeatable across runs.
random.seed(2023)

In [2]:
def hex_to_rgb(hex):
    """Convert a hexadecimal colour string to an ``(r, g, b)`` tuple of ints.

    Parameters
    ----------
    hex : str
        Colour such as ``'#0000FF'`` or ``'0000ff'``. Surrounding whitespace
        and ``'#'`` characters are ignored. The three-digit shorthand
        (``'#FFF'``) is expanded to six digits. When more than six digits are
        given, only the first six are used.

    Returns
    -------
    tuple of int
        Red, green and blue channel values, each in the range 0-255.

    Raises
    ------
    ValueError
        If fewer than six digits remain (and the input is not a three-digit
        shorthand), or if a channel is not valid hexadecimal.
    """
    digits = hex.strip().replace('#', '')
    if len(digits) == 3:
        # Shorthand notation: every digit stands for a doubled pair (F -> FF).
        digits = ''.join(digit * 2 for digit in digits)
    if len(digits) < 6:
        # Without this check a 5-digit string would be parsed silently, with
        # the last channel read from a single digit.
        raise ValueError('expected a 3- or 6-digit hex colour, got %r' % (hex,))
    return tuple(int(digits[start:start + 2], 16) for start in (0, 2, 4))

In [3]:
# Load the tab-separated spot table and keep only the gene label and the 3D coordinates.
gene_data_path = r"E:\data\Izzie\221020_IF_primary_then_seq\pciSeq\geneData.tsv"
df = pd.read_csv(gene_data_path, sep='\t')[['Gene', 'x', 'y', 'z']]

In [4]:
# Use a lower-case `gene` column name, then report the table size.
df = df.rename(columns={'Gene': 'gene'})
print(df.shape)

(1776408, 4)


In [5]:
# Shape of the array of distinct gene names, i.e. how many different genes the spot table holds.
np.unique(df.gene).shape

(73,)

In [6]:
import json

# Read the whole colour-scheme file into `data` as a single string.
with open(r'73_gene_colour_scheme.js') as scheme_file:
    data = scheme_file.read()


In [7]:
# Remove the JavaScript function wrapper text from the file contents:
# first the opening " function  glyphSettings() { return ", then every " }".
for js_fragment in (" function  glyphSettings() { return ", " }"):
    data = data.replace(js_fragment, "")

In [8]:
# Parse the stripped settings text into a DataFrame.
# `ast.literal_eval` only accepts Python literals (lists, dicts, strings,
# numbers), so text read from the file can never execute code the way `eval` could.
# The isinstance guard keeps the cell re-runnable once `data` is already a DataFrame.
if isinstance(data, str):
    data = pd.DataFrame(ast.literal_eval(data))
data

Unnamed: 0,gene,color,glyphName
0,Snca,#0000FF,plus
1,Cplx2,#0000FF,point
2,Lhx6,#0000FF,square
3,Col25a1,#0000FF,triangleUp
4,Pnoc,#0000FF,triangleRight
...,...,...,...
106,Plp1,#FFFFFF,cross
107,Mal,#FFFFFF,plus
108,Aldoc,#FFFFFF,circle
109,Actb,#FFFFFF,triangleDown


In [9]:
# Genes that occur in the spot table but have no entry in the colour scheme.
# An empty set means every gene has an entry.
set(df.gene) - set(data.gene)

set()

In [10]:
# Glyph names in alphabetical order. A glyph's position in this list is the
# integer code looked up with `shapes.index(...)`.
shapes = sorted([
    'star6', 'star5', 'diamond', 'square',
    'triangleUp', 'triangleDown', 'triangleRight', 'triangleLeft',
    'cross', 'plus', 'asterisk', 'circle',
    'point',
])
shapes

['asterisk',
 'circle',
 'cross',
 'diamond',
 'plus',
 'point',
 'square',
 'star5',
 'star6',
 'triangleDown',
 'triangleLeft',
 'triangleRight',
 'triangleUp']

In [11]:
# Encode every glyph name as its position in the sorted `shapes` list and
# store that code in the `classification` column.
glyph_id = data['glyphName'].map(shapes.index)
data['classification'] = glyph_id
data

Unnamed: 0,gene,color,glyphName,classification
0,Snca,#0000FF,plus,4
1,Cplx2,#0000FF,point,5
2,Lhx6,#0000FF,square,6
3,Col25a1,#0000FF,triangleUp,12
4,Pnoc,#0000FF,triangleRight,11
...,...,...,...,...
106,Plp1,#FFFFFF,cross,2
107,Mal,#FFFFFF,plus,4
108,Aldoc,#FFFFFF,circle,1
109,Actb,#FFFFFF,triangleDown,9


In [12]:
# Split every hex colour into integer r, g, b columns.
# One pass over the colour column replaces the row-wise `iterrows` loop and
# its three accumulator lists.
rgb_channels = pd.DataFrame(
    [hex_to_rgb(colour) for colour in data['color']],
    columns=['r', 'g', 'b'],
    index=data.index,
)
for channel in rgb_channels.columns:
    data[channel] = rgb_channels[channel]
df_colors = data[['gene', 'r', 'g', 'b']]
df_colors

Unnamed: 0,gene,r,g,b
0,Snca,0,0,255
1,Cplx2,0,0,255
2,Lhx6,0,0,255
3,Col25a1,0,0,255
4,Pnoc,0,0,255
...,...,...,...,...
106,Plp1,255,255,255
107,Mal,255,255,255
108,Aldoc,255,255,255
109,Actb,255,255,255


In [13]:
# (rows, columns) of the spot table.
df.shape

(1776408, 4)

In [14]:
# (rows, columns) of the colour-scheme table.
data.shape

(111, 7)

In [15]:
# Attach the colour-scheme columns to every spot.
# The merge is an inner join: a gene missing from `data` would silently drop
# its spots, and a gene listed twice would duplicate them. Check the row count
# before overwriting `df` so neither can go unnoticed.
merged = df.merge(data, on='gene')
assert len(merged) == len(df), (
    'merge changed the number of spots from %d to %d' % (len(df), len(merged))
)
df = merged

In [16]:
# (rows, columns) of the spot table after the merge.
df.shape

(1776408, 10)

In [17]:
# Give every gene an integer code (its position among the sorted unique gene
# names) and store that code per spot in the `pointSourceID` column.
genes, gene_id = np.unique(df['gene'].to_numpy(), return_inverse=True)
df['pointSourceID'] = gene_id

In [18]:
# Display the spot table.
df

Unnamed: 0,gene,x,y,z,color,glyphName,classification,r,g,b,pointSourceID
0,Aldoc,7267.578628,37.180359,158.016013,#FFFFFF,circle,1,255,255,255,0
1,Aldoc,6485.578628,42.180359,61.091773,#FFFFFF,circle,1,255,255,255,0
2,Aldoc,7239.578628,46.180359,158.016013,#FFFFFF,circle,1,255,255,255,0
3,Aldoc,7240.578628,50.180359,158.016013,#FFFFFF,circle,1,255,255,255,0
4,Aldoc,6557.578628,61.180359,154.554433,#FFFFFF,circle,1,255,255,255,0
...,...,...,...,...,...,...,...,...,...,...,...
1776403,Yjefn3,624.357445,1991.000000,152.001052,#FF0000,triangleDown,9,255,0,0,72
1776404,Yjefn3,442.357445,2014.000000,34.307332,#FF0000,triangleDown,9,255,0,0,72
1776405,Yjefn3,597.357445,2022.000000,162.385792,#FF0000,triangleDown,9,255,0,0,72
1776406,Yjefn3,516.357445,2033.000000,48.153652,#FF0000,triangleDown,9,255,0,0,72


In [19]:
## Centre the point cloud by subtracting the mean of each coordinate axis.
## Strictly, the midpoint of the volume (width/2, height/2, depth/2) should be
## subtracted instead, but the mean is a good enough approximation.
for axis in ('x', 'y', 'z'):
    df[axis] = df[axis] - df[axis].mean()

In [20]:
# len(set(df.barcode_id))

In [21]:
# First five rows, restricted to positional columns 3, 4, 5 and the last one.
df.head().values[:, [3, 4, 5, -1]]

array([[61.2245559205429, '#FFFFFF', 'circle', 0],
       [-35.699684079457114, '#FFFFFF', 'circle', 0],
       [61.2245559205429, '#FFFFFF', 'circle', 0],
       [61.2245559205429, '#FFFFFF', 'circle', 0],
       [57.7629759205429, '#FFFFFF', 'circle', 0]], dtype=object)

In [22]:
# Values of positional column 6 for every row, taken from the frame converted to one array.
df.values[:, 6]

array([1, 1, 1, ..., 9, 9, 9], dtype=object)

In [23]:
def make_las(df, las_path):
    """Write the points in ``df`` to ``<las_path>/izzie.las`` (LAS 1.4, point format 7).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per point. Must contain the columns ``x``, ``y``, ``z``
        (coordinates), ``r``, ``g``, ``b`` (colour channels),
        ``classification`` and ``pointSourceID``.
    las_path : str
        Directory that receives ``izzie.las``. It is created if missing.

    Returns
    -------
    str
        Path of the written file.

    Raises
    ------
    KeyError
        If a required column is missing from ``df``.
    ValueError
        If ``df`` has no rows.
    """
    required = ['x', 'y', 'z', 'r', 'g', 'b', 'classification', 'pointSourceID']
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError('make_las: missing column(s): %s' % ', '.join(missing))
    if len(df) == 0:
        raise ValueError('make_las: cannot write a point cloud with no points')

    # Select columns by name rather than by position, so the result does not
    # depend on the column order of `df`.
    # Coordinates stay float64: float32 cannot hold the values at the 0.001
    # resolution used for the header scale below.
    xyz = df[['x', 'y', 'z']].to_numpy(dtype='float64')
    rgb = df[['r', 'g', 'b']].to_numpy(dtype='uint16')
    classification = df['classification'].to_numpy(dtype='uint8')
    point_source_id = df['pointSourceID'].to_numpy(dtype='uint16')

    # 1. Create the header; the offset is the floored per-axis minimum.
    hdr = laspy.LasHeader(version="1.4", point_format=7)
    hdr.offsets = np.floor(xyz.min(axis=0))
    hdr.scales = np.array([0.001, 0.001, 0.001])

    # 2. Create the Las data and fill in its dimensions.
    las = laspy.LasData(hdr)
    las.x = xyz[:, 0]
    las.y = xyz[:, 1]
    las.z = xyz[:, 2]
    las.red = rgb[:, 0]
    las.green = rgb[:, 1]
    las.blue = rgb[:, 2]
    las.classification = classification
    las.point_source_id = point_source_id

    out_filename = os.path.join(las_path, 'izzie.las')
    if las_path:
        # An empty las_path means the current directory; nothing to create.
        os.makedirs(las_path, exist_ok=True)
    las.write(out_filename)
    print('las file saved at: %s ' % out_filename)
    return out_filename

In [24]:
def make_octree(las_path, exe=r"E:\potree\PotreeConverter_windows_x64\PotreeConverter.exe"):
    """Run PotreeConverter on ``las_path`` and write the octree next to it.

    The output directory is ``<parent of las_path>/octree/izzie``; it is
    created if missing.

    Parameters
    ----------
    las_path : str
        Input path handed to the converter.
    exe : str, optional
        Path of the PotreeConverter executable.

    Raises
    ------
    FileNotFoundError
        If ``exe`` does not exist.
    RuntimeError
        If the converter exits with a non-zero return code; the message
        carries the converter's captured output.
    """
    if not os.path.isfile(exe):
        raise FileNotFoundError('PotreeConverter executable not found: %s' % exe)

    output_dir = os.path.join(os.path.dirname(las_path), 'octree', 'izzie')
    os.makedirs(output_dir, exist_ok=True)

    # The arguments are passed as a list without a shell, so paths containing
    # spaces need no quoting and a missing executable cannot fail silently.
    result = subprocess.run(
        [exe, las_path, "-o", output_dir, "-m", "poisson"],
        capture_output=True,
        text=True,
        errors='replace',
    )
    if result.returncode != 0:
        # Previously the result was captured and never inspected, so a failed
        # conversion went unnoticed.
        raise RuntimeError(
            'PotreeConverter failed with exit code %d\nstdout:\n%s\nstderr:\n%s'
            % (result.returncode, result.stdout, result.stderr)
        )
    print('octree saved at: %s ' % output_dir)


In [25]:
# Set here where the las files will be saved. The octrees will be under the same parent dir
las_dir = os.path.join('F:\\', 'potree', 'izzie_pointcloud', 'las')

# Write the las file first, then build the octree from the las directory.
make_las(df, las_dir)
make_octree(las_dir)

las file saved at: F:\potree\izzie_pointcloud\las\izzie.las 


In [26]:
# Largest gene code assigned by `np.unique(..., return_inverse=True)`; codes start at 0.
gene_id.max()

72