In [1]:
import tiledb
import tiledbsoma
from anndata import AnnData
import scanpy as sc
import pandas as pd

In [2]:
#tiledbURI = "/home/levinsj/spatial/cosMx_tileDB/"
tiledbURI = "/home/levinsj/spatial/cosMx_tileDB/449d69fd-121e-4096-b451-a564cdb3fee7_TileDB/"

In [3]:
# set up s3 environment
config = tiledb.Config()
# config.update({"vfs.s3.region" : s3Region})
ctx = tiledb.Ctx(config)

# read in SOMACollection
pySoma = tiledbsoma.SOMACollection(tiledbURI, ctx=ctx) # for version 2
pySoma.keys()

['RNA_normalized_9133f4fe.2155.42f7.93d2.8d64a00f70e6_1',
 'negprobes',
 'uns',
 'RNA',
 'falsecode']

In [4]:
pySoma

URI:        /home/levinsj/spatial/cosMx_tileDB/449d69fd-121e-4096-b451-a564cdb3fee7_TileDB/
SOMA count: 5

In [5]:
probeNames = pd.read_csv('/home/levinsj/spatial/adata/probeNames.csv')
print(probeNames["x"])

0        AATK
1        ABL1
2        ABL2
3       ACACB
4         ACE
        ...  
996      XIST
997      XKR4
998      YBX3
999      YES1
1000    ZFP36
Name: x, Length: 1001, dtype: object


In [6]:
spike_in = ["ESRRB", "SLC12A1", "UMOD", "CD247", "SLC8A1", "SNTG1", "SLC12A3", "TRPM6", "ACSL4", "SCN2A",
          "SATB2", "STOX2", "EMCN", "MEIS2", "SEMA3A", "PLVAP", "NEGR1", "SERPINE1", "CSMD1", "SLC26A7",
          "SLC22A7", "SLC4A9", "SLC26A4", "CREB5", "HAVCR1", "REN", "AP1S3", "LAMA3", "NOS1", "PAPPA2",
          "SYNPO2", "RET", "LHX1", "SIX2", "CITED1", "WNT9B", "AQP2", "SCNN1G", "ALDH1A2", "CFH", "NTRK3",
          "WT1", "NPHS2", "PTPRQ", "CUBN", "LRP2", "SLC13A3", "ACSM2B", "SLC4A4", "PARD3", "XIST","UTY"]

In [7]:
counts = pySoma['RNA'].X['counts'].csr()

obs = pySoma['RNA'].obs.df()
transcriptCoords = tiledb.open_dataframe(pySoma['RNA'].obsm["transcriptCoords"].uri, ctx=ctx)

coordinates = obs[["x_slide_mm", "y_slide_mm"]]
adata = AnnData(counts, obs = obs, obsm={"spatial": coordinates}, dtype = "float32")

df = pd.DataFrame("probe", index=probeNames["x"], columns = ["probe"])
df[df.index == "NegativeAdd"]["probe_real"] = "null"
df.index.name = None

adata.var = df
adata.var["custom_probes"] = adata.var_names.isin(spike_in)
adata.var["orig_probes"] = ~adata.var_names.isin(spike_in + ["NegativeAdd"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[df.index == "NegativeAdd"]["probe_real"] = "null"


In [8]:
# given concerns with autoflorescence, let's annotate probe by fluor

In [9]:
fluor_codes = pd.read_csv("/home/levinsj/spatial/referenceFiles/probe_fluorophores.csv", index_col=0, header=0)

In [10]:
print(fluor_codes[["Green Spots"]])

                  Green Spots
Target                       
SystemControl144            4
SystemControl57             4
ADGRE2                      3
ADGRG5                      3
BGN                         3
...                       ...
VPREB3                      0
VWF                         0
WNT11                       0
WNT3                        0
XBP1                        0

[1157 rows x 1 columns]


In [11]:
adata.var["green_counts"] = 4
adata.var["blue_counts"] = 4
adata.var["red_counts"] = 4
adata.var["yellow_counts"] = 4

g_count_3 = list(fluor_codes[fluor_codes["Green Spots"] == 3].index)
g_count_2 = list(fluor_codes[fluor_codes["Green Spots"] == 2].index)
g_count_1 = list(fluor_codes[fluor_codes["Green Spots"] == 1].index)
g_count_0 = list(fluor_codes[fluor_codes["Green Spots"] == 0].index)

b_count_3 = list(fluor_codes[fluor_codes["Blue spots"] == 3].index)
b_count_2 = list(fluor_codes[fluor_codes["Blue spots"] == 2].index)
b_count_1 = list(fluor_codes[fluor_codes["Blue spots"] == 1].index)
b_count_0 = list(fluor_codes[fluor_codes["Blue spots"] == 0].index)

r_count_3 = list(fluor_codes[fluor_codes["Red spots"] == 3].index)
r_count_2 = list(fluor_codes[fluor_codes["Red spots"] == 2].index)
r_count_1 = list(fluor_codes[fluor_codes["Red spots"] == 1].index)
r_count_0 = list(fluor_codes[fluor_codes["Red spots"] == 0].index)

y_count_3 = list(fluor_codes[fluor_codes["Yellow spots"] == 3].index)
y_count_2 = list(fluor_codes[fluor_codes["Yellow spots"] == 2].index)
y_count_1 = list(fluor_codes[fluor_codes["Yellow spots"] == 1].index)
y_count_0 = list(fluor_codes[fluor_codes["Yellow spots"] == 0].index)

In [12]:
for i in adata.var["green_counts"].index:
    if i in g_count_3:
        adata.var["green_counts"][i] = 3
    if i in g_count_2:
        adata.var["green_counts"][i] = 2
    if i in g_count_1:
        adata.var["green_counts"][i] = 1
    if i in g_count_0:
        adata.var["green_counts"][i] = 0 

for i in adata.var["blue_counts"].index:
    if i in b_count_3:
        adata.var["blue_counts"][i] = 3
    if i in b_count_2:
        adata.var["blue_counts"][i] = 2
    if i in b_count_1:
        adata.var["blue_counts"][i] = 1
    if i in b_count_0:
        adata.var["blue_counts"][i] = 0 

for i in adata.var["red_counts"].index:
    if i in r_count_3:
        adata.var["red_counts"][i] = 3
    if i in r_count_2:
        adata.var["red_counts"][i] = 2
    if i in r_count_1:
        adata.var["red_counts"][i] = 1
    if i in r_count_0:
        adata.var["red_counts"][i] = 0 

for i in adata.var["yellow_counts"].index:
    if i in y_count_3:
        adata.var["yellow_counts"][i] = 3
    if i in y_count_2:
        adata.var["yellow_counts"][i] = 2
    if i in y_count_1:
        adata.var["yellow_counts"][i] = 1
    if i in y_count_0:
        adata.var["yellow_counts"][i] = 0 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.var["green_counts"][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.var["green_counts"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.var["green_counts"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  adata.var["green_counts"][i] = 0
A value is trying to be set on a cop

In [13]:
print(adata.var)

       probe  custom_probes  orig_probes  green_counts  blue_counts  \
AATK   probe          False         True             1            1   
ABL1   probe          False         True             0            1   
ABL2   probe          False         True             0            1   
ACACB  probe          False         True             0            1   
ACE    probe          False         True             0            2   
...      ...            ...          ...           ...          ...   
XIST   probe           True        False             4            4   
XKR4   probe          False         True             1            1   
YBX3   probe          False         True             1            3   
YES1   probe          False         True             1            0   
ZFP36  probe          False         True             2            0   

       red_counts  yellow_counts  
AATK            0              2  
ABL1            1              2  
ABL2            3              0  
ACACB  

In [14]:
adata.write_h5ad(filename = "/home/levinsj/spatial/adata/7_13_23_rawExport.h5ad")
