## Compute average glycan intensities in each core

In [1]:
# import required packages
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import skimage.io as io

In [2]:
from alpineer import io_utils

### File paths

In [3]:
base_dir = "../data/DBDP_transition/MALDI/library_matched"

In [4]:
gly_dir = os.path.join(base_dir, 'float')
mask_dir = os.path.join(base_dir, 'fiji/Mask')
output_dir = base_dir

In [5]:
# validate paths
for directory in [gly_dir, mask_dir]:
    if os.path.exists(directory):
        print("Exists:", directory)
    else: print ("Does not exist:", directory)

Exists: ../data/DBDP_transition/MALDI/library_matched/float
Exists: ../data/DBDP_transition/MALDI/library_matched/fiji/Mask


### Get glycan and mask info

In [6]:
#get glycan info
# first check for duplicated matches - only input unique matches here
gly_filename = os.path.join(base_dir, 'matched_peaks.csv')
glycan_tab = pd.read_csv(gly_filename)
glycan_tab = glycan_tab[glycan_tab['composition'].notnull()] #filter glycan matches
markers = glycan_tab['composition'].to_list()
markers = unique_list = list(dict.fromkeys(markers)) #remove duplicates
# markers = io_utils.remove_file_extensions(io_utils.list_files(gly_dir))
num_markers = len(markers)

print(num_markers)
print(markers)

127
['H2N2F1', 'H3N2', 'H3N2F1', 'H4N2', 'H3N3', 'H5N2', 'H3N3F1', 'H4N3', 'H3N4', 'H5N2F1', 'H6N2', 'H3N3F2', 'H4N3F1', 'H5N3', 'H3N4F1', 'H4N4', 'H3N5', 'H7N2', 'H4N3S1', 'H5N3F1', 'H6N3', 'H3N4F2', 'H4N4F1', 'H5N4', 'H3N5F1', 'H4N5', 'H4N3F1S1', 'H8N2', 'H3N6', 'H5N3F2', 'H6N3F1', 'H3N4F3', 'H4N4S1', 'H5N4F1', 'H6N4', 'H4N5F1', 'H5N5', 'H3N6F1', 'H9N2', 'H4N6', 'H6N3S1', 'H6N3F2', 'H4N4F1S1', 'H5N4S1', 'H5N4F2', 'H6N4F1', 'H5N4S1+Na1', 'H4N5S1', 'H5N5F1', 'H6N5', 'H4N6F1', 'H6N3F1S1', 'H5N6', 'H5N4F1S1', 'H5N4F3', 'H6N4S1', 'H5N4F1S1+Na1', 'H4N5F1S1', 'H5N5S1', 'H5N5F2', 'H6N5F1', 'H7N5', 'H4N6S1', 'H5N6F1', 'H6N6', 'H5N4S2', 'H4N7F1', 'H5N4F2S1+Na1', 'H5N4S2+Na2', 'H5N5F1S1', 'H5N5F3', 'H6N5S1', 'H6N5F2', 'H5N5F1S1+Na1', 'H6N5S1+Na1', 'H6N7', 'H4N6F1S1', 'H5N6S1', 'H6N6F1', 'H5N4F1S2', 'H7N6', 'H5N7F1', 'H5N5S2', 'H6N5F1S1', 'H6N5F3', 'H5N6F1S1', 'H6N6S1', 'H6N6F2', 'H7N6F1', 'H4N7F1S1', 'H8N6', 'H5N7S1', 'H6N7F1', 'H7N7', 'H6N5S2', 'H5N6S2', 'H6N5S2+Na2', 'H6N6F1S1', 'H6N6F3', 'H7

In [7]:
#get mask info 
# core_filename = os.path.join(base_dir, 'Overlay Elements of UCLA_pipeline_mask.csv')
# core_tab = pd.read_csv(core_filename)
#core_tab = core_tab[core_tab['core'].notnull()] #remove rows with no annotations
masks = io_utils.remove_file_extensions(io_utils.list_files(mask_dir))
# masks.remove("nan")
# masks.remove('Thumbs.d')
num_masks = len(masks)

print(num_masks)
print(masks)

9
['DBDP_10_0_1', 'DBDP_10_0_2', 'DBDP_10_0_3', 'DBDP_8_2_2', 'DBDP_8_2_3', 'DBDP_8_2_1', 'DBDP_6_5_1', 'DBDP_6_5_2', 'DP_6_4_1']


### Calculate average intensities

In [8]:
#get average intensities in masks
mean_df = np.zeros((num_masks, num_markers))
#for i in range(1):
for i in range(num_markers):
    filename = markers[i] + ".tiff"
    impath = os.path.join(gly_dir, filename)
    im = io.imread(impath)
    im_array = np.array(im)

    mean = []
    for j in range(num_masks):
        maskname = masks[j]+ ".tif"
        maskpath = os.path.join(mask_dir, maskname)
        maskim = io.imread(maskpath)
        mask_array = np.array(maskim)
        indices = np.where(mask_array == [255])
        mean_int = np.mean(im_array[indices])
        mean.append(mean_int)

    mean_df[:,i] = mean

In [9]:
print(mean_df)

[[288.20402496 447.32391383 442.97725716 ...  65.26381581  53.46346159
  120.41896068]
 [280.16665761 463.65613152 369.98087697 ...  48.93048855  39.82466712
   72.04263032]
 [301.9873269  400.39100263 385.64700648 ...  46.45013301  42.06379241
   64.37221436]
 ...
 [357.05456193 491.22999409 435.40873029 ...  69.114893    52.30371633
  129.04760926]
 [327.98450636 414.23246867 429.10915523 ...  48.48013992  44.01429473
   69.83756453]
 [189.77910172 314.76061231 286.48572813 ...  41.52775246  41.42842178
   35.65213398]]


In [10]:
# #count number of white pixels
# total_pixels = []
# for j in range(num_masks):
#     maskname = masks[j]+ ".tiff"
#     maskpath = os.path.join(mask_dir, maskname)
#     maskim = io.imread(maskpath)
#     mask_array = np.array(maskim)
#     num_white_pix = np.sum(mask_array == 255)
#     total_pixels.append(num_white_pix)

In [11]:
# print(total_pixels)

In [12]:
# Add column for mask #
# convert to pandas dataframe to mix string and numeric data
mean_df_final = pd.DataFrame(mean_df, columns = markers)
mean_df_final['mask'] = masks

print(mean_df_final)

       H2N2F1        H3N2      H3N2F1        H4N2        H3N3         H5N2  \
0  288.204025  447.323914  442.977257  325.814005  550.286488  1169.761579   
1  280.166658  463.656132  369.980877  284.637053  876.800175  1028.198502   
2  301.987327  400.391003  385.647006  293.547170  583.814036  1141.442623   
3  267.204197  443.013905  345.435523  289.873156  822.780630   985.588356   
4  113.566478  141.943872  145.292033  110.899341  217.428036   327.219609   
5  281.137029  527.358798  398.875123  322.370482  999.716539  1082.952367   
6  357.054562  491.229994  435.408730  365.801541  830.482067  1367.165552   
7  327.984506  414.232469  429.109155  331.381083  574.939033  1287.184659   
8  189.779102  314.760612  286.485728  226.778636  483.173514   774.878125   

        H3N3F1         H4N3        H3N4     H5N2F1  ...   H7N7F1S1  \
0   912.242368  1028.261154  480.870470  84.682215  ...  68.565043   
1   921.970502  1935.837790  525.250980  80.870046  ...  52.838380   
2   977.5

In [13]:
mean_df_final.to_csv(os.path.join(output_dir,"DBDP_transition_mean_gly_permask.csv"))