In [1]:
import numpy as np
import os, sys
sys.path.append('/home/dolley/gear/lib')
import gear.datasetuploader as datasetuploader

# Workbook used as the input for every cell in this notebook.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine that has this exact checkout location.
test_file = '/home/dolley/gear/tests/base_template.xlsx'
# Ask DatasetUploader for the uploader registered under the 'excel' key.
dataset = datasetuploader.DatasetUploader.upload_dataset('excel')
# Read the workbook; later cells access the result through `dataset.adata`.
# NOTE(review): `_read_file` has a leading underscore, i.e. it looks like a
# private method -- confirm there is no public entry point to use instead.
dataset._read_file(test_file)
# Last expression of the cell: preview the first rows of the gene (var) table.
dataset.adata.var.head()

  from ._conv import register_converters as _register_converters


Unnamed: 0_level_0,gene_symbol
genes,Unnamed: 1_level_1
ENSMUSG00000000567,Gnai3
ENSMUSG00000000568,Pbsn
ENSMUSG00000000569,Cdc45l
ENSMUSG00000000570,H19
ENSMUSG00000000571,Scml2


In [19]:
# Short notebook-level aliases for the parts of the loaded dataset that the
# cells below work with.
adata = dataset.adata
# Expression matrix; later cells slice it by row (X[start:end]) and iterate
# over its transpose (X.T) to go gene by gene.
X = adata.X
# Per-observation annotation table.
obs = adata.obs
# Per-gene annotation table.
var = adata.var

#GET GENE SCOPE COLORING
def get_color(expression, step_size, stat_min):
    """Map one expression value onto a colour index in the range 0-255.

    The index is the number of ``step_size`` steps that ``expression`` lies
    above ``stat_min``, truncated towards zero and clamped to ``[0, 255]``.

    Args:
        expression: The value to colour. ``None`` and NaN both mean
            "no measurement".
        step_size: Width of one colour step. ``0`` means the gene has no
            spread, so there is no scale to place the value on.
        stat_min: The value that maps to colour index 0.

    Returns:
        int: ``0`` for a missing value, ``int(255/2)`` (127) when
        ``step_size`` is 0, otherwise the clamped step count.
    """
    # NaN is what a missing value looks like once it is inside a numeric
    # array; int(NaN) raises ValueError, so treat it like None.
    if expression is None or np.isnan(expression):
        return 0

    if step_size == 0:
        # Flat gene: every value gets the middle of the colour range.
        return int(255/2)

    steps = (expression - stat_min) / step_size

    # Clamp before converting so that +/-inf cannot raise OverflowError.
    # For finite values this is equivalent to truncating first and clamping
    # afterwards.
    if steps > 255:
        return 255
    if steps < 0:
        return 0

    return int(steps)

# GET ABSOLUTE COLORING.
def get_abs_color(expression, abs_step_size):
    """Map one expression value onto an absolute colour index in 0-254.

    Unlike ``get_color`` the scale starts at zero rather than at the gene's
    minimum: the index is ``expression / abs_step_size`` truncated towards
    zero and clamped to ``[0, 254]``.

    Args:
        expression: The value to colour. ``None`` and NaN both mean
            "no measurement".
        abs_step_size: Width of one colour step. Must be non-zero; the
            caller is expected to substitute a fallback step for genes
            whose maximum is 0.

    Returns:
        int: ``0`` for a missing value, otherwise the clamped step count.
    """
    # NaN is what a missing value looks like once it is inside a numeric
    # array; int(NaN) raises ValueError, so treat it like None.
    if expression is None or np.isnan(expression):
        return 0

    abs_steps = expression / abs_step_size

    # Clamp before converting so that +/-inf cannot raise OverflowError.
    # NOTE(review): the ceiling here is 254 while get_color uses 255 --
    # kept as-is; confirm the difference is intentional.
    if abs_steps > 254:
        return 254
    if abs_steps < 0:
        return 0

    return int(abs_steps)

def get_gene_coloring(adata=None, color_mode=None, verbose=True):
    """Compute a colour index for every value of ``adata.X``, gene by gene.

    ``adata.X`` is transposed so that each gene (a column of ``X``) is scaled
    on its own. For each gene the observed (non-NaN) values are sorted and
    the largest 5% are left out before the minimum and maximum that define
    the colour scale are taken, so a few extreme values do not compress the
    scale. Every value of the gene, including the ones left out of the
    scale, is then converted with ``get_color`` ('raw') or ``get_abs_color``
    ('absolute').

    Edge cases:
        * A gene whose trimmed values are all equal (including an all-zero
          gene) has no spread: 'raw' gives 127 for every observed value,
          'absolute' uses a fallback step of 0.01 when the maximum is 0.
        * Missing values (NaN) are ignored when the scale is derived and are
          coloured 0.
        * At least one value is always kept after trimming, so a dataset
          with a single observation works.

    Args:
        adata: Object exposing the expression matrix as ``adata.X``
            (observations in rows, genes in columns).
        color_mode: ``'raw'`` to scale between the gene's trimmed minimum
            and maximum, ``'absolute'`` to scale between 0 and the trimmed
            maximum.
        verbose: When true (the default) print the intermediate statistics
            of every gene; pass ``False`` to keep the output quiet.

    Returns:
        numpy.ndarray: Colour indices laid out like ``adata.X``
        (one row per observation, one column per gene).

    Raises:
        Exception: If ``adata`` or ``color_mode`` is ``None``.
        ValueError: If ``color_mode`` is neither 'raw' nor 'absolute'.
    """
    if adata is None:
        raise Exception("Error: Argument 'adata' is None. Please include an AnnData object.")
    if color_mode is None:
        raise Exception("Error: Argument 'color_mode' is None. Please choose 'raw' or 'absolute'.")
    if color_mode not in ('raw', 'absolute'):
        # Previously an unknown mode silently produced empty colour lists.
        raise ValueError(
            "Error: Unknown color_mode {!r}. Please choose 'raw' or 'absolute'.".format(color_mode)
        )

    X = adata.X

    # Transpose X to iterate gene by gene.
    colors_by_gene = list()
    for gene in X.T:
        values = np.asarray(gene, dtype=float)
        if verbose:
            print('ROW: ', gene)
            print('SUM: ', np.nansum(values))

        # np.sort places NaN last, so NaNs must be removed first; otherwise
        # the 95% cutoff would drop NaNs instead of the largest values.
        observed = values[~np.isnan(values)]

        if observed.size == 0:
            # Nothing measured for this gene: use a flat scale at zero.
            stat_min = stat_max = 0.0
        else:
            row_sorted = np.sort(observed)

            # Find the 95% cutoff and remove values past it. Keep at least
            # one value so min/max are defined for very short rows.
            cutoff_idx = max(1, int(len(row_sorted) * 0.95))
            row_trimmed = row_sorted[:cutoff_idx]

            # The row is sorted, so the ends are the min and max.
            stat_min = row_trimmed[0]
            stat_max = row_trimmed[-1]

            if verbose:
                print('SORTED: ', row_sorted)
                print('CUTOFF: ', cutoff_idx)
                print('TRIMMED: ', row_trimmed)
                print('MIN: ', stat_min)
                print('MAX: ', stat_max)
                print('MEAN: ', round(np.mean(row_trimmed), 4))

        # Colours are always built fresh for the current gene, so a gene
        # can never inherit the previous gene's colours.
        if color_mode == 'raw':
            # Get the step size of the values based on 255 colors.
            step_size = (stat_max - stat_min) / 255
            if verbose:
                print('STEP_SIZE: ', step_size)

            # Missing values are handed over as None, which the helper
            # maps to colour 0.
            row_colors = [
                get_color(None if np.isnan(value) else value, step_size, stat_min)
                for value in values
            ]
        else:
            abs_step_size = stat_max / 255

            # A zero maximum would mean dividing by zero; fall back to a
            # small fixed step instead.
            if abs_step_size == 0:
                abs_step_size = 0.01

            row_colors = [
                get_abs_color(None if np.isnan(value) else value, abs_step_size)
                for value in values
            ]

        if verbose:
            print('GENE COLORS: ', row_colors)
        colors_by_gene.append(row_colors)

    # Back to the orientation of X: observations in rows, genes in columns.
    Xcolors = np.array(colors_by_gene)
    return Xcolors.T

In [15]:
# Try out gene RAW coloring: each gene is scaled between its own trimmed
# minimum and maximum.
# NOTE(review): this cell's execution count (15) is lower than that of the
# cell defining get_gene_coloring (19), so the output shown below may come
# from an older version of the function -- re-run top to bottom to confirm.
XColGeneRaw = get_gene_coloring(adata, color_mode='raw')
# Keep the colour matrix next to the data it was derived from.
# NOTE(review): this sets a plain attribute on `adata`; confirm that is the
# intended place to store the result.
adata.XColGeneRaw = XColGeneRaw
# Last expression of the cell: display the stored matrix.
adata.XColGeneRaw

ROW:  [ 72.  92.  51.  93.   1.  46.   0.  33.  46.  75.  56.  28.  90. 100.
   7.  25.  40.  81.]
SUM:  936.0
SORTED:  [  0.   1.   7.  25.  28.  33.  40.  46.  46.  51.  56.  72.  75.  81.
  90.  92.  93. 100.]
CUTOFF:  17
TRIMMED:  [ 0.  1.  7. 25. 28. 33. 40. 46. 46. 51. 56. 72. 75. 81. 90. 92. 93.]
MIN:  0.0
MAX:  93.0
MEAN:  49.1765
STEP_SIZE:  0.36470588235294116
GENE COLORS:  [197, 252, 139, 255, 2, 126, 0, 90, 126, 205, 153, 76, 246, 255, 19, 68, 109, 222]
ROW:  [79. 15. 66. 74. 76. 81. 74. 22. 30.  8. 59. 60. 46. 61. 77.  7. 82. 14.]
SUM:  931.0
SORTED:  [ 7.  8. 14. 15. 22. 30. 46. 59. 60. 61. 66. 74. 74. 76. 77. 79. 81. 82.]
CUTOFF:  17
TRIMMED:  [ 7.  8. 14. 15. 22. 30. 46. 59. 60. 61. 66. 74. 74. 76. 77. 79. 81.]
MIN:  7.0
MAX:  81.0
MEAN:  49.9412
STEP_SIZE:  0.2901960784313726
GENE COLORS:  [248, 27, 203, 230, 237, 254, 230, 51, 79, 3, 179, 182, 134, 186, 241, 0, 255, 24]
ROW:  [40. 15. 94. 75. 40. 49. 81. 73. 45.  9. 14. 45. 67.  3. 40. 60. 49. 43.]
SUM:  842.0
SORTED:

TRIMMED:  [ 2.  2.  8. 18. 35. 39. 40. 44. 49. 60. 66. 80. 81. 85. 90. 92. 96.]
MIN:  2.0
MAX:  96.0
MEAN:  52.1765
STEP_SIZE:  0.3686274509803922
GENE COLORS:  [103, 238, 225, 0, 0, 244, 89, 113, 211, 43, 127, 16, 157, 255, 214, 254, 100, 173]
ROW:  [87. 81. 41. 37. 44. 29. 19. 58. 30. 10. 13. 99. 53. 30. 42. 79. 82.  6.]
SUM:  840.0
SORTED:  [ 6. 10. 13. 19. 29. 30. 30. 37. 41. 42. 44. 53. 58. 79. 81. 82. 87. 99.]
CUTOFF:  17
TRIMMED:  [ 6. 10. 13. 19. 29. 30. 30. 37. 41. 42. 44. 53. 58. 79. 81. 82. 87.]
MIN:  6.0
MAX:  87.0
MEAN:  43.5882
STEP_SIZE:  0.3176470588235294
GENE COLORS:  [255, 236, 110, 97, 119, 72, 40, 163, 75, 12, 22, 255, 147, 75, 113, 229, 239, 0]
ROW:  [11. 50. 75.  8. 66. 62. 99. 19. 55. 29. 69. 82.  6. 28. 96. 83. 70. 34.]
SUM:  942.0
SORTED:  [ 6.  8. 11. 19. 28. 29. 34. 50. 55. 62. 66. 69. 70. 75. 82. 83. 96. 99.]
CUTOFF:  17
TRIMMED:  [ 6.  8. 11. 19. 28. 29. 34. 50. 55. 62. 66. 69. 70. 75. 82. 83. 96.]
MIN:  6.0
MAX:  96.0
MEAN:  49.5882
STEP_SIZE:  0.35294117

array([[197, 248, 120, ..., 255,  14, 232],
       [252,  27,  39, ..., 236, 124,   8],
       [139, 203, 255, ..., 110, 195, 159],
       ...,
       [ 68,   0, 186, ..., 229, 218, 204],
       [109, 255, 150, ..., 239, 181, 221],
       [222,  24, 130, ...,   0,  79,  47]])

In [20]:
# Try out gene ABSOLUTE coloring: each gene is scaled between zero and its
# own trimmed maximum.
XColGeneAbs = get_gene_coloring(adata, color_mode='absolute')
# Keep the colour matrix next to the data it was derived from.
# NOTE(review): this sets a plain attribute on `adata`; confirm that is the
# intended place to store the result.
adata.XColGeneAbs = XColGeneAbs
# Last expression of the cell: display the stored matrix.
adata.XColGeneAbs

ROW:  [ 72.  92.  51.  93.   1.  46.   0.  33.  46.  75.  56.  28.  90. 100.
   7.  25.  40.  81.]
SUM:  936.0
SORTED:  [  0.   1.   7.  25.  28.  33.  40.  46.  46.  51.  56.  72.  75.  81.
  90.  92.  93. 100.]
CUTOFF:  17
TRIMMED:  [ 0.  1.  7. 25. 28. 33. 40. 46. 46. 51. 56. 72. 75. 81. 90. 92. 93.]
MIN:  0.0
MAX:  93.0
MEAN:  49.1765
GENE COLORS:  [197, 252, 139, 254, 2, 126, 0, 90, 126, 205, 153, 76, 246, 254, 19, 68, 109, 222]
ROW:  [79. 15. 66. 74. 76. 81. 74. 22. 30.  8. 59. 60. 46. 61. 77.  7. 82. 14.]
SUM:  931.0
SORTED:  [ 7.  8. 14. 15. 22. 30. 46. 59. 60. 61. 66. 74. 74. 76. 77. 79. 81. 82.]
CUTOFF:  17
TRIMMED:  [ 7.  8. 14. 15. 22. 30. 46. 59. 60. 61. 66. 74. 74. 76. 77. 79. 81.]
MIN:  7.0
MAX:  81.0
MEAN:  49.9412
GENE COLORS:  [248, 47, 207, 232, 239, 254, 232, 69, 94, 25, 185, 188, 144, 192, 242, 22, 254, 44]
ROW:  [40. 15. 94. 75. 40. 49. 81. 73. 45.  9. 14. 45. 67.  3. 40. 60. 49. 43.]
SUM:  842.0
SORTED:  [ 3.  9. 14. 15. 40. 40. 40. 43. 45. 45. 49. 49. 60. 67. 73

MIN:  0.0
MAX:  90.0
MEAN:  42.6471
GENE COLORS:  [195, 254, 56, 73, 113, 2, 232, 99, 254, 93, 201, 164, 116, 0, 141, 22, 220, 65]
ROW:  [ 10.  78.  52.  18.  18.  67.  96.  86.  36.  92.  40.  50.  80. 100.
   8.  54.  85.  34.]
SUM:  1004.0
SORTED:  [  8.  10.  18.  18.  34.  36.  40.  50.  52.  54.  67.  78.  80.  85.
  86.  92.  96. 100.]
CUTOFF:  17
TRIMMED:  [ 8. 10. 18. 18. 34. 36. 40. 50. 52. 54. 67. 78. 80. 85. 86. 92. 96.]
MIN:  8.0
MAX:  96.0
MEAN:  53.1765
GENE COLORS:  [26, 207, 138, 47, 47, 177, 254, 228, 95, 244, 106, 132, 212, 254, 21, 143, 225, 90]
ROW:  [53. 82. 91. 60. 61.  3. 28. 84. 97.  9. 87. 52. 93. 11.  7. 67. 21. 13.]
SUM:  919.0
SORTED:  [ 3.  7.  9. 11. 13. 21. 28. 52. 53. 60. 61. 67. 82. 84. 87. 91. 93. 97.]
CUTOFF:  17
TRIMMED:  [ 3.  7.  9. 11. 13. 21. 28. 52. 53. 60. 61. 67. 82. 84. 87. 91. 93.]
MIN:  3.0
MAX:  93.0
MEAN:  48.3529
GENE COLORS:  [145, 224, 249, 164, 167, 8, 76, 230, 254, 24, 238, 142, 254, 30, 19, 183, 57, 35]
ROW:  [43. 75. 90. 11. 22. 9

array([[197, 248, 125, ..., 254,  29, 232],
       [252,  47,  47, ..., 237, 132,  11],
       [139, 207, 254, ..., 120, 199, 160],
       ...,
       [ 68,  22, 188, ..., 231, 220, 205],
       [109, 254, 154, ..., 240, 185, 221],
       [222,  44, 135, ...,  17,  90,  49]])

In [21]:
# Tissue level coloring
# Re-bind the per-observation annotation table; the next cell passes it to
# get_replicate_counts.
obs = adata.obs

# TODO: get index positions of each tissue type (not implemented in this cell).

Unnamed: 0_level_0,cell_type,condition,replicate,time_point,time_unit
observations,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
obs-ctrl-1-0h,utricle,control,1,0,hour
obs-ctrl-2-0h,utricle,control,2,0,hour
obs-ctrl-3-0h,utricle,control,3,0,hour
obs-trt-1-0h,utricle,treated,1,0,hour
obs-trt-2-0h,utricle,treated,2,0,hour
obs-trt-3-0h,utricle,treated,3,0,hour
obs-ctrl-1-24h,utricle,control,1,24,hour
obs-ctrl-2-24h,utricle,control,2,24,hour
obs-ctrl-3-24h,utricle,control,3,24,hour
obs-trt-1-24h,utricle,treated,1,24,hour


In [76]:
# Debug aid (disabled): prints one row of X per observation -- 18 rows for
# the example workbook, one for each condition replicate.
# for i, row in enumerate(X):
#     print(row)
    
# Get the number of replicates for each condition (tissue == condition).
# NOTE(review): import in the middle of the notebook; consider moving it to
# the import cell at the top.
from gear.datasetstats import get_replicate_counts
# presumably one count per condition, in the same order as the rows of X --
# verify against get_replicate_counts.
replicate_count = get_replicate_counts(obs)


# Group replicates by tissue (condition): walk through X in consecutive row
# windows, one window per entry of replicate_count, and flatten each window
# into a single array of values.
# assumes the replicates of a condition occupy adjacent rows of X -- TODO confirm
tissues = list()
# NOTE(review): count_reps is never read or updated in this cell.
count_reps = 0
# [start, end) is the row window of the current condition.
start = 0
end = 0
# NOTE(review): the enumerate index `i` is not used in the loop body.
for i, rep_count in enumerate(replicate_count):
    end += rep_count
    current_tissue = X[start:end]
    tissue_flat = current_tissue.flatten()
    
    tissues.append(tissue_flat)
    # Advance the window start to where this condition ended.
    start += rep_count
    
# Debug aids (disabled); the output shown under this cell was produced with
# them enabled.
# print(len(tissues)) #6 conditions for base_template.xlsx
# print(tissues) #success! replicates are grouped

# TODO: get the mean, min, max, etc. of each tissue for coloring.
    # START HERE!

6
[array([ 72.,  79.,  40.,  72.,  90.,   8.,   3.,   3.,   8.,  57., 100.,
        21.,  74.,  86.,  25.,  80.,  16.,  43.,  96.,  51.,  28.,  86.,
        92.,  68.,  98.,  89.,  77.,  43.,  25.,   3.,  59.,  21.,  69.,
        69.,  59.,  18.,  98.,  32.,  97.,  89.,  96.,  55.,  97.,  21.,
        54.,  22.,  67.,  63.,  24.,  44.,  99.,  79.,  97.,   1.,  50.,
         8.,  54.,  12.,  95.,   4.,  16.,  35.,  18.,  43.,  50.,  74.,
        94.,  71.,  89.,  51.,  55.,   6.,  72.,  35.,   4.,  56.,  42.,
        54.,  85.,  94.,  93.,  69.,  55.,  10.,  69.,  10.,  53.,  43.,
        79.,  16.,  98.,  40.,  49.,  86.,  98.,  24.,  40.,  87.,  11.,
        84.,  92.,  15.,  15.,  66.,  97.,   4.,  36.,  92.,  42.,  17.,
         3.,  73.,  55.,  13.,  46.,  53.,  26.,  43.,  67.,  33.,  60.,
        95.,   4.,  15.,  25.,  48.,   1.,  34.,  37.,  85.,  74.,  74.,
        16.,  96.,   8.,  87.,  26.,  71.,  18.,  24.,  34.,  26.,  87.,
        25., 100.,  87.,  71.,   3.,  62.,  33.,