In [1]:
import pandas as pd
import numpy as np
from scipy.optimize import nnls

In [28]:
def regress_celltype(matrix, fractions):
    """
    Regress per-feature expression onto cell-type fractions with NNLS.

    For every row of ``matrix`` this solves ``min ||F x - m||_2`` subject to
    ``x >= 0`` using ``scipy.optimize.nnls``, where ``F`` is the array of
    ``fractions`` and ``m`` is that row of ``matrix``.

    Parameters
    ----------
    matrix : pandas.DataFrame
        One row per feature, one column per sample.
    fractions : pandas.DataFrame
        One row per sample, one column per cell type. If its index holds
        exactly the same unique labels as ``matrix.columns`` but in a
        different order, the rows are reordered to match; otherwise the rows
        are used positionally, as given.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``matrix``; one column per column of ``fractions``
        holding the non-negative coefficients, plus a ``'residuals'`` column
        holding the residual norm reported by ``nnls`` for each row.

    Raises
    ------
    ValueError
        If the number of columns in ``matrix`` differs from the number of
        rows in ``fractions``.
    """
    sample_ids = matrix.columns
    # The regression is only meaningful if row k of F describes the same
    # sample as column k of M. When the labels make that mapping unambiguous,
    # enforce it instead of trusting the caller's ordering.
    if (
        sample_ids.is_unique
        and fractions.index.is_unique
        and not sample_ids.equals(fractions.index)
        and set(sample_ids) == set(fractions.index)
    ):
        fractions = fractions.reindex(sample_ids)

    M = np.array(matrix)
    F = np.array(fractions)
    if M.shape[1] != F.shape[0]:
        raise ValueError(
            f"matrix has {M.shape[1]} sample columns but fractions has "
            f"{F.shape[0]} sample rows"
        )

    # nnls returns (solution vector, residual norm) for each feature row.
    fits = [nnls(F, M[i, :]) for i in range(M.shape[0])]

    df = pd.DataFrame(
        [coef for coef, _ in fits],
        index=list(matrix.index),
        columns=list(fractions.columns),
    )
    df['residuals'] = [rnorm for _, rnorm in fits]
    return df

In [25]:
data_dir = "/home/kevin/rimod/smallRNA/frontal/analysis/deconvolution_0919/"

# load expression matrix
mat = pd.read_csv(data_dir + "deseq_rLog_values_frontal_smRNA.txt", sep="\t", index_col=0)
# Keep only the part of each row label before the first "."
mat.index = [x.split(".")[0] for x in list(mat.index)]
# Drop the "sample_" text and keep the first five characters as the sample ID
cols = [x.replace("sample_", "") for x in list(mat.columns)]
mat.columns = [x[0:5] for x in cols]

# load predicted fractions
fracs = pd.read_csv("/home/kevin/rimod/RNAseq/analysis/deconvolution/cdn_predictions.txt", sep="\t", index_col=0)
# NOTE(review): this removes every "X" in the label, not only a leading one —
# confirm that sample IDs never contain an "X" themselves.
fracs.index = [x.replace("X", "") for x in list(fracs.index)]
fracs.index = [x[0:5] for x in list(fracs.index)]


# Get common samples
# A sorted list rather than a set: pandas no longer accepts a set as an
# indexer, and sorting gives the same sample order on every run.
cmn = sorted(set(mat.columns).intersection(fracs.index))
print(f"{len(cmn)} common samples")

# Subset and order according to common samples
mat = mat[cmn]
fracs = fracs.loc[cmn]


41 common samples


In [38]:
# Regress every feature onto the cell-type fractions of the matched samples.
df = regress_celltype(matrix=mat, fractions=fracs)
# Display the fitted coefficients and residual for one miRNA.
df.loc["hsa-miR-191-5p"]

Unknown              4.974948
InNeurons           15.081718
Oligodendrocytes    15.126995
Endothelial         13.688309
Microglia           23.761017
Astrocytes          16.939504
OPC                  0.000000
ExNeurons           16.222345
residuals            3.818802
Name: hsa-miR-191-5p, dtype: float64