In [6]:
import pandas as pd
import numpy as np
from scipy.optimize import nnls
from pathlib import Path

In [2]:
def regress_celltype(matrix, fractions):
    """
    Fit a non-negative least squares model for every row of ``matrix``.

    Each row of ``matrix`` is regressed onto the columns of ``fractions``
    with ``scipy.optimize.nnls``. Both inputs are converted to plain arrays
    first, so the columns of ``matrix`` and the rows of ``fractions`` are
    paired by position only; no label alignment is performed here.

    Parameters
    ----------
    matrix : pandas.DataFrame
        One row per fitted feature; its index labels the rows of the result.
    fractions : pandas.DataFrame
        One column per regressor; its column names label the coefficient
        columns of the result.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``matrix``, with one coefficient column per column of
        ``fractions`` plus a ``residuals`` column holding the second value
        returned by ``nnls`` for that row.
    """
    design = np.array(fractions)
    expression = np.array(matrix)

    # One (coefficients, residual) pair per row of the expression matrix.
    fits = [nnls(design, expression[row, :]) for row in range(expression.shape[0])]
    coefficients = [fit[0] for fit in fits]
    residuals = [fit[1] for fit in fits]

    result = pd.DataFrame(
        coefficients,
        index=list(matrix.index),
        columns=list(fractions.columns),
    )
    result['residuals'] = residuals
    return result

In [8]:
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
base_path = Path("/Users/kevin/dzne/rimod_package")

# load expression matrix
mat_file = base_path / "smRNAseq/analysis/deconvolution_0919" / "deseq_rLog_values_frontal_smRNA.txt"
mat = pd.read_csv(mat_file, sep="\t", index_col=0)
# keep only the part of each row label before the first "."
mat.index = [x.split(".")[0] for x in mat.index]
# strip the "sample_" prefix, then keep the first five characters as sample ID
cols = [x.replace("sample_", "") for x in mat.columns]
mat.columns = [x[0:5] for x in cols]

# load predicted fractions
fracs_file = base_path / "analysis/deconvolution/cdn_predictions.txt"
fracs = pd.read_csv(fracs_file, sep="\t", index_col=0)
# remove every "X" from the row labels, then truncate to five characters
# so they are comparable with the expression matrix columns
fracs.index = [x.replace("X", "")[0:5] for x in fracs.index]


# Get common samples
cmn = set(mat.columns).intersection(set(fracs.index))
print(f"{len(cmn)} common samples")

# Subset and order according to common samples.
# pandas no longer accepts a set as an indexer, and set iteration order is
# not stable between runs, so select with a sorted list instead.
cmn_samples = sorted(cmn)
mat = mat[cmn_samples]
fracs = fracs.loc[cmn_samples]

# Truncating IDs to five characters could create duplicate labels, which
# would make the two selections differ; fail loudly instead of regressing
# on misaligned samples.
assert list(mat.columns) == list(fracs.index), (
    "expression columns and fraction rows are not aligned "
    "(duplicate sample IDs after truncation?)"
)


41 common samples


# Regress expression of every miRNA

In [17]:
# Run the regression on the aligned inputs and write the resulting table
# as a tab-separated file next to the input expression matrix.
out_file = base_path / "smRNAseq/analysis/deconvolution_0919" / "miRNA_celltype_expression.txt"
df = regress_celltype(mat, fracs)
df.to_csv(out_file, sep="\t")