In [1]:
# Shows how to read a sparse Matrix Market (.mtx) file into a pandas DataFrame and write it out as a tab-separated file.

In [2]:
import pandas as pd
import seaborn as sns
import os

# Lets us import sparse matrices
import scipy.io
import scipy.sparse
import csv

In [3]:
# Directory holding the input files (matrix.mtx, genes.tsv, barcodes.tsv).
input_dir = "/projects/ps-yeolab3/iachaim/Cleber_Organoids/6_months/H10/"
# Relative directory name reserved for outputs.
output_dir = "outputs/"

In [4]:
def read_mtx_as_dataframe(mtx_file, columns_file, rows_file):
    """
    Reads a Matrix Market (mtx) file and returns it as a dense pandas dataframe.

    Column and row labels are taken from the first tab-separated field of
    each non-empty line of ``columns_file`` and ``rows_file``; any further
    fields on a line are ignored.

    Note: the matrix is fully densified, so memory use scales with
    (number of rows) x (number of columns).

    :param mtx_file: path to the Matrix Market file (sparse or dense format)
    :param columns_file: tab-separated file containing column names
    :param rows_file: tab-separated file containing row names

    :return df: Pandas.DataFrame()
    """
    def _first_fields(path):
        # The `with` block guarantees the handle is closed once the labels
        # are read. Blank lines come back from csv.reader as empty lists and
        # are skipped so they cannot trigger an IndexError on `fields[0]`.
        with open(path, newline="") as handle:
            return [
                fields[0]
                for fields in csv.reader(handle, delimiter="\t")
                if fields
            ]

    mat = scipy.io.mmread(mtx_file)
    # mmread returns a sparse container for "coordinate" files but a plain
    # ndarray for dense "array" files; only the former needs densifying.
    # toarray() yields a regular ndarray rather than the legacy np.matrix.
    if scipy.sparse.issparse(mat):
        mat = mat.toarray()

    columns = _first_fields(columns_file)
    rows = _first_fields(rows_file)
    df = pd.DataFrame(mat, columns=columns, index=rows)
    return df

# Build the full paths of the three input files inside input_dir.
mtx, genes, barcodes = (
    os.path.join(input_dir, file_name)
    for file_name in ("matrix.mtx", "genes.tsv", "barcodes.tsv")
)

# Barcodes label the columns and genes label the rows of the count table.
raw_counts = read_mtx_as_dataframe(
    mtx_file=mtx,
    columns_file=barcodes,
    rows_file=genes,
)
# Preview the first rows as the cell output.
raw_counts.head()

Unnamed: 0,AAACCTGAGACGCAAC-1,AAACCTGAGTCATGCT-1,AAACCTGAGTTAGCGG-1,AAACCTGCATCGGGTC-1,AAACCTGCATCTGGTA-1,AAACGGGCATTAGCCA-1,AAACGGGGTACGCACC-1,AAACGGGGTCTCTCGT-1,AAACGGGTCAGGTTCA-1,AAACGGGTCATAAAGG-1,...,TTTGGTTAGACCTAGG-1,TTTGGTTAGCCCAACC-1,TTTGGTTGTCCATCCT-1,TTTGGTTGTGTGGTTT-1,TTTGGTTTCACCCGAG-1,TTTGTCAAGACATAAC-1,TTTGTCACACACCGCA-1,TTTGTCACAGCTGTTA-1,TTTGTCAGTTGGGACA-1,TTTGTCATCCAGGGCT-1
ENSG00000243485,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000237613,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000186092,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000238009,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ENSG00000239945,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Write the count table (row labels included as the first column) to a
# tab-separated file.
raw_counts.to_csv(
    "/projects/ps-yeolab3/bay001/from_alex/6_months_WT_H10_filters-Read10X_raw_counts.tsv",
    sep="\t",
)