# Notebook to convert from mtx/tsv format to h5ad

In [None]:
import logging
import anndata
# Prefer the legacy `scanpy.api` entry point; if it cannot be imported, fall
# back to the top-level `scanpy` package and warn about the version mismatch.
# Only ImportError is caught so that unrelated failures (and Ctrl-C) are not
# silently swallowed.
try:
    import scanpy.api as sc
except ImportError:
    import scanpy as sc
    logging.warning("scGAN depends on an old version of scanpy (1.2.2)"
              " but you have '{}' installed, be cautious.".format(sc.__version__))
# Compare only the first two dot-separated components of the anndata version.
if anndata.__version__.split(".")[:2] != ["0", "6"]:
    logging.error("scGAN depends on an old version of the anndata format (0.6)," +
              " but you have '{}' installed.".format(anndata.__version__) +
              " This can break scGANs ability to read this file.")
import os
import csv

## Dataset paths

In [None]:
# Directory holding the 10x files, and the file names inside it.
# Edit `dataset_dir` to point at your local copy of the dataset.
dataset_dir = "/path_to_where_you_saved/Fresh_68k_PBMCs/hg19/"
data_file = "matrix.mtx"
var_names_file = "genes.tsv"
obs_names_file = "barcodes.tsv"
output_h5ad_file = "68kPBMCs.h5ad"

# Full paths: every file (including the output) lives inside `dataset_dir`.
data_path, var_names_path, obs_names_path, output_h5ad_path = (
    os.path.join(dataset_dir, file_name)
    for file_name in (data_file, var_names_file, obs_names_file, output_h5ad_file)
)

## Loading the variable (gene) names

In [None]:
# Read the tab-separated variable file and keep the second column of each row
# as the variable name (presumably the gene symbol in a 10x genes.tsv -- verify
# against your file).
# newline="" is what the csv module documentation recommends for file objects.
with open(var_names_path, "r", newline="") as var_file:
    var_read = csv.reader(var_file, delimiter='\t')
    # Empty rows (e.g. a trailing blank line) are skipped; indexing row[1] on
    # them would otherwise raise IndexError.
    var_names = [row[1] for row in var_read if row]



## Loading the observation (cell barcode) names

In [None]:
# Read the tab-separated observation file and keep the first column of each
# row as the observation name.
# newline="" is what the csv module documentation recommends for file objects.
with open(obs_names_path, "r", newline="") as obs_file:
    obs_read = csv.reader(obs_file, delimiter='\t')
    # Empty rows (e.g. a trailing blank line) are skipped; indexing row[0] on
    # them would otherwise raise IndexError.
    obs_names = [row[0] for row in obs_read if row]

## Loading the data (can take a while) and transposing it (10x Genomics uses a different row/column convention than AnnData)

In [None]:
# Read the matrix file and immediately swap its axes, so that `andata` holds
# the transposed data.
andata = sc.read(data_path).transpose()

## Loading the variable and observation names into the AnnData object (and making them unique)

In [None]:
# Observation axis: attach the names read from the observation file, then
# de-duplicate them.
andata.obs_names = obs_names
andata.obs_names_make_unique()

# Variable axis: attach the names read from the variable file, then
# de-duplicate them.
andata.var_names = var_names
andata.var_names_make_unique()

## Write the AnnData object to an h5ad file

In [None]:
# Persist the assembled AnnData object at the output path built earlier.
andata.write(output_h5ad_path)