In [62]:
import pandas as pd
import anndata
from logging import getLogger

In [72]:
def read_featureCounts(path_to_featureCounts_data: str, path_to_conditions: str="") -> "anndata.AnnData":
    """Load a featureCounts table into an AnnData object (samples x genes).

    Parameters
    ----------
    path_to_featureCounts_data
        Tab-separated featureCounts output. The first line of the file is
        skipped, the following line is used as the header and the first
        column becomes the gene index. The columns ``Chr``, ``Start``,
        ``End``, ``Strand`` and ``Length`` are kept as gene annotations
        (``.var``); every remaining column is treated as one sample.
    path_to_conditions
        Optional comma-separated table with one row per sample; its first
        column is used as the index. When empty (or ``None``) no conditions
        are attached and a warning is printed.

    Returns
    -------
    anndata.AnnData
        Samples as observations and genes as variables. ``.obs`` is the
        conditions table when one was supplied.

    Raises
    ------
    ValueError
        If the conditions table does not have exactly one row per sample
        column of the count table.
    """
    # The return annotation is a string naming the public class:
    # ``anndata.core`` is a private module path, so spelling it out would make
    # the ``def`` itself depend on the internals of the installed anndata.
    annotation_columns = ['Chr', 'Start', 'End', 'Strand', 'Length']

    featureCounts_df = pd.read_csv(path_to_featureCounts_data, sep="\t", skiprows=1, index_col=0)
    counts_df = featureCounts_df.drop(annotation_columns, axis=1)
    sample_names = list(counts_df.columns)

    conditions_df = None
    if path_to_conditions:
        conditions_df = pd.read_csv(path_to_conditions, sep=",", index_col=0)

        # Fail early with a readable message instead of a shape error raised
        # after the AnnData object has already been built.
        if len(conditions_df) != len(sample_names):
            raise ValueError(
                "conditions table has {} row(s) but the featureCounts table has {} "
                "sample column(s)".format(len(conditions_df), len(sample_names))
            )

        # Assigning ``.obs`` is positional. When the conditions file lists the
        # same samples in a different order, realign it so that every label
        # stays attached to its own sample. When the names differ (e.g. the
        # count columns are BAM paths) the positional behaviour is kept.
        condition_ids = list(conditions_df.index)
        if (
            condition_ids != sample_names
            and conditions_df.index.is_unique
            and counts_df.columns.is_unique
            and set(condition_ids) == set(sample_names)
        ):
            conditions_df = conditions_df.loc[sample_names]
    else:
        print("WARNING: If you want to DEG analysis, please load conditions")

    # Build genes x samples first (gene annotations ride along as ``obs``),
    # then transpose so that samples become observations and genes variables.
    adata = anndata.AnnData(counts_df, obs=featureCounts_df[annotation_columns]).T
    if conditions_df is not None:
        adata.obs = conditions_df
    return adata

In [84]:
# Example inputs: the per-sample condition labels and the featureCounts table.
path_to_conditions = "./data/read_featureCounts/conditions.txt"
path_to_featureCounts_data = "./data/read_featureCounts/featureCounts_sample.txt"

# Destination of the loom file written from the assembled AnnData object.
output_path = "./data/read_featureCounts/sample.loom"

In [76]:
# Build the AnnData object from the example count table and its condition labels.
adata = read_featureCounts(
    path_to_featureCounts_data=path_to_featureCounts_data,
    path_to_conditions=path_to_conditions,
)

In [77]:
# Per-sample table: read_featureCounts replaces .obs with the conditions table.
adata.obs

Unnamed: 0_level_0,conditions
id,Unnamed: 1_level_1
k5,mock
k6,mock
k7,mock
k8,+
k9,+
k10,+


In [78]:
# Per-gene annotations taken from the featureCounts columns Chr, Start, End, Strand, Length.
adata.var

Unnamed: 0_level_0,Chr,Start,End,Strand,Length
Geneid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MapolyY_A0001,Chr_Y_A,10168,10935,-,768
MapolyY_A0002,Chr_Y_A,14752,16054,-,1303
MapolyY_A0003,Chr_Y_A,69760,71066,-,1307
MapolyY_A0004,Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A,74668;75039;75534;75986;84872;84872,74870;75338;75600;77256;86506;86506,+;+;+;+;+;+,3476
MapolyY_A0005,Chr_Y_A,125980,126717,-,738
...,...,...,...,...,...
Mapoly0099s0063,scaffold_99;scaffold_99;scaffold_99;scaffold_99,770737;771728;772563;773007,771554;772149;772759;775346,-;-;-;-,3777
Mapoly0991s0001,scaffold_991,5953,6171,-,219
Mapoly0992s0001,scaffold_992;scaffold_992;scaffold_992;scaffol...,11;547;996;1312;1839,255;712;1187;1652;1883,-;-;-;-;-,989
Mapoly0994s0001,scaffold_994,4875,6206,-,1332


In [81]:
# Write the AnnData object to a loom file at output_path.
adata.write_loom(output_path)

In [85]:
# Read the loom file back to inspect what survives the round trip.
adata_from_loom = anndata.read_loom(output_path)

In [86]:
# Summary of the reloaded object (dimensions plus obs/var column names).
adata_from_loom

AnnData object with n_obs × n_vars = 6 × 19287 
    obs: 'conditions'
    var: 'Chr', 'End', 'Length', 'Start', 'Strand'

In [87]:
# Gene annotations after the round trip. In the recorded output the columns
# come back in a different order than in adata.var.
adata_from_loom.var

Unnamed: 0,Chr,End,Length,Start,Strand
MapolyY_A0001,Chr_Y_A,10935,768,10168,-
MapolyY_A0002,Chr_Y_A,16054,1303,14752,-
MapolyY_A0003,Chr_Y_A,71066,1307,69760,-
MapolyY_A0004,Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A;Chr_Y_A,74870;75338;75600;77256;86506;86506,3476,74668;75039;75534;75986;84872;84872,+;+;+;+;+;+
MapolyY_A0005,Chr_Y_A,126717,738,125980,-
...,...,...,...,...,...
Mapoly0099s0063,scaffold_99;scaffold_99;scaffold_99;scaffold_99,771554;772149;772759;775346,3777,770737;771728;772563;773007,-;-;-;-
Mapoly0991s0001,scaffold_991,6171,219,5953,-
Mapoly0992s0001,scaffold_992;scaffold_992;scaffold_992;scaffol...,255;712;1187;1652;1883,989,11;547;996;1312;1839,-;-;-;-;-
Mapoly0994s0001,scaffold_994,6206,1332,4875,-
