Skip to content

Commit

Permalink
Update TOGA_assemblyStats.py
Browse files Browse the repository at this point in the history
Added the -i parameter to output the full assembly-vs-gene TOGA class matrix
  • Loading branch information
MichaelHiller committed Feb 6, 2024
1 parent 1c9de8d commit 97eb5a1
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion supply/TOGA_assemblyStats.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@
{sys.argv[0]} <assemblies_file> -m merge (optional: -ances <ancestral_gene_file>, -pre <TOGAclass1#TOGAclass2#TOGAclass3...> )
To make the summary statistics of TOGA classification
{sys.argv[0]} <assemblies_file> -m stats -ances <ancestral_gene_file> (optional: -aN <assembly_names_file>, -d)
{sys.argv[0]} <assemblies_file> -m stats -ances <ancestral_gene_file> (optional: -aN <assembly_names_file>, -d, -i)
-pre is I#PI#UL#L#M#PM#PG#N#abs by default; it changes the order in which classes are considered
-ances is a file where each line is a gene you want to keep
-aN is to specify the names of the assemblies, otherwise TOGA directory names will be used
-d is to display the statistics for each class TOGA recognizes
-i is to output the full matrix with the class of each gene in each assembly (use with -m stats)
"""

Expand All @@ -45,6 +46,7 @@
parser.add_argument('-pre', '--precedence',default="I#PI#UL#L#M#PM#PG#N#abs")
parser.add_argument('-aN', '--assemblyNames',default=False)
parser.add_argument('-d', '--detailed',action='store_true') # on/off flag
parser.add_argument('-i', '--intermediate',action='store_true')
ARGS = parser.parse_args()


Expand Down Expand Up @@ -160,6 +162,8 @@ def stats(df):
if ARGS.mode=="stats":
print("Calculating statistics")
stats(CLASSES)
if ARGS.intermediate!=False:
CLASSES.rename(columns=NAMES).to_csv(filename+"_full.tsv",sep="\t")
elif ARGS.mode=="merge":
print("Merging in progress")
genes=merge(CLASSES, "genes")
Expand Down

0 comments on commit 97eb5a1

Please sign in to comment.