In [12]:
import re

import numpy as np

def necessary_information(filename):
    """Read a FASTA-style file and measure every record in it.

    A line starting with '>' opens a new record; every other line is treated
    as sequence data belonging to the most recent record. All whitespace is
    removed from sequence lines before they are counted.

    Parameters
    ----------
    filename : str or path-like
        Path of the file to read.

    Returns
    -------
    longest : str
        Sequence of the longest record. On a tie the earliest record wins.
        Empty string when the file has no records or all records are empty.
    lengths : numpy.ndarray
        1-D float64 array holding the sequence length of each record, in
        file order (empty when the file has no records).

    Raises
    ------
    ValueError
        If sequence data appears before the first '>' header line.
    """
    # A plain list keeps appends O(1); growing a NumPy array with np.append
    # copies the whole array on every record.
    lengths = []
    longest = ''
    chunks = []  # pieces of the record currently being read

    with open(filename) as fd:
        # Iterate lazily instead of loading every line with readlines().
        for line_number, line in enumerate(fd, start=1):
            if line.startswith('>'):
                # The previous record is complete: keep it if it is the longest so far.
                if lengths and lengths[-1] > len(longest):
                    longest = ''.join(chunks)
                lengths.append(0)
                chunks = []
                continue

            # str.split() with no argument drops every kind of whitespace
            # (spaces, tabs, line endings), not just ' ' and '\n'.
            residues = ''.join(line.split())
            if not residues:
                continue  # blank line: nothing to count
            if not lengths:
                raise ValueError(
                    f'{filename}: sequence data on line {line_number} '
                    f"precedes the first '>' header"
                )
            chunks.append(residues)
            lengths[-1] += len(residues)

    # The last record has no following header, so it is checked here.
    if lengths and lengths[-1] > len(longest):
        longest = ''.join(chunks)

    # float dtype matches what callers received from the original np.empty(0) based array.
    return longest, np.array(lengths, dtype=float)


In [13]:
def print_stats(lengths, name):
    """Print summary statistics for a collection of sequence lengths.

    Reports the number of sequences, their total length, the longest one and
    the N50: the length of the sequence at which the running total, taken
    from longest to shortest, first reaches half of the total length.

    Parameters
    ----------
    lengths : array-like
        Sequence lengths; any order.
    name : str
        Label used in the report heading.

    Returns
    -------
    None
        The report is written to standard output. For an empty `lengths`
        the longest length and N50 are reported as 'n/a'.
    """
    ordered = np.sort(np.asarray(lengths))[::-1]  # longest first
    total = ordered.sum()  # named `total` so the built-in sum() is not shadowed

    print(f'Statistics for {name}:')
    print(f'Total number: {ordered.size}')
    print(f'Total length: {total}')

    if ordered.size == 0:
        # Nothing to index into; finish the report instead of raising IndexError.
        print('Length of the longest: n/a')
        print('N50: n/a')
        return

    print(f'Length of the longest: {ordered[0]}')
    running_total = np.cumsum(ordered)
    # argmax on a boolean array returns the position of the first True.
    n50_index = np.argmax(running_total >= total / 2)
    print(f'N50: {ordered[n50_index]}')


In [14]:
def print_gap_stats(sequence, name):
    """Print how many gaps a sequence contains and their combined length.

    A gap is a maximal run of consecutive 'N' characters (upper or lower
    case). Counting runs directly also catches a gap at the very start of
    the sequence and gaps preceded by something other than A/C/G/T, which a
    count of the 'AN', 'TN', 'GN' and 'CN' pairs misses.

    Parameters
    ----------
    sequence : str
        Sequence text to scan.
    name : str
        Label used in the report heading.

    Returns
    -------
    None
        The report is written to standard output.
    """
    gap_runs = re.findall(r'[Nn]+', sequence)
    total_number = len(gap_runs)
    total_length = sum(len(run) for run in gap_runs)
    print(f'Gap statistics for {name}:')
    print(f'Total number: {total_number}')
    print(f'Total length of gaps: {total_length}')

In [20]:
# Parse each assembly output. Every call returns the longest sequence in the
# file together with an array holding the length of every record.
longest_contig, contig_length = necessary_information(
    filename='contig/out_contig.fa'
)
longest_scaffold, scaffold_length = necessary_information(
    filename='scaffold/Poil_scaffold.fa'
)
longest_scaffold_gap, scaffold_gap_length = necessary_information(
    filename='gapclose/Poil_gapClosed.fa'
)

# Length statistics for the contig and scaffold files, separated by blank lines.
print_stats(lengths=contig_length, name='contigs')
print()
print_stats(lengths=scaffold_length, name='scaffolds')
print()

# Gap statistics for the longest sequence of the scaffold file and of the
# gap-closed file.
print_gap_stats(sequence=longest_scaffold, name='longest scaffold before gapclose')
print()
print_gap_stats(sequence=longest_scaffold_gap, name='longest scaffold after gapclose')

Statistics for contigs:
Total number: 594
Total length: 3922956.0
Length of the longest: 179304.0
N50: 48055.0

Statistics for scaffolds:
Total number: 67
Total length: 3874616.0
Length of the longest: 3834998.0
N50: 3834998.0

Gap statistics for longest scaffold before gapclose:
Total number: 61
Total length of gaps: 7380

Gap statistics for longest scaffold after gapclose:
Total number: 7
Total length of gaps: 2037
