In [20]:
import pandas as pd
import seaborn as sns
import qgrid

In [4]:
# Path to the UCSC net alignment file parsed by this notebook, relative to the
# notebook's working directory.
# NOTE(review): presumably the D. melanogaster (dm3) vs D. virilis (droVir3)
# net, judging by the 'melano_*' / 'virilis_*' column names used when parsing
# -- confirm against the data source.
netFilePath = './dm3.droVir3.net'

In [5]:
# Net File Format Reference: https://genome.ucsc.edu/goldenPath/help/net.html

# Accumulator for the parsed records: one dict per 'fill' line of the net file.
# It is reset here so that re-running this cell does not append duplicates,
# and it is converted to a DataFrame once the whole file has been read.
data = []

def StringIsInt(s):
    """Report whether ``int(s)`` succeeds.

    Returns True when ``int(s)`` converts without error and False when it
    raises ``ValueError``. Only ``ValueError`` is handled; any other
    exception raised by ``int`` propagates to the caller.
    """
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True


# Parse the net file into one record (dict) per 'fill' line and collect the
# records in `data`; the DataFrame is built once, after the loop.
#
# Line classes handled here:
#   * 'net'  -- names the chromosome that the following 'fill' lines belong to.
#   * 'fill' -- an aligned block; this is what gets recorded.
#   * anything else is skipped.

# Start from a known state so a re-run of this cell never reuses the chromosome
# left over in the kernel from a previous run.
current_chromosome = None

with open(netFilePath, 'r') as netfile:
    for line in netfile:
        fields = line.split()

        # A blank line splits into an empty list; fields[0] would raise.
        if not fields:
            continue

        if fields[0] == 'net':
            current_chromosome = fields[1]
            continue

        if fields[0] != 'fill':
            continue

        # These are attributes that are mandatory and fixed
        _, tstart, tsize, qname, plusminus, qstart, qsize = fields[:7]

        # There are extra attributes that are not always specified/are optional.
        # They follow the fixed fields as alternating name/value tokens, so pair
        # them by position. (Looking the name up with fields.index() returns the
        # first match anywhere on the line, which is the wrong slot whenever a
        # name also occurs earlier as a value, and rescans the line each time.)
        # A trailing name without a value is ignored by zip().
        optional_attributes = {
            name: int(value) if StringIsInt(value) else value
            for name, value in zip(fields[7::2], fields[8::2])
        }

        attributes_table = {
            'melano_chromosome': current_chromosome,
            'melano_start': int(tstart),
            'melano_size': int(tsize),
            'virilis_chromosome': qname,
            'relative_orientation': plusminus,
            'virilis_start': int(qstart),
            'virilis_size': int(qsize),
        }

        # Optional attributes are applied last, as before, so they win on a
        # key clash with the fixed columns.
        attributes_table.update(optional_attributes)

        data.append(attributes_table)


my_data_frame = pd.DataFrame(data)

In [11]:
# Keep only rows where both sizes exceed 10000 and the 'type' attribute is
# "syn", then order them from highest to lowest score.
large_syn_filter = 'melano_size > 10000 and virilis_size > 10000 and type == "syn"'

big_syntanic_blocks = (
    my_data_frame
    .query(large_syn_filter)
    .sort_values('score', ascending=False)
)

# Last expression of the cell: renders the filtered table as an interactive grid.
qgrid.show_grid(big_syntanic_blocks)