In [2]:
# !pip install biobox

In [1]:
import biobox as bb
import numpy as np
import os

In [2]:
# Relative path to the example PDB structure file.
pdb_file = 'Example/Lysozyme/Lysozyme.pdb'

In [3]:
# Directory (inside the current working directory) that the write_* helpers
# in this notebook write their output files into.
current = os.getcwd()
working = 'Fitting'
working_path = os.path.join(current, working)
try:
    os.mkdir(working_path)
    print('Making directory ', working_path)
except OSError as error:
    # Best effort: report the problem (typically "File exists") and carry on.
    # The message is assembled from the exception's own fields rather than by
    # slicing 11 characters off str(error); that slice only removes the
    # "[Errno NN] " prefix cleanly when the errno has exactly two digits.
    print('{}: {!r}'.format(error.strerror, error.filename))


# One-letter -> three-letter residue codes for the 20 standard amino acids.
aa_names = {
    'A': 'ALA', 'C': 'CYS', 'D': 'ASP', 'E': 'GLU', 'F': 'PHE',
    'G': 'GLY', 'H': 'HIS', 'I': 'ILE', 'K': 'LYS', 'L': 'LEU',
    'M': 'MET', 'N': 'ASN', 'P': 'PRO', 'Q': 'GLN', 'R': 'ARG',
    'S': 'SER', 'T': 'THR', 'V': 'VAL', 'W': 'TRP', 'Y': 'TYR',
}

# Reverse lookup: three-letter code -> one-letter code.
names_aa = dict(zip(aa_names.values(), aa_names.keys()))


def pdb_2_biobox(pdb_file):
    '''Load a PDB file into a new biobox Molecule

    Parameters
    pdb_file:  /path/ to the PDB file to import

    Return
    The bb.Molecule instance after import_pdb has been called on it
    '''
    molecule = bb.Molecule()
    molecule.import_pdb(pdb_file)
    return molecule


def extract_CA_coordinates(M):
    '''Return the coordinates of every atom named 'CA'

    Parameters
    M:  Object exposing `data` (table with 'name' and 'resid' columns) and
        `coordinates`; only the first coordinate set, M.coordinates[0], is read

    Return
    The rows of M.coordinates[0] that belong to CA atoms

    Raises
    Exception if the number of CA atoms differs from the number of unique resids
    '''
    atom_table = M.data
    is_ca = (atom_table['name'] == 'CA').values
    ca_xyz = M.coordinates[0][is_ca]

    # Sanity check: exactly one CA atom is expected per distinct residue id.
    n_residues = atom_table['resid'].nunique()
    if ca_xyz.shape[0] != n_residues:
        raise Exception("You better check your PDB... The number of CA atoms does not equal the number of ResIDs in your PDB file!")

    return ca_xyz

    
def extract_sequence(M):
    '''Return the one-letter residue code of every CA atom

    Parameters
    M:  Object exposing `data`, a table with 'name', 'resname' and 'resid' columns

    Return
    Array of one-letter codes looked up in names_aa, one per CA atom;
    residue names missing from names_aa come back as NaN

    Raises
    Exception if the number of CA atoms differs from the number of unique resids
    '''
    atom_table = M.data
    is_ca = (atom_table['name'] == 'CA').values
    one_letter = atom_table['resname'][is_ca].map(names_aa).values

    # Sanity check: exactly one CA atom is expected per distinct residue id.
    n_residues = atom_table['resid'].nunique()
    if one_letter.shape[0] != n_residues:
        raise Exception("You better check your PDB... The number of CA atoms does not equal the number of ResIDs in your PDB file!")

    return one_letter


# def sequence_2_secondary(sequence):
    # Run some form of DSSP
    # Map output to simplistic (a, b, l) ss
    # return secondary structure
    

def write_fingerprint_file(number_chains, sequence, secondary_structure, output_dir=None):
    '''Write fingerprint.dat holding chain count, sequence and secondary structure

    The three entries are separated by a line containing a single space.

    Parameters
    number_chains:        Number of chains (must be an int)
    sequence:             Iterable of residue code strings, joined without separator
    secondary_structure:  Iterable of secondary structure symbols, joined without separator
    output_dir:           Directory to write into; defaults to the notebook-level working_path

    Raises
    AssertionError if number_chains is not an int
    Exception if the joined sequence and secondary structure differ in length
    '''

    assert isinstance(number_chains, int), 'Yikes... The number of chains is not int type!'

    seq_run = ''.join(list(sequence))
    ss_run = ''.join(list(secondary_structure))

    if len(seq_run) != len(ss_run):
        raise Exception("Uh Oh... The length of sequence and secondary structure is not equal!")

    if output_dir is None:
        output_dir = working_path

    # The context manager closes (and flushes) the file even if a write fails.
    with open(os.path.join(output_dir, "fingerprint.dat"), "w") as f:
        f.write('\n \n'.join([str(number_chains), seq_run, ss_run]))
    
    
def write_coordinates_file(ca_coords, output_dir=None):
    '''Write the CA coordinates to coordinates.dat, one space-separated row per line

    Parameters
    ca_coords:   numpy array of CA coordinates
    output_dir:  Directory to write into; defaults to the notebook-level working_path

    Raises
    AssertionError if ca_coords is not a numpy array
    '''

    # Validate and save the argument itself; the previous body read a global
    # named `coords` and silently ignored whatever was passed in.
    assert isinstance(ca_coords, np.ndarray), 'Thats never good... the CA coordinates are not a numpy array'

    if output_dir is None:
        output_dir = working_path

    np.savetxt(os.path.join(output_dir, 'coordinates.dat'), ca_coords, delimiter=' ', fmt='%s', newline='\n', header='', footer='')
    
    
def write_mixture_file(default=True, output_dir=None):
    '''Write mixtureFile.dat containing the single value 1

    Parameters
    default:     When True write the default file; when False nothing is written
    output_dir:  Directory to write into; defaults to the notebook-level working_path
    '''
    if default:
        if output_dir is None:
            output_dir = working_path

        # The context manager guarantees the file is flushed and closed;
        # the handle was previously opened and never closed.
        with open(os.path.join(output_dir, "mixtureFile.dat"), "w") as f:
            f.write(str(1))

    # TODO: else branch -> copy a user-supplied input file (not implemented yet)


# def write_varysections_file():
#     auto: run beta sheet breaking code; write output sections to file
#     explicit: copy user defined file


# def write_saxs():
#     copy user defined saxs file
#     options: trim?

File exists: '/home/josh/Documents/CarbonaraDev/CarbonARA/Fitting'


In [4]:
# Build the input files for the example structure.
# Reuse the path stored in pdb_file instead of repeating the literal here.
M = pdb_2_biobox(pdb_file)
coords = extract_CA_coordinates(M)
sequence = extract_sequence(M)

# Placeholder secondary structure: every residue is marked '-'.
# NOTE(review): the chain count is hard-coded to 3 — confirm it matches this PDB.
write_fingerprint_file(3, sequence, ['-']*len(sequence))
write_coordinates_file(coords)

write_mixture_file()

In [5]:
def section_finder(ss):

    '''Find protein sub-unit sections from the full secondary structure

    Each run of identical consecutive symbols is collapsed to one entry,
    e.g. ['H', 'H', '-', 'S'] -> ['H', '-', 'S'].

    Parameters
    ss:  Per-residue secondary structure symbols (string, list or 1-D array)

    Return
    1-D numpy array with one symbol per section, in input order
    (empty array for empty input)
    '''

    # list() lets a plain string be treated as a sequence of symbols.
    symbols = np.asarray(list(ss))

    if symbols.size == 0:
        return symbols

    # A residue closes a section when the next symbol differs. The final
    # residue always closes the last section, so it is flagged explicitly;
    # taking the last symbol from the residue before it would replace a
    # trailing single-residue section with a duplicate of its neighbour.
    is_section_end = np.append(symbols[:-1] != symbols[1:], True)

    return symbols[is_section_end]


def find_linker_indices(sections):

    '''Return the indices of the entries in `sections` equal to '-' (linker sections)

    Parameters
    sections:  numpy array of section symbols

    Return
    Array of matching indices along the first axis
    '''

    is_linker = (sections == '-')
    return np.where(is_linker)[0]


def find_sheet_indices(sections):

    '''Return the indices of the entries in `sections` equal to 'S' (sheet sections)

    Parameters
    sections:  numpy array of section symbols

    Return
    Array of matching indices along the first axis
    '''

    is_sheet = (sections == 'S')
    return np.where(is_sheet)[0]

In [6]:
def generate_random_structures(coords_file, fingerprint_file, linker_indices):
    
    '''Generate random structures changing one linker section at a time
    
    Parameters
    coords_file:       /path/ to CA coordinates.dat file
    fingerprint_file:  /path/ to fingerprint.dat file
    linker_indices:    Indices of linker sub-unit sections of protein
    
    Return
    Generated structures are written to ~/rand_structures/.. section_*LINKERINDEX*.dat as xyz
    ''' 
    
    current = os.getcwd()
    random = 'rand_structures'
    random_working = os.path.join(current, random)

    try:
        os.mkdir(random_working)
    except OSError as error:
        print(str(error)[11:])

    # linker_indices = linker_prep(coords_file, fingerprint_file)
    
    print('Beginning random structures generation \n')
    for l in tqdm(linker_indices):
        
        outputname = '/section_'+str(l)
        !./generate_structure {fingerprint_file} {coords_file} {random_working}{outputname} {l}
        
    print('')
    print('Finished generating random structures')

In [8]:
# Relative path to a fingerprint file from the TestBed examples.
secondary = 'TestBed/human_SMARCAL2/fingerPrint1.dat'

In [9]:
# Quick numeric peek at the fingerprint file. `np.(secondary)` was a syntax
# error; np.genfromtxt is consistent with the recorded output below, where the
# non-numeric lines come back as nan.
# NOTE(review): confirm genfromtxt was the intended call.
np.genfromtxt(secondary)

array([ 1., nan, nan])

In [None]:
# section_finder expects the per-residue secondary structure symbols, not the
# path to the fingerprint file, so read them from the file first.
# NOTE(review): assumes the layout written by write_fingerprint_file (chain
# count, sequence, secondary structure, separated by blank lines), i.e. the
# secondary structure is the last non-blank line — confirm for multi-chain files.
with open(secondary) as fingerprint:
    fingerprint_lines = [line.strip() for line in fingerprint if line.strip()]

sections = section_finder(list(fingerprint_lines[-1]))
# linker_indices = find_linker_indices(sections)