# Imports

In [None]:
%env OE_LICENSE=/Users/alexpayne/oe_license.txt
import numpy as np
import sys
import pickle as pkl
import importlib
import re
from glob import glob
from openeye import oechem
oechem.OEChemIsLicensed("python")

In [None]:
from asapdiscovery.data import openeye, pdb, utils

In [None]:
utils.seqres_to_res_list

## file paths

In [None]:
mers_pdb_dir = "/Users/alexpayne/lilac-mount-point/asap-datasets/mers_fauxalysis/mers_pdb_download/"

# Goal

It feels like there should be an easier way to mutate a residue and/or to generate the SEQRES records from the amino acid sequence.


# What information does a loaded pdb have?

In [None]:
# Load every *.pdb file found under mers_pdb_dir with openeye.load_openeye_pdb.
# NOTE(review): glob() does not guarantee any particular ordering, so the positional
# indices used in later cells (pdbs[0], pdbs[1], pdbs[7], pdbs[-4]) may pick different
# structures on another machine -- consider sorted(glob(...)) if that matters.
pdbs = [openeye.load_openeye_pdb(pdb_path) for pdb_path in glob(f"{mers_pdb_dir}/*.pdb")]

In [None]:
pdb0 = pdbs[0]

In [None]:
type(pdb0)

In [None]:
pdb0.GetData()

## Can I get the sequence?

In [None]:
pdb0.GetTitle()

In [None]:
pdb1 = pdbs[1]
pdb1.GetTitle()

In [None]:
def openeye_get_seqres(mol):
    """
    Collect the SEQRES entries stored in a molecule's PDB data.

    Parameters
    ----------
    mol
        Object accepted by ``oechem.OEGetPDBDataPairs``.

    Returns
    -------
    list
        The value of every PDB data pair whose tag is exactly "SEQRES",
        in the order the pairs are iterated.
    """
    seqres_values = []
    for pair in oechem.OEGetPDBDataPairs(mol):
        # Skip everything that is not a SEQRES record (HEADER, REMARK, ...)
        if pair.GetTag() != "SEQRES":
            continue
        seqres_values.append(pair.GetValue())
    return seqres_values

In [None]:
# Print the tag and value of every PDB data pair stored on pdb0.
# The loop index `i` stays bound after the loop finishes; the next cell prints it
# (i.e. the index of the last data pair).
for i, data_pair in enumerate(oechem.OEGetPDBDataPairs(pdb0)):
    print(data_pair.GetTag(), ":\t", data_pair.GetValue())

In [None]:
print(i)

In [None]:
oechem.OEGetPDBData(pdb0, "HEADER")

In [None]:
oechem.OEGetPDBData(pdb1, "HEADER")

In [None]:
seqres0 = openeye_get_seqres(pdb0)
seqres1 = openeye_get_seqres(pdb1)

In [None]:
seqres0

In [None]:
seqres1

### Not really, but I can get the SEQRES records

## Can I convert the SEQRES records to a residue list?

### converting old script

In [None]:
seqres0[0][5]

In [None]:
seqres0[0][13:]

In [None]:
def seqres_to_res_list(seqres_str, seqres_chain_column=11, seq_start_column=19, chain="A"):
    """
    Extract the residue names of a single chain from PDB SEQRES records.

    https://www.wwpdb.org/documentation/file-format-content/format33/sect3.html#SEQRES

    Parameters
    ----------
    seqres_str : str
        SEQRES records, one record per line.
    seqres_chain_column : int
        0-based index of the chain ID character within each line. The default
        (11) matches a full PDB line that still starts with "SEQRES"; pass a
        smaller value if that prefix has been stripped.
    seq_start_column : int
        0-based index at which the residue names start within each line.
    chain : str
        Chain ID to keep; lines belonging to any other chain are ignored.

    Returns
    -------
    list of str
        Residue names of the selected chain in file order. Empty if no line
        matches.
    """
    res_list = []
    for line in seqres_str.splitlines():
        # A line too short to contain a chain ID (blank line, "END", ...) cannot
        # belong to the requested chain; skipping it avoids an IndexError.
        if len(line) <= seqres_chain_column:
            continue
        if line[seqres_chain_column] != chain:
            continue
        # split() without an argument collapses runs of whitespace, so fixed-width
        # padding and trailing blanks never produce empty "residues".
        res_list.extend(line[seq_start_column:].split())
    return res_list

In [None]:
seqres_to_res_list('\n'.join(seqres0), 5, 13)

In [None]:
utils.seqres_to_res_list('\n'.join(seqres0))

## Can I align two sequences?

In [None]:
# Ask OpenEye for an alignment of pdb1 and pdb0 using the PAM250 alignment method.
alignment = oechem.OEGetAlignment(pdb1, 
                                  pdb0, 
                                  oechem.OESeqAlignmentMethod_PAM250 ## best for aligning similar sequences
                                  
                                 )

In [None]:
alignment.GetMethod()

In [None]:
oechem.OEGetAlignmentMethodName(2)

In [None]:
oechem.OESequenceAlignmentIter()

In [None]:
print(oechem.OESeqAlignmentMethod_None)
print(oechem.OESeqAlignmentMethod_Identity)
print(oechem.OESeqAlignmentMethod_GONNET)

In [None]:
ofs = oechem.oeofstream()
ofs.open("test_alignment_pdb1_pdb0.txt")

In [None]:
oechem.OEWriteAlignment(ofs, alignment)
ofs.close()

In [None]:
def openeye_get_sequence_alignment(pdb0, pdb1, out_fn):
    """
    Align two structures with OpenEye and write the alignment to a file.

    Parameters
    ----------
    pdb0, pdb1
        Objects passed, in this order, to ``oechem.OEGetAlignment``.
    out_fn : str
        Path of the file the alignment is written to.

    Returns
    -------
    The alignment object returned by ``oechem.OEGetAlignment``.

    Raises
    ------
    OSError
        If ``out_fn`` cannot be opened for writing.
    """
    alignment = oechem.OEGetAlignment(
        pdb0,
        pdb1,
        oechem.OESeqAlignmentMethod_PAM250,  ## best for aligning similar sequences
    )
    ofs = oechem.oeofstream()
    # open() reports failure through its return value; without this check a bad
    # path would silently produce no output file.
    if not ofs.open(out_fn):
        raise OSError(f"Unable to open {out_fn} for writing")
    try:
        oechem.OEWriteAlignment(ofs, alignment)
    finally:
        # Always release the stream, even if writing raised.
        ofs.close()
    return alignment

### conclusion...yes?

## What happens when you align proteins that each have a single chain?

In [None]:
pdb2 = pdbs[-4]
oechem.OEGetPDBData(pdb2, "HEADER")

In [None]:
pdb3 = pdbs[7]
oechem.OEGetPDBData(pdb3, "HEADER")

In [None]:
openeye_get_seqres(pdb2)

In [None]:
openeye_get_seqres(pdb3)

In [None]:
alignment = openeye_get_sequence_alignment(pdb2, pdb3, "test_alignment_pdb2_pdb3.txt")

In [None]:
alignment.GetChainID(0)

In [None]:
alignment = openeye_get_sequence_alignment(pdb0, pdb1, "test_alignment_pdb0_pdb1.txt")
alignment.GetChainID(0)

In [None]:
alignment.GetChainID(1)

In [None]:
# Pick the first loaded structure whose HEADER data contains "4WMD".
# Raises IndexError if no loaded structure matches.
pdb5 = [pdb for pdb in pdbs if "4WMD" in oechem.OEGetPDBData(pdb, "HEADER")][0]

In [None]:
multichain = openeye_get_sequence_alignment(pdb2, pdb5, "test_multichain.txt")

In [None]:
oechem.OECopyPDBData()

# How does setting PDB data work?

In [None]:
oechem.OECopyPDBData(pdb5, pdb2)

In [None]:
oechem.OEGetPDBData(pdb5, "SEQRES")

In [None]:
oechem.OEGetPDBData(pdb2, "SEQRES")

In [None]:
oechem.OEAddPDBData()

In [None]:
def openeye_copy_pdb_data(pdb0, pdb1, tag):
    """
    Replace the PDB data stored under ``tag`` on ``pdb0`` with that of ``pdb1``.

    Parameters
    ----------
    pdb0
        Destination: its existing data for ``tag`` is deleted, then the
        matching data pairs are added to it.
    pdb1
        Source: its PDB data pairs are scanned for ``tag``.
    tag : str
        PDB data tag to copy (e.g. "SEQRES").
    """
    # Clear whatever the destination currently holds under this tag
    oechem.OEDeletePDBData(pdb0, tag)
    # Lazily select the source entries that carry the requested tag
    matching_pairs = (
        pair for pair in oechem.OEGetPDBDataPairs(pdb1) if pair.GetTag() == tag
    )
    for pair in matching_pairs:
        oechem.OEAddPDBData(pdb0, pair)