# Download [RNA base triple database](http://rna.bgsu.edu/triples/triples.php)

In [1]:
import os, sys, shutil
import pathlib
import glob as glob
import numpy as np
import json
import wget
from itertools import product
from zipfile import ZipFile
from pdbfixer import PDBFixer
import warnings
from openbabel import openbabel
#import time

In [2]:
#openbabel.obErrorLog.SetOutputLevel(0)
openbabel.obErrorLog.StopLogging()

In [3]:
#warnings.filterwarnings("ignore")
#sys.stderr = sys.__stderr__

In [4]:
url = "http://rna.bgsu.edu/triples/zip"
release_version = "v1.4"

In [5]:
base_path = os.path.dirname(os.path.abspath("__file__")).strip('notebooks')
output_path = os.path.join(base_path, "pdb", "triplebase")

In [6]:
if os.path.isdir(output_path):
    print(">remove directory: {}".format(output_path))
    shutil.rmtree(output_path)
    
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True) 

In [7]:
d = "GCAU"
arr = list(product(d, repeat=3))

In [8]:
for a in arr:
    seq = ''.join(a)    
    _output_path = os.path.join(output_path, seq)
    _url = os.path.join(url, release_version, seq + ".zip")
    
    #print('{}.zip'.format(seq))
    wget.download(_url, out=output_path, bar=None)     
    shutil.unpack_archive('{}.zip'.format(_output_path), _output_path)

In [9]:
# delete zip file
for a in arr:
    seq = ''.join(a)
    os.remove(os.path.join(output_path, seq + ".zip"))

### load pdb with openbabel and resave as pdb

In [10]:
files = glob.glob(output_path + "/*/*.pdb")

In [11]:
for f in files:
    print(os.path.basename(f))

    f_org = f + ".org"
    shutil.move(f, f_org)

    obConversion = openbabel.OBConversion()
    obConversion.SetInAndOutFormats("pdb", "pdb")
    mol = openbabel.OBMol()    
    #print("n_atoms: {}, n_bonds: {}, n_residues: {}".format(mol.NumAtoms(), mol.NumBonds(), mol.NumResidues()))

    obConversion.ReadFile(mol, f_org)
    #mol.AddHydrogens()
    mol.DeleteHydrogens()    
    obConversion.WriteFile(mol, f)

    #time.sleep(1)

Triple_cWH_cSS_AGG.pdb
Triple_cSH_cSW_AGG.pdb
Triple_cWW_tSH_AGG.pdb
Triple_cWW_cSS_AGG.pdb
Triple_cSW_cSW_AGG.pdb
Triple_cWH_tSH_AGG.pdb
Triple_tWH_cSW_AGG.pdb
Triple_cHW_tSH_AGG.pdb
Triple_cHH_cSS_AGG.pdb
Triple_cHH_tSH_AGG.pdb
Triple_tHH_cSW_AGG.pdb
Triple_cHW_cSS_AGG.pdb
Triple_cSW_tHH_AGG.pdb
Triple_cWW_cHW_AGG.pdb
Triple_cSW_cHS_AGG.pdb
Triple_cHW_cHW_AGG.pdb
Triple_cSH_cSH_AGG.pdb
Triple_cSW_tsS_AGG.pdb
Triple_tWH_tSS_AGG.pdb
Triple_tWH_cSH_AGG.pdb
Triple_cSW_cSH_AGG.pdb
Triple_cSH_tSS_AGG.pdb
Triple_tHH_tSS_AGG.pdb
Triple_tHH_cSH_AGG.pdb
Triple_cWW_tHS_AGG.pdb
Triple_cSW_tHW_AGG.pdb
Triple_cWW_cHH_AGG.pdb
Triple_tHH_tSS_AGG_exemplar.pdb
Triple_cHW_tHS_AGG.pdb
Triple_cHW_cHH_AGG.pdb
Triple_cWH_tSS_AGG.pdb
Triple_cWW_cSH_AGG.pdb
Triple_cWW_tsS_AGG.pdb
Triple_cWH_cSH_AGG.pdb
Triple_cHW_cSH_AGG.pdb
Triple_cHH_tSS_AGG.pdb
Triple_cHH_cSH_AGG.pdb
Triple_cHW_tsS_AGG.pdb
Triple_cSW_cHH_AGG.pdb
Triple_cWW_tHW_AGG.pdb
Triple_cSW_tHS_AGG.pdb
Triple_cHW_tHW_AGG.pdb
Triple_cSH_tSH_AGG.pdb
Tr

In [12]:
for f in files:
    basename = os.path.basename(f)

    try:
        # check converted pdb with PDBFixer
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("default")
            fixer = PDBFixer(filename=f)    

        # raise warning if duplicate residue exists
        if len(w) != 0:
            #print("{}: {}".format(basename, w[0]))  
            print("{}: Warning".format(basename))
    except:
        print("{}: Invalid".format(basename))

