Hmmer3 rdf xml

cmzmasek edited this page Aug 25, 2011 · 34 revisions

Executing HMMER's hmmscan ("domtblout" for per-domain queries and "tblout" for per-protein queries) and returning the result in RDF/XML:

as gist

#!/usr/bin/env ruby
# code is currently at: https://github.com/cmzmasek/bioruby/tree/master/lib/bio/
# launch with (by way of example): ruby -I ~/MY_BIORUBY/bioruby/lib/ hmmer_02.rb
#
# Runs hmmscan twice against the same profile database and query sequence,
# once with '--domtblout' (per-domain table) and once with '--tblout'
# (per-protein table), printing the factory options and the resulting
# report as RDF/XML for each run.
require 'bio'

# Example locations; adjust to the local installation.
hmmscan_path = '/home/zma/SOFTWARE/hmmer-3.0/src/hmmscan'
pfam_hmm_db  = '/home/zma/DATA/PFAM/PFAM25/Pfam-A.hmm'
query_fasta  = '/home/zma/SOFTWARE_DEV/bioruby_hmmer/APAF_HUMAN.fasta'

['--domtblout', '--tblout'].each_with_index do |table_option, run_index|
  # Blank line between the two runs' output.
  puts if run_index > 0

  factory = Bio::HMMER3.new(hmmscan_path, pfam_hmm_db, query_fasta, table_option)
  puts factory.options

  # Each factory must be queried itself; reusing the report from the
  # previous run would print the '--domtblout' result a second time.
  report = factory.query
  puts report.to_rdf
end

RDF/XML HMMER3 Output

Please comment on this and help improve it!

"domtblout" for per-domain queries:

as gist

<?xml version="1.0"?>
<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:hmmer_hit="http://www.open-bio.org/core/hmmer_hit/"
>
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00400.26">
            <hmmer_hit:target_name>WD40</hmmer_hit:target_name>
            <hmmer_hit:target_length>39</hmmer_hit:target_length>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:query_length>1248</hmmer_hit:query_length>
            <hmmer_hit:full_sequence_e_value>2.0e-82</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>268.1</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>47.4</hmmer_hit:full_sequence_bias>
            <hmmer_hit:domain_number>1</hmmer_hit:domain_number>
            <hmmer_hit:domain_sum>13</hmmer_hit:domain_sum>
            <hmmer_hit:domain_c_e_value>1.7e-09</hmmer_hit:domain_c_e_value>
            <hmmer_hit:domain_i_e_value>5.2e-06</hmmer_hit:domain_i_e_value>
            <hmmer_hit:domain_score>25.8</hmmer_hit:domain_score>
            <hmmer_hit:domain_bias>0.0</hmmer_hit:domain_bias>
            <hmmer_hit:hmm_coord_from>7</hmmer_hit:hmm_coord_from>
            <hmmer_hit:hmm_coord_to>38</hmmer_hit:hmm_coord_to>
            <hmmer_hit:ali_coord_from>611</hmmer_hit:ali_coord_from>
            <hmmer_hit:ali_coord_to>642</hmmer_hit:ali_coord_to>
            <hmmer_hit:env_coord_from>607</hmmer_hit:env_coord_from>
            <hmmer_hit:env_coord_to>643</hmmer_hit:env_coord_to>
            <hmmer_hit:acc>0.92</hmmer_hit:acc>
            <hmmer_hit:target_description>WD domain, G-beta repeat</hmmer_hit:target_description>
    </rdf:Description> 
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00400.26">
            <hmmer_hit:target_name>WD40</hmmer_hit:target_name>
            <hmmer_hit:target_length>39</hmmer_hit:target_length>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:query_length>1248</hmmer_hit:query_length>
            <hmmer_hit:full_sequence_e_value>2.0e-82</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>268.1</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>47.4</hmmer_hit:full_sequence_bias>
            <hmmer_hit:domain_number>2</hmmer_hit:domain_number>
            <hmmer_hit:domain_sum>13</hmmer_hit:domain_sum>
            <hmmer_hit:domain_c_e_value>2.8e-13</hmmer_hit:domain_c_e_value>
            <hmmer_hit:domain_i_e_value>8.5e-10</hmmer_hit:domain_i_e_value>
            <hmmer_hit:domain_score>37.8</hmmer_hit:domain_score>
            <hmmer_hit:domain_bias>0.0</hmmer_hit:domain_bias>
            <hmmer_hit:hmm_coord_from>2</hmmer_hit:hmm_coord_from>
            <hmmer_hit:hmm_coord_to>39</hmmer_hit:hmm_coord_to>
            <hmmer_hit:ali_coord_from>648</hmmer_hit:ali_coord_from>
            <hmmer_hit:ali_coord_to>685</hmmer_hit:ali_coord_to>
            <hmmer_hit:env_coord_from>647</hmmer_hit:env_coord_from>
            <hmmer_hit:env_coord_to>685</hmmer_hit:env_coord_to>
            <hmmer_hit:acc>0.94</hmmer_hit:acc>
            <hmmer_hit:target_description>WD domain, G-beta repeat</hmmer_hit:target_description>
    </rdf:Description> 
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00400.26">
            <hmmer_hit:target_name>WD40</hmmer_hit:target_name>
            <hmmer_hit:target_length>39</hmmer_hit:target_length>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:query_length>1248</hmmer_hit:query_length>
            <hmmer_hit:full_sequence_e_value>2.0e-82</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>268.1</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>47.4</hmmer_hit:full_sequence_bias>
            <hmmer_hit:domain_number>3</hmmer_hit:domain_number>
            <hmmer_hit:domain_sum>13</hmmer_hit:domain_sum>
            <hmmer_hit:domain_c_e_value>1.6e-10</hmmer_hit:domain_c_e_value>
            <hmmer_hit:domain_i_e_value>4.8e-07</hmmer_hit:domain_i_e_value>
            <hmmer_hit:domain_score>29.1</hmmer_hit:domain_score>
            <hmmer_hit:domain_bias>0.4</hmmer_hit:domain_bias>
            <hmmer_hit:hmm_coord_from>1</hmmer_hit:hmm_coord_from>
            <hmmer_hit:hmm_coord_to>39</hmmer_hit:hmm_coord_to>
            <hmmer_hit:ali_coord_from>689</hmmer_hit:ali_coord_from>
            <hmmer_hit:ali_coord_to>729</hmmer_hit:ali_coord_to>
            <hmmer_hit:env_coord_from>689</hmmer_hit:env_coord_from>
            <hmmer_hit:env_coord_to>729</hmmer_hit:env_coord_to>
            <hmmer_hit:acc>0.97</hmmer_hit:acc>
            <hmmer_hit:target_description>WD domain, G-beta repeat</hmmer_hit:target_description>
    </rdf:Description>
    <!-- ... (remaining hits omitted) ... -->
</rdf:RDF>

"tblout" for per-protein queries:

[as gist](https://gist.github.com/1170405)

<?xml version="1.0"?>
<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:hmmer_hit="http://www.open-bio.org/core/hmmer_hit/"
>
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00400.26">
            <hmmer_hit:target_name>WD40</hmmer_hit:target_name>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:full_sequence_e_value>2.0e-82</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>268.1</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>47.4</hmmer_hit:full_sequence_bias>
            <hmmer_hit:best_1_domain_e_value>1.1e-12</hmmer_hit:best_1_domain_e_value>
            <hmmer_hit:best_1_domain_score>47.0</hmmer_hit:best_1_domain_score>
            <hmmer_hit:best_1_domain_bias>0.1</hmmer_hit:best_1_domain_bias>
            <hmmer_hit:domain_number_est_exp>13</hmmer_hit:domain_number_est_exp>
            <hmmer_hit:domain_number_est_reg>13</hmmer_hit:domain_number_est_reg>
            <hmmer_hit:domain_number_est_clu>0</hmmer_hit:domain_number_est_clu>
            <hmmer_hit:domain_number_est_ov>0</hmmer_hit:domain_number_est_ov>
            <hmmer_hit:domain_number_est_env>13</hmmer_hit:domain_number_est_env>
            <hmmer_hit:domain_number_est_dom>13</hmmer_hit:domain_number_est_dom>
            <hmmer_hit:domain_number_est_rep>13</hmmer_hit:domain_number_est_rep>
            <hmmer_hit:domain_number_est_inc>13</hmmer_hit:domain_number_est_inc>
            <hmmer_hit:target_description>WD domain, G-beta repeat</hmmer_hit:target_description>
    </rdf:Description> 
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00931.16">
            <hmmer_hit:target_name>NB-ARC</hmmer_hit:target_name>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:full_sequence_e_value>5.5e-75</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>251.5</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>0.7</hmmer_hit:full_sequence_bias>
            <hmmer_hit:best_1_domain_e_value>9.1e-75</hmmer_hit:best_1_domain_e_value>
            <hmmer_hit:best_1_domain_score>250.8</hmmer_hit:best_1_domain_score>
            <hmmer_hit:best_1_domain_bias>0.5</hmmer_hit:best_1_domain_bias>
            <hmmer_hit:domain_number_est_exp>1</hmmer_hit:domain_number_est_exp>
            <hmmer_hit:domain_number_est_reg>1</hmmer_hit:domain_number_est_reg>
            <hmmer_hit:domain_number_est_clu>0</hmmer_hit:domain_number_est_clu>
            <hmmer_hit:domain_number_est_ov>0</hmmer_hit:domain_number_est_ov>
            <hmmer_hit:domain_number_est_env>1</hmmer_hit:domain_number_est_env>
            <hmmer_hit:domain_number_est_dom>1</hmmer_hit:domain_number_est_dom>
            <hmmer_hit:domain_number_est_rep>1</hmmer_hit:domain_number_est_rep>
            <hmmer_hit:domain_number_est_inc>1</hmmer_hit:domain_number_est_inc>
            <hmmer_hit:target_description>NB-ARC domain</hmmer_hit:target_description>
    </rdf:Description> 
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF00619.15">
            <hmmer_hit:target_name>CARD</hmmer_hit:target_name>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:full_sequence_e_value>8.8e-19</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>66.8</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>0.2</hmmer_hit:full_sequence_bias>
            <hmmer_hit:best_1_domain_e_value>2.6e-18</hmmer_hit:best_1_domain_e_value>
            <hmmer_hit:best_1_domain_score>65.3</hmmer_hit:best_1_domain_score>
            <hmmer_hit:best_1_domain_bias>0.1</hmmer_hit:best_1_domain_bias>
            <hmmer_hit:domain_number_est_exp>1</hmmer_hit:domain_number_est_exp>
            <hmmer_hit:domain_number_est_reg>1</hmmer_hit:domain_number_est_reg>
            <hmmer_hit:domain_number_est_clu>0</hmmer_hit:domain_number_est_clu>
            <hmmer_hit:domain_number_est_ov>0</hmmer_hit:domain_number_est_ov>
            <hmmer_hit:domain_number_est_env>1</hmmer_hit:domain_number_est_env>
            <hmmer_hit:domain_number_est_dom>1</hmmer_hit:domain_number_est_dom>
            <hmmer_hit:domain_number_est_rep>1</hmmer_hit:domain_number_est_rep>
            <hmmer_hit:domain_number_est_inc>1</hmmer_hit:domain_number_est_inc>
            <hmmer_hit:target_description>Caspase recruitment domain</hmmer_hit:target_description>
    </rdf:Description> 
    <rdf:Description rdf:about="http://pfam.sanger.ac.uk/family?acc=PF05729.6">
            <hmmer_hit:target_name>NACHT</hmmer_hit:target_name>
            <hmmer_hit:query_name>sp|O14727|APAF_HUMAN</hmmer_hit:query_name>
            <hmmer_hit:full_sequence_e_value>0.011</hmmer_hit:full_sequence_e_value>
            <hmmer_hit:full_sequence_score>15.0</hmmer_hit:full_sequence_score>
            <hmmer_hit:full_sequence_bias>0.1</hmmer_hit:full_sequence_bias>
            <hmmer_hit:best_1_domain_e_value>0.041</hmmer_hit:best_1_domain_e_value>
            <hmmer_hit:best_1_domain_score>13.2</hmmer_hit:best_1_domain_score>
            <hmmer_hit:best_1_domain_bias>0.1</hmmer_hit:best_1_domain_bias>
            <hmmer_hit:domain_number_est_exp>2</hmmer_hit:domain_number_est_exp>
            <hmmer_hit:domain_number_est_reg>1</hmmer_hit:domain_number_est_reg>
            <hmmer_hit:domain_number_est_clu>0</hmmer_hit:domain_number_est_clu>
            <hmmer_hit:domain_number_est_ov>0</hmmer_hit:domain_number_est_ov>
            <hmmer_hit:domain_number_est_env>1</hmmer_hit:domain_number_est_env>
            <hmmer_hit:domain_number_est_dom>1</hmmer_hit:domain_number_est_dom>
            <hmmer_hit:domain_number_est_rep>1</hmmer_hit:domain_number_est_rep>
            <hmmer_hit:domain_number_est_inc>0</hmmer_hit:domain_number_est_inc>
            <hmmer_hit:target_description>NACHT domain</hmmer_hit:target_description>
    </rdf:Description> 
</rdf:RDF>
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.
Press h to open a hovercard with more details.