# PhyloPandas Tutorial

## Read in sequence and alignment data

In [1]:
import phylopandas as pd
import dendropy

# Read the tree from a Newick file into a DataFrame
df = pd.read_newick('PF08793_seed.newick')


# Read the FASTA sequences through the `.phylo` accessor and combine them
# with the tree rows already in `df`, matching on the `id` column
df = df.phylo.read_fasta('PF08793_seed.fasta', combine_on='id')

In [2]:
# Create a dendropy TaxonNamespace (no arguments) to experiment with its API
t = dendropy.TaxonNamespace()

In [8]:
# IPython help lookup: show the signature and docstring of TaxonNamespace.add_taxon
t.add_taxon?


Signature: t.add_taxon(taxon)
Docstring:
Adds a new |Taxon| object to ``self``.

If ``taxon`` is not already in the collection of |Taxon| objects in this
namespace, and this namespace is mutable, it is added to the
collection. If it is already in the collection, then nothing happens.
If it is not already in the collection, but the namespace is not
mutable, then TypeError is raised.

Parameters
----------
taxon : |Taxon|
    The |Taxon| object to be accessioned or registered in this
    collection.

Raises
------
TypeError
    If this namespace is immutable (i.e.
    :attr:`TaxonNamespace.is_mutable` is |False|).
File:      ~/miniconda3/envs/omnipotent/lib/python3.6/site-packages/dendropy/datamodel/taxonmodel.py
Type:      method


In [2]:
# Convert the DataFrame with phylopandas' tree writer helper `_write`.
# The result is rebound to `t` (which earlier held the TaxonNamespace) and is
# plotted in the following cell.
# NOTE(review): `_write` is underscore-prefixed, i.e. not public API - confirm
# whether a public equivalent should be used here.
t = pd.treeio.write._write(df)

In [3]:
# Print a text (ASCII) rendering of the tree to the cell output
t.print_plot()

                                                             /------ YCuEYsCZuD
                              /------------------------------+                 
                              |                              \------ BdNraLCnAs
                              |                                                
                              |                              /------ FGwGPxSlfZ
                              |                        /-----+                 
                              |                        |     \------ tqfa0BNQli
                        /-----+                  /-----+                       
                        |     |                  |     |     /------ o4Lv7kbCBL
                        |     |            /-----+     \-----+                 
                        |     |            |     |           \------ sl6FL2DR1X
                        |     |      /-----+     |                             
                        |     |      |  

In [22]:
# Keep only the rows whose `parent` column equals the string '2'
df.loc[df['parent'].eq('2')]

Unnamed: 0,distance,id,label,length,parent,type,description,sequence
TG320wicc3,0.919775,3,3,0.271689,2,node,,
8EWDET2GTs,1.59117,Q0E553_SFAVA/142-176,Q0E553_SFAVA/142-176,0.943087,2,leaf,Q0E553_SFAVA/142-176,YCTNFHR----DE-SRNPLTGKKLVPTSPIRKAWHKMCSG


In [8]:
# Look up a single row by its index label; the recorded output shows that
# this row has type 'root'.
# NOTE(review): the index labels look like randomly generated identifiers, so
# this exact label may not exist after the files are re-read - confirm.
df.loc['6aGFvY9QLQ']

distance          0
id                0
label             0
length            0
parent         None
type           root
description     NaN
sequence        NaN
Name: 6aGFvY9QLQ, dtype: object

In [4]:
import dendropy

In [3]:
dendropy.Tree?df

Object `dendropy.Tree` not found.


In [24]:
n = dendropy.Node.set_child_nodes?

Signature: dendropy.Node.set_child_nodes(self, child_nodes)
Docstring:
Assigns the set of child nodes for this node.

Results in the ``parent_node`` attribute of each |Node| in ``nodes``
as well as the ``tail_node`` attribute of corresponding |Edge|
objects being assigned to ``self``.

Parameters
----------
child_nodes : collections.Iterable[|Node|]
    The (iterable) collection of child nodes to be assigned this node
    as a parent.
File:      ~/miniconda3/envs/omnipotent/lib/python3.6/site-packages/dendropy/datamodel/treemodel.py
Type:      function


In [7]:
n = dendropy.Node?

Init signature: dendropy.Node(**kwargs)
Docstring:      A :term:|Node| on a :term:|Tree|.
Init docstring:
Keyword Arguments
-----------------
taxon : |Taxon|, optional
    The |Taxon| instance representing the operational taxonomic
    unit concept associated with this Node.
label : string, optional
    A label for this node.
edge_length : numeric, optional
    Length or weight of the edge subtending this node.
File:           ~/miniconda3/envs/omnipotent/lib/python3.6/site-packages/dendropy/datamodel/treemodel.py
Type:           type


In [10]:
# IPython help lookup: show the constructor signature and docstring of dendropy.Tree
dendropy.Tree?

Init signature: dendropy.Tree(*args, **kwargs)
Docstring:
An arborescence, i.e. a fully-connected directed acyclic graph with all
edges directing away from the root and toward the tips. The "root" of the
tree is represented by the :attr:`Tree.seed_node` attribute.  In unrooted
trees, this node is an algorithmic artifact. In rooted trees this node is
semantically equivalent to the root.
Init docstring:
The constructor can optionally construct a |Tree| object by
cloning another |Tree| object passed as the first positional
argument, or out of a data source if ``stream`` and ``schema`` keyword
arguments are passed with a file-like object and a schema-specification
string object values respectively.

Parameters
----------

\*args : positional argument, optional
    If given, should be exactly one |Tree| object. The new
    |Tree| will then be

In [8]:
# IPython help lookup: show the constructor signature and docstring of dendropy.Taxon
dendropy.Taxon?

Init signature: dendropy.Taxon(label=None)
Docstring:      A taxon associated with a sequence or a node on a tree.
Init docstring:
Parameters
----------
label : string or |Taxon| object
    Label or name of this operational taxonomic unit concept. If a
    string, then the ``label`` attribute of ``self`` is set to this value.
    If a |Taxon| object, then the ``label`` attribute of ``self`` is
    set to the same value as the ``label`` attribute the other
    |Taxon| object and all annotations/metadata are copied.
File:           ~/miniconda3/envs/omnipotent/lib/python3.6/site-packages/dendropy/datamodel/taxonmodel.py
Type:           type


In [None]:
import phylopandas as pd
from IPython.display import display

# Read the FASTA file into a DataFrame and show it
df = pd.read_fasta('PF08793_seed.fasta')
display(df)

# Read the same file again through the `.phylo` accessor and combine it with
# the rows already in `df`, matching on the `id` column. The keyword is
# `combine_on`, as used in the first cell of this notebook.
df = df.phylo.read_fasta('PF08793_seed.fasta', combine_on='id')
display(df)

In [2]:
import phylopandas as pd

# Read fasta file as DataFrame
df = pd.read_fasta('PF08793_seed.fasta')

# Write the same sequences back out in PHYLIP and Clustal formats
df.phylo.to_phylip('PF08793_seed.phylip')
df.phylo.to_clustal('PF08793_seed.clustal')

# Show DataFrame in notebook (a bare last expression is rendered as the cell output)
df

Unnamed: 0,description,id,label,sequence
3WFZUpDPaM,seq-0 Q0E553_SFAVA/184-218,seq-0,seq-0,KCIAFDK----ND-KINPFTGRPINENNDTYRMIYSMCHG
GvjE6xhJYL,seq-1 Q8QUQ5_ISKNN/123-157,seq-1,seq-1,ACALYYD----DP-TVNPFTDEPLRRYSPIDDLLYRNCES
GLxde4wm1g,seq-2 Q0E553_SFAVA/142-176,seq-2,seq-2,YCTNFHR----DE-SRNPLTGKKLVPTSPIRKAWHKMCSG
AOJeKQ9pqv,seq-3 Q8QUQ5_ISKNN/45-79,seq-3,seq-3,LCAEYKR----SP-RYNPWTDRTLAPGSPKHNLISGMCGG
oXwsjIh0Me,seq-4 Q8QUQ6_ISKNN/37-75,seq-4,seq-4,VCNDLALCSQHTD-TYNPWTDRALLPDSPVHDMIDYVCNT
aoylt2Iqrc,seq-5 019R_FRG3G/249-283,seq-5,seq-5,VCERFAA----DP-TRNPVTGSPLSRNDPLYTDLMEICKG
m3TpvRyP4C,seq-6 019R_FRG3G/302-336,seq-6,seq-6,TCEAFCR----DP-TRNPVTGQKMRRNGIEYQMFAEECDC
870udBVwIr,seq-7 VF380_IIV6/7-45,seq-7,seq-7,KCDEWEKIRLNSS-PKNPFTKRNVKKDGPTYKKIDLICKH
ONJYTsRDw9,seq-8 VF380_IIV3/8-47,seq-8,seq-8,KCYEWDIAKKKSPLPKSPLTGRKLKQHGPTWKKITAECAT
6Y8wVwbXNh,seq-9 VF378_IIV6/4-38,seq-9,seq-9,KCSKWHE----QP-LINPLTNRKIKKNGPTYKELERECGP


## Read in tree data