# In [None]:  (Jupyter cell marker, commented out so the file parses as Python)

from Bio import SeqIO
from Bio.Data import CodonTable
import pandas as pd

## 1 ##

def get_sequences_from_file(fasta_fn):
    """Read a FASTA file and map each species name to its sequence.

    The species name is built from the second and third whitespace-separated
    tokens of each record's description line, joined by a single space. The
    first token is skipped (presumably the record identifier -- confirm
    against the FASTA headers in use).

    Parameters
    ----------
    fasta_fn
        File name or handle passed straight to ``SeqIO.parse`` with the
        ``"fasta"`` format.

    Returns
    -------
    dict
        ``{species_name: record.seq}``. If two records produce the same
        species name, the later record overwrites the earlier one.

    Raises
    ------
    IndexError
        If a record description has fewer than three tokens.
    """
    sequence_data_dict = {}

    for record in SeqIO.parse(fasta_fn, "fasta"):
        description = record.description.split()
        species_name = description[1] + " " + description[2]
        sequence_data_dict[species_name] = record.seq

    return sequence_data_dict

    
    
    ## 2 ##
def translate_function(string_nucleotides):
    """Translate a nucleotide sequence with the vertebrate mitochondrial code.

    Codons are read in frame starting at position 0. Translation stops at the
    first stop codon, which is not included in the output. A trailing
    incomplete codon (one or two leftover nucleotides) is ignored instead of
    raising ``KeyError``.

    Parameters
    ----------
    string_nucleotides
        The nucleotide sequence; it is converted with ``str()`` before use.

    Returns
    -------
    str
        The amino-acid sequence as one-letter codes.

    Raises
    ------
    KeyError
        If a complete codon is neither a stop codon nor present in the
        table's ``forward_table``.
    """
    mito_table = CodonTable.unambiguous_dna_by_name["Vertebrate Mitochondrial"]

    nucleotides = str(string_nucleotides)
    # Only walk complete codons; a partial codon at the end has no entry in
    # the forward table and would otherwise abort the whole translation.
    usable_length = len(nucleotides) - len(nucleotides) % 3

    amino_acids = []
    for i in range(0, usable_length, 3):
        codon = nucleotides[i:i + 3]
        if codon in mito_table.stop_codons:
            break
        amino_acids.append(mito_table.forward_table[codon])

    return "".join(amino_acids)


## 3 ##

from Bio.Seq import Seq

def Alt_translate(string_nucleotides):
    """Translate a nucleotide sequence using Biopython's ``Seq.translate``.

    Uses the "Vertebrate Mitochondrial" table with ``to_stop=True``, so the
    result ends before the first in-frame stop codon.

    Parameters
    ----------
    string_nucleotides
        The nucleotide sequence; it is wrapped in a ``Seq`` object before
        translation.

    Returns
    -------
    str
        The translated amino-acid sequence.
    """
    coding_dna = Seq(string_nucleotides)

    # Translate the Seq object, not the raw input: str.translate() is an
    # unrelated character-mapping method and rejects these keyword arguments.
    result = coding_dna.translate(table="Vertebrate Mitochondrial", to_stop=True)

    return str(result)
  

##### 4 #####
def get_proportion_aa(aa_seq):
    """Return the fractions of charged, polar and hydrophobic residues.

    Parameters
    ----------
    aa_seq
        Amino-acid sequence in one-letter codes; converted with ``str()``
        before counting.

    Returns
    -------
    list of float
        ``[charged, polar, hydrophobic]``, each count divided by the total
        sequence length. Symbols that belong to none of the three groups
        still count toward the length, so the fractions may sum to less
        than 1. An empty sequence yields ``[0.0, 0.0, 0.0]`` rather than
        raising ``ZeroDivisionError``.
    """
    charged = {'R', 'K', 'D', 'E'}
    polar = {'Q', 'N', 'H', 'S', 'T', 'Y', 'C', 'M', 'W'}
    hydrophobic = {'A', 'I', 'L', 'F', 'V', 'P', 'G'}

    residues = str(aa_seq)
    total = len(residues)
    if total == 0:
        # Nothing to classify (e.g. the translation stopped immediately).
        return [0.0, 0.0, 0.0]

    c_count = 0
    p_count = 0
    h_count = 0
    for aa in residues:
        if aa in charged:
            c_count += 1
        elif aa in polar:
            p_count += 1
        elif aa in hydrophobic:
            h_count += 1

    return [c_count / total, p_count / total, h_count / total]


# Main script: translate each cytochrome-b sequence and store the
# charged / polar / hydrophobic fractions in the matching bear_df row.

cytb_seqs = get_sequences_from_file("bears_cytb.fasta")

bear_df = pd.read_csv("bears_data.csv")

species_list = list(bear_df.species)

for key, value in cytb_seqs.items():
    aa_seq = Alt_translate(str(value))
    proportion = get_proportion_aa(aa_seq)

    # Select the row(s) whose species matches this sequence's name once,
    # then fill the three composition columns in the order the proportions
    # are returned.
    row_mask = bear_df.species == key
    for column, fraction in zip(('charged', 'polar', 'hydrophobic'), proportion):
        bear_df.loc[row_mask, column] = fraction
    
## 5 ##

import seaborn as sea

# Bar chart of the 'mass' column per species; the x tick labels are rotated
# and right-aligned.
bar_chart = sea.barplot(data=bear_df, x='species', y='mass')

bar_chart.set_xticklabels(
    labels=bear_df.species,
    rotation=30,
    horizontalalignment='right',
)



## 6 ##

#The largest bear species is Ursus spelaeus.

#The other interesting thing about this species is that Ursus spelaeus does not show a significantly different gene sequence from the other species. Huh.

## 7 ##

import numpy as num
import matplotlib.pyplot as mpl
# Stacked bar chart of amino-acid composition: one bar per row of bear_df,
# with charged at the bottom, polar in the middle and hydrophobic on top.
ind = num.arange(len(bear_df))
p1 = mpl.bar(ind, bear_df.charged)
p2 = mpl.bar(ind, bear_df.polar, bottom=bear_df.charged)
p3 = mpl.bar(ind, bear_df.hydrophobic, bottom=bear_df.charged + bear_df.polar)
mpl.xlabel('Ursus Species')
mpl.xticks(ind, bear_df.species, rotation=30, horizontalalignment='right')
mpl.ylabel('Proportion')
# numpy is imported as `num` in this file; `np` is undefined and raised NameError.
mpl.yticks(num.arange(0, 1.1, 0.1))
# Anchor the legend just outside the top-right corner so it does not cover the bars.
mpl.legend((p1[0], p2[0], p3[0]), ('Charged', 'Polar', 'Hydrophobic'), bbox_to_anchor=(1.01, 1.01))
mpl.show()


## 8 ##

# Write the augmented table (including its index) to a CSV file.
bear_df.to_csv(path_or_buf='bears_mass_cytb.csv')


#bonus

# Bonus: count G, A, C and T in every sequence, print the counts per species,
# and collect them in the `nuc_count` DataFrame (one row per species).
Bonus = get_sequences_from_file("bears_cytb.fasta")

count_rows = []

for data, value in Bonus.items():
    sequence = str(value)

    # Count each base explicitly so that any other symbol (e.g. N or a gap)
    # is not silently added to the C total.
    G = sequence.count('G')
    A = sequence.count('A')
    C = sequence.count('C')
    T = sequence.count('T')

    count_rows.append({'species': data, 'G': G, 'A': A, 'C': C, 'T': T})

    print(data)
    print("G={}, A={}, C={}, T={}".format(G,A,C,T))

# Build an actual DataFrame instance; the bare name `pd.DataFrame` is the class.
nuc_count = pd.DataFrame(count_rows)



