### Import dependencies

In [102]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
#!pip install squarify
#plt.style.use('fivethirtyeight')
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.tools as tls
import pygsheets
%matplotlib inline


### Authorize connection between Jupyter notebook (python working environment; like R studio) and google sheets so we can run analyses as the sheet is updated

In [103]:
gc = pygsheets.authorize() 
# Use customized credentials 
gc = pygsheets.authorize(client_secret='client_secret.json')
# For the first time, it will may produce as a link to authorize

### Open spreadsheet by name

In [104]:
sh = gc.open('PythonCurrentFlow_Aging')

### Open "Data" worksheet (can also use sh.sheet1)

In [105]:
data = sh[0]

### Get worksheet values as pandas dataframe

In [106]:
aging_data = pd.DataFrame(data.get_all_records())

In [107]:
aging_data

Unnamed: 0,Genus,Species,Sample ID,Species Code,Site,River,Basin,Lat,Long,Date Collected,...,Status Upon Collection,Final Age,Age dif,Z age,L age,B age,K&M Age,K age,M age,Notes
0,Amblema,plicata,1,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,15,14,
1,Amblema,plicata,2,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,16,,,,,16,16,12,
2,Amblema,plicata,3,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,17,,,,,17,17,9,
3,Amblema,plicata,4,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,13,12,
4,Amblema,plicata,5,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,14,,,,,14,14,12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,Lampsilis,teres,471,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
470,Lampsilis,teres,472,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
471,Lampsilis,teres,473,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
472,Lampsilis,teres,474,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,B: how could you tell difference b/w 0.5 & 1?


In [108]:
df_rename=aging_data.rename(columns={"Sample ID": "id", "Genus":"genus", "Species":"species", "Species Code": "code", "Site":"site", "River":"river", "Basin":"basin", "Lat":"lat", "Long":"long", "Date Collected":"date", "Status Upon Collection":"status", "Ager":"ager", "Age":"age"})
df_rename

Unnamed: 0,genus,species,id,code,site,river,basin,lat,long,date,...,status,Final Age,Age dif,Z age,L age,B age,K&M Age,K age,M age,Notes
0,Amblema,plicata,1,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,15,14,
1,Amblema,plicata,2,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,16,,,,,16,16,12,
2,Amblema,plicata,3,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,17,,,,,17,17,9,
3,Amblema,plicata,4,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,13,12,
4,Amblema,plicata,5,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,14,,,,,14,14,12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,Lampsilis,teres,471,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
470,Lampsilis,teres,472,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
471,Lampsilis,teres,473,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
472,Lampsilis,teres,474,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,B: how could you tell difference b/w 0.5 & 1?


### What do age measurements look like across Z, L & B?
#### Compare value counts 

In [109]:
pd.crosstab(index=aging_data['Species'], columns=aging_data['Z age'])

Z age,0.5,1,2,3,4,5,6,7,8,9,12,13,Unnamed: 13_level_0,NA
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
plicata,0,1,1,0,0,0,0,0,1,0,1,1,320,0
teres,7,6,12,33,22,18,14,10,7,2,0,0,16,2


##### L. teres
#### Ager Z: aged most mussels at 3 yrs old

In [110]:
pd.crosstab(index=aging_data['Species'], columns=aging_data['L age'])

L age,0.5,1,2,3,4,5,6,7,8,9,10,11,13,Unnamed: 14_level_0
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
plicata,0,1,1,0,0,0,0,0,0,1,1,0,1,320
teres,7,0,6,19,26,34,17,8,10,1,2,1,0,18


##### L. teres
#### Ager L: aged most mussels at 5 yrs old

In [111]:
pd.crosstab(index=aging_data['Species'], columns=aging_data['B age'])

B age,1,2,3,4,5,6,7,8,9,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
plicata,0,0,0,0,0,0,0,0,0,325,0,0
teres,9,12,25,27,21,12,10,5,2,24,1,1


##### L. teres
#### Ager B: aged most mussels at 4 yrs old

In [35]:
pd.crosstab(index=aging_data['Species'], columns=aging_data['Site'])

Site,"Altair, TX","Bay City, TX","Bellville, TX","Gonzales, TX","Navasota, TX","Simonton, TX","Victoria, TX"
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
plicata,100,0,5,101,19,0,100
teres,13,67,19,0,0,50,0


### Download dependencies for figures 

In [82]:
df_rename.head()

Unnamed: 0,genus,species,id,code,site,river,basin,lat,long,date,...,status,Final Age,Age dif,Z age,L age,B age,K&M Age,K age,M age,Notes
0,Amblema,plicata,1,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,15,14,
1,Amblema,plicata,2,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,16,,,,,16,16,12,
2,Amblema,plicata,3,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,17,,,,,17,17,9,
3,Amblema,plicata,4,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,13,12,
4,Amblema,plicata,5,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,14,,,,,14,14,12,


In [112]:
df_rename['site'].value_counts()

Altair, TX       113
Gonzales, TX     101
Victoria, TX     100
Bay City, TX      67
Simonton, TX      50
Bellville, TX     24
Navasota, TX      19
Name: site, dtype: int64

In [113]:
df_rename['species'].value_counts()

plicata    325
teres      149
Name: species, dtype: int64

In [114]:
df = df_rename
df

Unnamed: 0,genus,species,id,code,site,river,basin,lat,long,date,...,status,Final Age,Age dif,Z age,L age,B age,K&M Age,K age,M age,Notes
0,Amblema,plicata,1,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,15,14,
1,Amblema,plicata,2,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,16,,,,,16,16,12,
2,Amblema,plicata,3,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,17,,,,,17,17,9,
3,Amblema,plicata,4,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,15,,,,,15,13,12,
4,Amblema,plicata,5,D,"Gonzales, TX",Guadalupe,Guadalupe,29.493646°,-97.431293°,9/24/19,...,Alive,14,,,,,14,14,12,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
469,Lampsilis,teres,471,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
470,Lampsilis,teres,472,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
471,Lampsilis,teres,473,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,
472,Lampsilis,teres,474,E,"Bellville, TX",Brazos,Brazos,29.939875°,-96.129332°,8/28/20,...,Alive,0.5,0,0.5,0.5,1,,,,B: how could you tell difference b/w 0.5 & 1?


In [115]:
import plotly.figure_factory as ff




In [116]:
x1 = df["Z age"]
x2 = df["L age"]
x3 = df["B age"]

hist_data = [x1, x2, x3]

group_labels = ['Z', 'L', 'B']
colors = ['#A56CC1', '#A6ACEC', '#63F5EF']

# Create distplot with curve_type set to 'normal'
fig = ff.create_distplot(hist_data, group_labels, colors=colors,
                         bin_size=.2, show_rug=False)

# Add title
fig.update_layout(title_text='Hist and Curve Plot')
fig.show()

TypeError: '<' not supported between instances of 'int' and 'str'