In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import statsmodels
from statsmodels.multivariate.manova import MANOVA

# Correlated trait analysis (MANOVA)

In [2]:
# Read the three per-species measurement tables from the working directory.
longi = pd.read_csv('./p_longi.csv')
crano = pd.read_csv('./p_crano.csv')
sipho = pd.read_csv('./p_sipho.csv')

# Stack them (longiflora, cranolopha, siphonantha — in that order) into one
# frame, tagging every row with a `dataset` label naming its source table.
# Each table keeps its own row index, so index labels repeat across datasets.
concatenated = pd.concat([
    frame.assign(dataset=label)
    for label, frame in (
        ('longiflora', longi),
        ('cranolopha', crano),
        ('siphonantha', sipho),
    )
])
concatenated.columns

Index(['species', 'locality', 'specimen', 'tube_len', 'non_beak', 'beak_len',
       'galea_len', 'lip_len', 'lip_wid', 'cor_wid', 'dataset'],
      dtype='object')

In [3]:
# `df` is an alias of `concatenated`, not a copy: assigning new column labels
# below renames the columns on both names.
df = concatenated
# Swap literal dots in the column labels for underscores (presumably so the
# labels are usable as plain names in model formulas — confirm).
# regex=False makes "." a literal dot; as a regular expression it would match
# any character, and pandas emits a FutureWarning when a single-character
# pattern is passed without stating which meaning is intended.
df.columns = df.columns.str.replace(".", "_", regex=False)
df.head()



  df.columns = df.columns.str.replace(".", "_")


Unnamed: 0,species,locality,specimen,tube_len,non_beak,beak_len,galea_len,lip_len,lip_wid,cor_wid,dataset
0,P. longiflora,33,DE141,42.47,8.46,6.02,14.48,8.85,5.42,17.55,longiflora
1,P. longiflora,33,DE141,49.14,7.41,7.18,14.59,,,14.91,longiflora
2,P. longiflora,33,DE141,49.54,8.87,7.31,16.18,,10.85,16.22,longiflora
3,P. longiflora,33,DE141,48.71,7.98,7.43,15.41,,,15.79,longiflora
4,P. longiflora,34,DE154,79.19,9.01,3.54,12.55,8.21,7.14,18.36,longiflora


In [4]:
# Display the combined frame; the notebook repr abbreviates the middle rows.
df

Unnamed: 0,species,locality,specimen,tube_len,non_beak,beak_len,galea_len,lip_len,lip_wid,cor_wid,dataset
0,P. longiflora,33,DE141,42.47,8.46,6.02,14.48,8.85,5.42,17.55,longiflora
1,P. longiflora,33,DE141,49.14,7.41,7.18,14.59,,,14.91,longiflora
2,P. longiflora,33,DE141,49.54,8.87,7.31,16.18,,10.85,16.22,longiflora
3,P. longiflora,33,DE141,48.71,7.98,7.43,15.41,,,15.79,longiflora
4,P. longiflora,34,DE154,79.19,9.01,3.54,12.55,8.21,7.14,18.36,longiflora
...,...,...,...,...,...,...,...,...,...,...,...
79,P. siphonantha,113,DE530,,4.76,8.21,12.97,9.21,9.66,14.61,siphonantha
80,P. siphonantha,81,DE400,92.93,3.91,6.61,10.52,7.18,8.74,13.71,siphonantha
81,P. siphonantha,81,DE400,94.05,4.13,6.38,10.51,6.89,7.47,13.81,siphonantha
82,P. siphonantha,84,DE423,73.22,4.59,6.67,11.26,7.41,7.99,13.09,siphonantha


In [19]:
# One-way MANOVA: tube_len, beak_len and cor_wid jointly, grouped by `dataset`.
# The formula used to list tube_len twice. The duplicate term contributed
# nothing (the fitted model reports Num DF = 3 for the intercept, i.e. three
# response variables), so it is removed and the formula is written on one line
# instead of using a backslash continuation inside the string literal.
# NOTE(review): if a fourth trait was meant in place of the repeated tube_len,
# add it to the left-hand side.
maov = MANOVA.from_formula('tube_len + beak_len + cor_wid ~ dataset', data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0198 3.0000 339.0000 5580.5293 0.0000
         Pillai's trace  0.9802 3.0000 339.0000 5580.5293 0.0000
 Hotelling-Lawley trace 49.3852 3.0000 339.0000 5580.5293 0.0000
    Roy's greatest root 49.3852 3.0000 339.0000 5580.5293 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
          dataset         Value  Num DF  Den DF  F Value  Pr > F
----------------------------------------------------------------
            Wilks' lambda 0.3289 6.0000 678.0000  84.0490 0.0000
           Pillai's trace 0.7350 6.0000 680.0

In [20]:
# One-way MANOVA: tube_len and cor_wid jointly, grouped by `dataset`.
# The formula is kept on a single line: a backslash continuation inside the
# string literal silently embeds the next line's indentation in the formula
# and becomes a syntax error if any whitespace follows the backslash.
maov = MANOVA.from_formula('tube_len + cor_wid ~ dataset', data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0260 2.0000 343.0000 6430.6169 0.0000
         Pillai's trace  0.9740 2.0000 343.0000 6430.6169 0.0000
 Hotelling-Lawley trace 37.4963 2.0000 343.0000 6430.6169 0.0000
    Roy's greatest root 37.4963 2.0000 343.0000 6430.6169 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
          dataset         Value  Num DF  Den DF  F Value  Pr > F
----------------------------------------------------------------
            Wilks' lambda 0.3570 4.0000 686.0000 115.5350 0.0000
           Pillai's trace 0.6546 4.0000 688.0

In [21]:
# One-way MANOVA: tube_len and beak_len jointly, grouped by `dataset`.
# The formula is kept on a single line: a backslash continuation inside the
# string literal silently embeds the next line's indentation in the formula
# and becomes a syntax error if any whitespace follows the backslash.
maov = MANOVA.from_formula('tube_len + beak_len ~ dataset', data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0367 2.0000 360.0000 4718.9519 0.0000
         Pillai's trace  0.9633 2.0000 360.0000 4718.9519 0.0000
 Hotelling-Lawley trace 26.2164 2.0000 360.0000 4718.9519 0.0000
    Roy's greatest root 26.2164 2.0000 360.0000 4718.9519 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
           dataset         Value  Num DF  Den DF  F Value Pr > F
----------------------------------------------------------------
             Wilks' lambda 0.8422 4.0000 720.0000 16.1396 0.0000
            Pillai's trace 0.1625 4.0000 722.

In [22]:
# One-way MANOVA: non_beak and beak_len jointly, grouped by `dataset`.
# The formula is kept on a single line: a backslash continuation inside the
# string literal silently embeds the next line's indentation in the formula
# and becomes a syntax error if any whitespace follows the backslash.
maov = MANOVA.from_formula('non_beak + beak_len ~ dataset', data=df)
print(maov.mv_test())

                   Multivariate linear model
                                                                
----------------------------------------------------------------
       Intercept         Value  Num DF  Den DF   F Value  Pr > F
----------------------------------------------------------------
          Wilks' lambda  0.0259 2.0000 369.0000 6941.0709 0.0000
         Pillai's trace  0.9741 2.0000 369.0000 6941.0709 0.0000
 Hotelling-Lawley trace 37.6210 2.0000 369.0000 6941.0709 0.0000
    Roy's greatest root 37.6210 2.0000 369.0000 6941.0709 0.0000
----------------------------------------------------------------
                                                                
----------------------------------------------------------------
          dataset         Value  Num DF  Den DF  F Value  Pr > F
----------------------------------------------------------------
            Wilks' lambda 0.2439 4.0000 738.0000 189.0950 0.0000
           Pillai's trace 0.8353 4.0000 740.0