# Performing statistical analysis for weight and final adiposity

Download timeseries for weight and terminate data from the Dropbox/../Figures/Data for Figures. Perform statistical analysis for (A) weight vs. time and (B) final adiposity for all 4 experimental groups

In this analysis, I perform 3 types of statistical test for both body weight and adiposity:
1. 2-sample independent t-tests 
2. ANOVA
3. MannU Whitney Analysis

In [1]:
#----------------------------------------------------------
# Import important libraries
#----------------------------------------------------------
import pandas as pd
import numpy as np
import datetime
import os 
import matplotlib.pyplot as plt
import seaborn as sns
import math
from scipy import stats
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import statsmodels.stats.multicomp
import matplotlib.dates as mdates
sns.set()

## Download Body Weight data

In [42]:
#----------------------------------------------------------
# Import body weight data
#----------------------------------------------------------
weight_data_location = "https://www.dropbox.com/s/wktthzuhv7vgvox/2018VT%20-%20final%20weight%20log.csv?dl=1"
body_weight = pd.read_csv(weight_data_location).T
body_weight.columns = body_weight.iloc[0]

metafile = body_weight.iloc[1:3].T
body_weight.drop(['Rat', 'Diet', 'Feeding'], inplace = True)
metafile.head()


Unnamed: 0_level_0,Diet,Feeding
Rat,Unnamed: 1_level_1,Unnamed: 2_level_1
1,control,ad lib
2,control,ad lib
3,control,ad lib
4,control,ad lib
5,control,ad lib


## Download terminal data master document

In [5]:
#----------------------------------------------------------
# Import terminal data
#----------------------------------------------------------
master_data_location = 'https://www.dropbox.com/s/eu0szifsb9yimo3/2018VT_termination_data_master_document.csv?dl=1'
master_data = pd.read_csv(master_data_location, index_col=0)
# Correct column names
master_data = master_data.rename(columns={" Leptin": "Leptin", "triglyceride (mg/mL)": "Triglyceride"})
master_data['group']=master_data.diet+' '+master_data.feeding_schedule
master_data.head()

Unnamed: 0_level_0,diet,feeding_schedule,liver_weight,total_abdominal_fat,total_gonadal_fat,total_fat_pad,final_BW,Leptin,Cholesterol,Triglyceride,GSP (umol/L),Adiponectin,group
Rat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,control,ad lib,17.2,4.6,3.9,8.5,380,1.637628,82.053143,0.942691,112.240742,4.988413,control ad lib
2,control,ad lib,14.0,5.4,3.4,8.8,318,2.032943,103.345415,1.254153,121.759269,4.256736,control ad lib
3,control,ad lib,13.6,5.5,3.5,9.0,351,1.555637,55.869566,0.228261,144.648151,6.666516,control ad lib
4,control,ad lib,14.5,5.2,4.7,9.9,357,2.997072,96.807567,0.407609,155.222252,3.853354,control ad lib
5,control,ad lib,13.5,7.4,5.6,13.0,363,2.704246,81.771341,0.619565,221.916665,4.382227,control ad lib


# Statistics Testing 
## A.Body Weight
### Create a Body Weight dataframe specifically for ANOVA

In [8]:
#----------------------------------------------------------
#Created BW dataframe appropriate for ANOVA testing
#----------------------------------------------------------
body_weight_anova = pd.read_csv(weight_data_location, index_col = 0)
body_weight_anova['diet_and_schedule'] = body_weight_anova["Diet"].astype(str) +" "+ body_weight_anova["Feeding"].astype(str)
body_weight_anova.head()

Unnamed: 0_level_0,Diet,Feeding,17-Sep,18-Sep,19-Sep,20-Sep,21-Sep,22-Sep,23-Sep,24-Sep,...,1-Nov,2-Nov,3-Nov,4-Nov,5-Nov,6-Nov,7-Nov,8-Nov,9-Nov,diet_and_schedule
Rat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,control,ad lib,41,47,54,58,64,70,77,85,...,351,355,357,358,364,368,371,376,380,control ad lib
2,control,ad lib,41,45,50,54,58,64,69,74,...,293,298,298,302,304,308,311,315,318,control ad lib
3,control,ad lib,53,58,64,71,78,83,89,94,...,324,330,333,336,333,338,340,347,351,control ad lib
4,control,ad lib,41,45,50,54,59,64,70,77,...,331,337,338,345,342,347,351,355,357,control ad lib
5,control,ad lib,47,51,57,61,67,72,80,87,...,329,336,338,343,347,348,356,360,363,control ad lib


## 1. Two Sample Independent T-Tests
### Cont AdLib vs HFHS AdLib (Before Day 28)
This analysis is to examine how body weight between all HFHS rats differed from Control rats in the first 28 days (first phase) of the experiment. There are also t-tests for comparisons after Day 28.

In [9]:
cont_hfhs_t_test_results = {}
cont_adlib_cont_restr_t_test_results = {}
cont_adlib_hfhs_adlib_t_test_results = {}
hfhs_adlib_hfhs_restr_t_test_results = {}
cont_restr_hfhs_restr_t_test_results = {}

daynumber = 1
for day in body_weight.index:
    groupa = body_weight_anova.where((body_weight_anova.diet_and_schedule == "control ad lib") | (body_weight_anova.diet_and_schedule == "control restriction"))[day].dropna()
    groupb = body_weight_anova.where((body_weight_anova.diet_and_schedule == "HFHS ad lib") | (body_weight_anova.diet_and_schedule == "HFHS restriction"))[day].dropna()
    group1 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control ad lib")[day].dropna()
    group2 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control restriction")[day].dropna()
    group3 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS ad lib")[day].dropna()
    group4 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS restriction")[day].dropna()
    # Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted - Only Search for Days Before ~27
    cont_hfhs_t_test_results["Day " + str(daynumber) + ": " + day + " - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted"] = stats.ttest_ind(groupa,groupb)
    # Control AdLib vs Control Restricted
    cont_adlib_cont_restr_t_test_results["Day " + str(daynumber) + ": " + day + " - Control AdLib vs Control Restricted"] = stats.ttest_ind(group1,group2)
    # Control Adlib vs HFHS Ad Lib - Only Search for Days After ~27
    cont_adlib_hfhs_adlib_t_test_results["Day " + str(daynumber) + ": " + day + " - Control Adlib vs HFHS Ad Lib"] = stats.ttest_ind(group1,group3)
    # HFHS AdLib vs HFHS Restricted
    hfhs_adlib_hfhs_restr_t_test_results["Day " + str(daynumber) + ": " + day + " - HFHS AdLib vs HFHS Restricted"] = stats.ttest_ind(group3,group4)
    # Control Restricted vs HFHS Restricted
    cont_restr_hfhs_restr_t_test_results["Day " + str(daynumber) + ": " + day + " - Control Restricted vs HFHS Restricted"] = stats.ttest_ind(group2,group4)
    daynumber += 1
# Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted Results
cont_hfhs_t_test_results_df = pd.DataFrame.from_dict(cont_hfhs_t_test_results, orient='Index')
cont_hfhs_t_test_results_df.columns = ['statistic','pvalue']
cont_hfhs_significant_days = cont_hfhs_t_test_results_df[cont_hfhs_t_test_results_df.pvalue<0.05].drop_duplicates(subset='pvalue')

# Control AdLib vs Control Restricted Results
cont_adlib_cont_restr_t_test_results_df = pd.DataFrame.from_dict(cont_adlib_cont_restr_t_test_results, orient='Index')
cont_adlib_cont_restr_t_test_results_df.columns = ['statistic','pvalue']
cont_adlib_cont_restr_significant_days = cont_adlib_cont_restr_t_test_results_df[cont_adlib_cont_restr_t_test_results_df.pvalue<0.05].drop_duplicates(subset='pvalue')

# Control Adlib vs HFHS Ad Lib Results
cont_adlib_hfhs_adlib_t_test_results_df = pd.DataFrame.from_dict(cont_adlib_hfhs_adlib_t_test_results, orient='Index')
cont_adlib_hfhs_adlib_t_test_results_df.columns = ['statistic','pvalue']
cont_adlib_hfhs_adlib_significant_days = cont_adlib_hfhs_adlib_t_test_results_df[cont_adlib_hfhs_adlib_t_test_results_df.pvalue<0.05].drop_duplicates(subset='pvalue')

# HFHS AdLib vs HFHS Restricted Results
hfhs_adlib_hfhs_restr_t_test_results_df = pd.DataFrame.from_dict(hfhs_adlib_hfhs_restr_t_test_results, orient='Index')
hfhs_adlib_hfhs_restr_t_test_results_df.columns = ['statistic','pvalue']
hfhs_adlib_hfhs_restr_significant_days = hfhs_adlib_hfhs_restr_t_test_results_df[hfhs_adlib_hfhs_restr_t_test_results_df.pvalue<0.05].drop_duplicates(subset='pvalue')

# Control Restricted vs HFHS Restricted Results
cont_restr_hfhs_restr_t_test_results_df = pd.DataFrame.from_dict(cont_restr_hfhs_restr_t_test_results, orient='Index')
cont_restr_hfhs_restr_t_test_results_df.columns = ['statistic','pvalue']
cont_restr_hfhs_restr_significant_days = cont_restr_hfhs_restr_t_test_results_df[cont_restr_hfhs_restr_t_test_results_df.pvalue<0.05].drop_duplicates(subset='pvalue')


In [50]:
cont_hfhs_significant_days.to_csv("ControlvsHFHS_BeforeDay28_significant_days.csv")
cont_hfhs_significant_days.head()

Unnamed: 0,statistic,pvalue
Day 2: 18-Sep - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted,-2.107341,0.04277
Day 3: 19-Sep - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted,-2.346541,0.025103
Day 4: 20-Sep - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted,-2.508241,0.017228
Day 5: 21-Sep - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted,-3.190456,0.00311
Day 6: 22-Sep - Control AdLib + Control Restricted vs HFHS AdLib + HFHS Restricted,-3.331162,0.00214


### Control AdLib vs HFHS AdLib (After Day 28)

In [49]:
cont_adlib_hfhs_adlib_significant_days.iloc[16:21]

Unnamed: 0,statistic,pvalue
Day 25: 11-Oct - Control Adlib vs HFHS Ad Lib,-2.632888,0.018824
Day 26: 12-Oct - Control Adlib vs HFHS Ad Lib,-2.364131,0.031987
Day 27: 13-Oct - Control Adlib vs HFHS Ad Lib,-2.174674,0.046066
Day 28: 14-Oct - Control Adlib vs HFHS Ad Lib,-2.155829,0.047744
Day 29: 15-Oct - Control Adlib vs HFHS Ad Lib,-2.365468,0.031903


### Cont Res vs HFHS Res

In [51]:
cont_restr_hfhs_restr_significant_days.iloc[20:25]

Unnamed: 0,statistic,pvalue
Day 26: 12-Oct - Control Restricted vs HFHS Restricted,-3.111502,0.006716
Day 27: 13-Oct - Control Restricted vs HFHS Restricted,-3.289082,0.004624
Day 28: 14-Oct - Control Restricted vs HFHS Restricted,-2.91751,0.010068
Day 29: 15-Oct - Control Restricted vs HFHS Restricted,-3.232084,0.005214
Day 30: 16-Oct - Control Restricted vs HFHS Restricted,-3.374097,0.003866


### Cont AdLib vs Cont Res

In [13]:
cont_adlib_cont_restr_significant_days

Unnamed: 0,statistic,pvalue
Day 44: 30-Oct - Control AdLib vs Control Restricted,2.666408,0.016894
Day 45: 31-Oct - Control AdLib vs Control Restricted,2.41913,0.027838
Day 46: 1-Nov - Control AdLib vs Control Restricted,2.246678,0.039127


### HFHS AdLib vs HFHS Res

In [14]:
hfhs_adlib_hfhs_restr_significant_days

Unnamed: 0,statistic,pvalue


## 2. ANOVA Analysis with PostHoc Tukey
This analysis is to examine how body weight between 4 experimental groups differed from each other after day 28 (second phase) of the experiment. There are also ANOVA analyses for comparisons before Day 28 between all HFHS rats vs all Control rats

### First, I'll check for normality with the Shapiro-Wilk Test. Results printed below reveal which group of rats and which day were the rats having a non-normal body weight population

In [15]:
daynumber = 1
for day in body_weight.index:
    # Divide the body weight data into 6 groups (all HFHS rats, all Control rats, control ad lib, control restriction, HFHS ad lib, HFHS restriction)
    groupa = body_weight_anova.where((body_weight_anova.diet_and_schedule == "control ad lib") | (body_weight_anova.diet_and_schedule == "control restriction"))[day].dropna()
    groupb = body_weight_anova.where((body_weight_anova.diet_and_schedule == "HFHS ad lib") | (body_weight_anova.diet_and_schedule == "HFHS restriction"))[day].dropna()
    group1 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control ad lib")[day].dropna()
    group2 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control restriction")[day].dropna()
    group3 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS ad lib")[day].dropna()
    group4 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS restriction")[day].dropna()
    before28 = [groupa, groupb]
    after28 = [group1, group2, group3, group4]
    # For the first phase of the experiment
    if daynumber < 28:
        for group in before28:
            # Check normality for 1. all HFHS rats and 2. all Control rats for each day
            w, p = stats.shapiro(group)
            if p < 0.05:
                print("Day: " + str(daynumber))
                print(str(day))
                print(group)
                print(p)
    # For the second phase of the experiment
    else:
        for group in after28:
            # Check normality for all 4 experimental groups for each day
            w, p = stats.shapiro(group)
            if p < 0.05:
                print("Day: " + str(daynumber))
                print(str(day))
                print(group)
                print(p)    
    daynumber += 1

Day: 31
17-Oct
Rat
10    233.0
11    180.0
12    236.0
13    233.0
14    199.0
15    233.0
16    191.0
17    216.0
18    231.0
Name: 17-Oct, dtype: float64
0.03178425878286362
Day: 33
19-Oct
Rat
10    253.0
11    196.0
12    249.0
13    254.0
14    221.0
15    250.0
16    203.0
17    232.0
18    251.0
Name: 19-Oct, dtype: float64
0.033813685178756714
Day: 34
20-Oct
Rat
10    254.0
11    196.0
12    254.0
13    248.0
14    219.0
15    252.0
16    206.0
17    234.0
18    253.0
Name: 20-Oct, dtype: float64
0.03479504957795143
Day: 36
22-Oct
Rat
10    262.0
11    204.0
12    261.0
13    256.0
14    226.0
15    259.0
16    213.0
17    240.0
18    261.0
Name: 22-Oct, dtype: float64
0.03745651617646217
Day: 37
23-Oct
Rat
10    264.0
11    204.0
12    251.0
13    258.0
14    223.0
15    259.0
16    206.0
17    248.0
18    262.0
Name: 23-Oct, dtype: float64
0.030887814238667488
Day: 38
24-Oct
Rat
10    265.0
11    205.0
12    267.0
13    268.0
14    229.0
15    273.0
16    214.0
17    253.0
18 

### Next, I'll check to see if the variance among groups for each day is equal using the Levene test
**For days before Day 28, I will check if all the HFHS rats have equal variance to all the Control rats**

**For days after Day 28, I will check if all 4 groups have equal variance**

Results printed below will show the Day number and variance values for days when the body weight values among the groups was unequal

In [16]:
#----------------------------------------------------------
#Checked for 10x variance for each day
#----------------------------------------------------------
daynumber = 1
for day in body_weight.index:
    groupa = body_weight_anova.where((body_weight_anova.diet_and_schedule == "control ad lib") | (body_weight_anova.diet_and_schedule == "control restriction"))[day].dropna()
    groupb = body_weight_anova.where((body_weight_anova.diet_and_schedule == "HFHS ad lib") | (body_weight_anova.diet_and_schedule == "HFHS restriction"))[day].dropna()
    group1 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control ad lib")[day].dropna()
    group2 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control restriction")[day].dropna()
    group3 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS ad lib")[day].dropna()
    group4 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS restriction")[day].dropna()
    variances = {}
    if daynumber < 28:
        variances["Control Ad Lib"] = np.var(groupa)
        variances["HFHS Ad Lib"] = np.var(groupb)
    else:
        variances["Control Ad Lib"] = np.var(group1)
        variances["HFHS Ad Lib"] = np.var(group3)
        variances["Control Restriction"] = np.var(group2)
        variances["HFHS Restriction"] = np.var(group4)
    if max(variances.values()) >= (10 * min(variances.values())):
        print(day)
        print(variances)
    daynumber += 1
    
#----------------------------------------------------------
#Ran Levene Test
#----------------------------------------------------------
daynumber = 1
for day in body_weight.index:
    groupa = body_weight_anova.where((body_weight_anova.diet_and_schedule == "control ad lib") | (body_weight_anova.diet_and_schedule == "control restriction"))[day].dropna()
    groupb = body_weight_anova.where((body_weight_anova.diet_and_schedule == "HFHS ad lib") | (body_weight_anova.diet_and_schedule == "HFHS restriction"))[day].dropna()
    group1 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control ad lib")[day].dropna()
    group2 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "control restriction")[day].dropna()
    group3 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS ad lib")[day].dropna()
    group4 = body_weight_anova.where(body_weight_anova.diet_and_schedule == "HFHS restriction")[day].dropna()
    if daynumber < 28:
        s, p = stats.levene(groupa, groupb)
    else:
        s, p = stats.levene(group1, group2, group3, group4)
    if p < 0.05:
        print(daynumber)
        print(day)
        print(s)
        print(p)
    daynumber += 1
    

11
27-Sep
4.409061110477712
0.04347561274847819


# Now, I will run a 2-way Type-1 ANOVA with TukeyHSD PostHoc Comparisions among all 4 diet groups (after day 28)

In [55]:
#----------------------------------------------------------
# Run ANOVA stats
#----------------------------------------------------------
def day_anova_analysis(day, anova_data):
    formula = 'Q("' + day + '") ~ C(Diet) + C(Feeding) + C(Diet):C(Feeding)'
    model = ols(formula, anova_data).fit()
    aov_table = anova_lm(model, typ=1)

    mc_interaction = statsmodels.stats.multicomp.MultiComparison(anova_data[day], anova_data['diet_and_schedule'])
    mc_interaction_results = mc_interaction.tukeyhsd()
    mc_interaction = pd.DataFrame(data=mc_interaction_results._results_table.data[1:], columns=mc_interaction_results._results_table.data[0])

    result = pd.concat([aov_table, mc_interaction], axis = 0, sort = False)
    # result.to_csv(group + "_statistical_analysis.csv")
     
    return result

daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result["reject"].sum() > 0) and (daynumber > 25):
        print("Day " + str(daynumber))
        print(day)
        print(result.loc[[0, 1, 2, 3, 4, 5], ["group1", "group2", "p-adj", "reject"]])
    daynumber += 1


Day 26
12-Oct
             group1               group2   p-adj reject
0       HFHS ad lib     HFHS restriction  0.9000  False
1       HFHS ad lib       control ad lib  0.0886  False
2       HFHS ad lib  control restriction  0.0718  False
3  HFHS restriction       control ad lib  0.0327   True
4  HFHS restriction  control restriction  0.0256   True
5    control ad lib  control restriction  0.9000  False
Day 27
13-Oct
             group1               group2   p-adj reject
0       HFHS ad lib     HFHS restriction  0.9000  False
1       HFHS ad lib       control ad lib  0.1205  False
2       HFHS ad lib  control restriction  0.0811  False
3  HFHS restriction       control ad lib  0.0324   True
4  HFHS restriction  control restriction  0.0201   True
5    control ad lib  control restriction  0.9000  False
Day 28
14-Oct
             group1               group2   p-adj reject
0       HFHS ad lib     HFHS restriction  0.9000  False
1       HFHS ad lib       control ad lib  0.1508  False
2     

### Sending all of the ANOVA + PostHoc Results to a CSV File

In [18]:
df = pd.DataFrame()

daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    result.iloc[0, -1] = day
    result.iloc[1, -1] = "Day: " + str(daynumber)
    df = df.append(result)
    daynumber += 1
df.fillna("").rename(index={0:'', 1:'', 2:'', 3:'', 4:'', 5:''}).to_csv("All days - ANOVA with TukeyHSD.csv")

## Looking Closer into Each ANOVA and Tukey Result for Certain Groups

### HFHS AdLib vs Control AdLib

#### PostHoc + ANOVA for significant days

In [19]:
daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[1], ["p-adj"]].values < 0.05) and (daynumber > 25):
        print("Day " + str(daynumber))
        print(day)
        print(result)
    daynumber += 1

No results, so no days when HFHS ad lib rats had significantly less or more body weight than Control ad lib rats after Day 28

### HFHS Res vs Control Res

#### PostHoc + ANOVA for significant days

In [20]:
daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[4], ["p-adj"]].values < 0.05) and (daynumber > 25):
        print("Day " + str(daynumber))
        print(day)
        print(result.loc[[4], ["group1", "group2", "p-adj", "reject"]].to_string(index=False) + "\n")
        print(result.iloc[0:4, 0:5])
        print("\n")
    daynumber += 1

Day 26
12-Oct
           group1               group2   p-adj reject
 HFHS restriction  control restriction  0.0256   True

                      df        sum_sq      mean_sq          F    PR(>F)
C(Diet)              1.0   4959.378525  4959.378525  14.958558  0.000527
C(Feeding)           1.0     10.653991    10.653991   0.032135  0.858899
C(Diet):C(Feeding)   1.0     36.875421    36.875421   0.111224  0.741000
Residual            31.0  10277.777778   331.541219        NaN       NaN


Day 27
13-Oct
           group1               group2   p-adj reject
 HFHS restriction  control restriction  0.0201   True

                      df        sum_sq      mean_sq          F    PR(>F)
C(Diet)              1.0   5263.311298  5263.311298  14.713495  0.000575
C(Feeding)           1.0     16.973262    16.973262   0.047448  0.828992
C(Diet):C(Feeding)   1.0     93.367424    93.367424   0.261007  0.613049
Residual            31.0  11089.319444   357.719982        NaN       NaN


Day 28
14-Oct
      

### Send the Results to a CSV File

In [21]:
df = pd.DataFrame()

daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[4], ["p-adj"]].values < 0.05) and (daynumber > 25):
        result.iloc[0, -1] = day
        result.iloc[1, -1] = "Day: " + str(daynumber)
        df = df.append(result.iloc[[0, 1, 2, 3, 8],:])
    daynumber += 1
df.fillna("").rename(index={0:'', 1:'', 2:'', 3:'', 4:'', 5:''}).to_csv("HFHSRes vs ControlRes significant days - ANOVA with TukeyHSD.csv")

### Control Res vs Control AdLib

#### PostHoc + ANOVA for significant days

In [53]:
daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[5], ["p-adj"]].values < 0.05) and (daynumber > 25):
        print("Day " + str(daynumber))
        print(day)
        print(result.loc[[5], ["group1", "group2", "p-adj"]].to_string(index=False) + "\n")
        print(result.iloc[0:4, 0:5])
        print("\n")
    daynumber += 1

Day 44
30-Oct
         group1               group2   p-adj
 control ad lib  control restriction  0.0323

                     df        sum_sq       mean_sq          F    PR(>F)
C(Diet)             1.0  15876.900093  15876.900093  24.470140  0.000025
C(Feeding)          1.0   3940.469796   3940.469796   6.073216  0.019471
C(Diet):C(Feeding)  1.0   1729.263047   1729.263047   2.665212  0.112682




### Send the Results to a CSV File

In [24]:
df = pd.DataFrame()

daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[5], ["p-adj"]].values < 0.05) and (daynumber > 25):
        result.iloc[0, -1] = day
        result.iloc[1, -1] = "Day: " + str(daynumber)
        df = df.append(result.iloc[[0, 1, 2, 3, 9],:])
    daynumber += 1
df.fillna("").rename(index={0:'', 1:'', 2:'', 3:'', 4:'', 5:''}).to_csv("ControlAdLib vs ControlRes significant days - ANOVA with TukeyHSD.csv")

### HFHS Res vs HFHS AdLib

#### PostHoc + ANOVA for significant days

In [54]:
daynumber = 1
for day in body_weight.index:
    result = day_anova_analysis(day, body_weight_anova)
    if (result.loc[[0], ["p-adj"]].values < 0.05) and (daynumber > 25):
        print("Day " + str(daynumber))
        print(day)
        print(result.loc[[0], ["group1", "group2", "p-adj"]].to_string(index=False) + "\n")
        print(result.iloc[0:3, 0:5])
        print("\n")
    daynumber += 1

## 3. Mann-U Whitney Test

In [43]:
# Add experimental group labels to metafile data
metafile['group']=metafile.Diet+' '+metafile.Feeding

def MannUWhitneyAnalysis(column_name, dataframe, interest_list, measurement_of_interest):
    # Create the columns for the Mann-U Whitney Analysis Table
    groups = metafile.group.unique()
    column_names = [column_name, "group1", "group2", "p_value"]
    df = pd.DataFrame(columns = column_names)

    # Separate dataframes by experimental condition
    feeding = metafile.Feeding.unique()
    diet   = metafile.Diet.unique()
    group_dict={}
    for x in diet:
            for y in feeding:
                group = str(x)+' '+str(y)
                ids = metafile[(metafile.Diet==x) & (metafile.Feeding==y)].index
                by_group = dataframe.loc[ids]
                by_group.dropna(inplace = True)
                group_dict[group]=by_group

    # For each metabolite/hormone, compare expression between 4 experimental groups. 
    # Save p-values and separate into significant and non-significant dataframes
    i=0
    for c in interest_list:

        for x in groups:
            group1 = group_dict[x][c]

            for y in groups:
                if(y!= x):
                    group2 = group_dict[y][c]
                    u_statistic, pVal = stats.mannwhitneyu(group1, group2)
                    df.loc[i]=[c, x, y, pVal]
                    i+=1
    df.to_csv(measurement_of_interest + '_p_values_Mann_Whitney.csv')
    # Sepatate significantly expressed genes (p<0.05) and  Remove repeated comparisons
    significant = df[df.p_value<0.05].drop_duplicates(subset='p_value').reset_index(drop=True)
    # Save significant genes to .csv file
    significant.to_csv('significant_' + measurement_of_interest + '_p_values_Mann_Whitney.csv')
    return significant
    
significant_days = MannUWhitneyAnalysis("Day", body_weight_anova, body_weight.index, "Body_weight")

# Check the significant days for control restriction vs control ad lib
significant_days[significant_days.group2 == "control restriction"]

Unnamed: 0,Day,group1,group2,p_value
104,20-Oct,control ad lib,control restriction,0.028622
114,24-Oct,control ad lib,control restriction,0.016529
118,25-Oct,control ad lib,control restriction,0.03164
124,27-Oct,control ad lib,control restriction,0.023359
128,28-Oct,control ad lib,control restriction,0.046614
131,29-Oct,control ad lib,control restriction,0.021024
134,30-Oct,control ad lib,control restriction,0.009611
137,31-Oct,control ad lib,control restriction,0.010842
141,3-Nov,control ad lib,control restriction,0.025908
144,6-Nov,control ad lib,control restriction,0.031708


# B. Total Fat Mass

## 1. 2-Sample T-Test Analysis
I will use Student's t-test (default version in stats_ind) to compare fat pad mass between diet groups

In [28]:
fat_list = ["total_fat_pad", "total_abdominal_fat", "total_gonadal_fat", "liver_weight"]
t_test_results = {}
for group in fat_list:
    group1 = master_data.where(master_data.group == "control ad lib")[group].dropna()
    group2 = master_data.where(master_data.group == "control restriction")[group].dropna()
    group3 = master_data.where(master_data.group == "HFHS ad lib")[group].dropna()
    group4 = master_data.where(master_data.group == "HFHS restriction")[group].dropna()
    # Control AdLib vs Control Restricted
    t_test_results[group + "- Control AdLib vs Control Restricted"] = stats.ttest_ind(group1,group2)
    # Control Adlib vs HFHS Ad Lib
    t_test_results[group + "- Control Adlib vs HFHS Ad Lib"] = stats.ttest_ind(group1,group3)
    # HFHS AdLib vs HFHS Restricted
    t_test_results[group + "- HFHS AdLib vs HFHS Restricted"] = stats.ttest_ind(group3,group4)
    # Control Restricted vs HFHS Restricted
    t_test_results[group + "- Control Restricted vs HFHS Restricted"] = stats.ttest_ind(group2,group4)
results_df = pd.DataFrame.from_dict(t_test_results,orient='Index')
results_df.columns = ['statistic','pvalue']
#results_df
significant_group_v2 = results_df[results_df.pvalue<0.05].drop_duplicates(subset='pvalue')
significant_group_v2

Unnamed: 0,statistic,pvalue
total_fat_pad- Control Adlib vs HFHS Ad Lib,-2.853223,0.012087
total_fat_pad- Control Restricted vs HFHS Restricted,-3.608719,0.002355
total_abdominal_fat- Control Adlib vs HFHS Ad Lib,-2.358751,0.032323
total_abdominal_fat- Control Restricted vs HFHS Restricted,-3.633199,0.002237
total_gonadal_fat- Control Adlib vs HFHS Ad Lib,-2.59046,0.020484
total_gonadal_fat- Control Restricted vs HFHS Restricted,-2.583062,0.020017


## 2. ANOVA Analysis with PostHoc Tukey

### First, I'll check for normality with the Shapiro-Wilk Test

In [29]:
for group in master_data["group"].unique():
    w, p = stats.shapiro(master_data.where(master_data.group == group)["total_fat_pad"].dropna())
    print(p)

0.6800441145896912
0.12340932339429855
0.4587366580963135
0.8304331302642822


### Next, I'll check for equal variance with the Levene Test

In [30]:
variances = {}
for group in master_data["group"].unique():
    variances[str(group)] = np.var(master_data.where(master_data.group == group)["total_fat_pad"].dropna())
        

control_ad_lib_fat = master_data.where(master_data.group == "control ad lib")["total_fat_pad"].dropna()
control_restriction_fat = master_data.where(master_data.group == "control restriction")["total_fat_pad"].dropna()
HFHS_ad_lib_fat = master_data.where(master_data.group == "HFHS ad lib")["total_fat_pad"].dropna()
HFHS_restriction_fat = master_data.where(master_data.group == "HFHS restriction")["total_fat_pad"].dropna()

s, p = stats.levene(control_ad_lib_fat, control_restriction_fat, HFHS_ad_lib_fat, HFHS_restriction_fat)
if (p < 0.05) | (max(variances.values()) >= (3 * min(variances.values()))):
    print(p)
    print(variances)

0.004291396222530111
{'control ad lib': 8.291111111111112, 'control restriction': 4.638024691358023, 'HFHS ad lib': 40.94859375, 'HFHS restriction': 8.803950617283949}


### Now, I can run ANOVA and Tukey PostHoc

In [56]:
#----------------------------------------------------------
# Run ANOVA stats
#----------------------------------------------------------
def anova_analysis(metabolite, anova_data):
    formula = metabolite + ' ~ C(diet) + C(feeding_schedule) + C(diet):C(feeding_schedule)'
    model = ols(formula, anova_data).fit()
    aov_table = anova_lm(model, typ=1)
    
    mc_interaction = statsmodels.stats.multicomp.MultiComparison(anova_data[metabolite], anova_data['group'])
    mc_interaction_results = mc_interaction.tukeyhsd()
    mc_interaction = pd.DataFrame(data=mc_interaction_results._results_table.data[1:], columns=mc_interaction_results._results_table.data[0])
    
    result = pd.concat([aov_table, mc_interaction], axis = 0, sort = False)

    return result

total_fat_mass_anova = anova_analysis("total_fat_pad", master_data)

print(total_fat_mass_anova.loc[[0, 1, 4, 5], ["group1", "group2", "p-adj"]].to_string(index = False))
print(total_fat_mass_anova.iloc[0:3, 0:5])

           group1               group2   p-adj
      HFHS ad lib     HFHS restriction  0.3955
      HFHS ad lib       control ad lib  0.0058
 HFHS restriction  control restriction  0.0950
   control ad lib  control restriction  0.9000
                              df      sum_sq     mean_sq          F    PR(>F)
C(diet)                      1.0  297.633348  297.633348  17.635458  0.000209
C(feeding_schedule)          1.0   31.590319   31.590319   1.871799  0.181105
C(diet):C(feeding_schedule)  1.0   13.651519   13.651519   0.808884  0.375384


### Send Results to CSV File

In [33]:
total_fat_mass_anova.fillna("").rename(index={0:'', 1:'', 2:'', 3:'', 4:'', 5:''}).to_csv("Total Fat Mass - ANOVA with TukeyHSD.csv")

## 3. Mann-U Whitney Analysis

In [34]:
significant_fat = MannUWhitneyAnalysis("Total Fat Mass", master_data, fat_list, "Total_fat_mass")
significant_fat

Unnamed: 0,Total Fat Mass,group1,group2,p_value
0,total_fat_pad,control ad lib,HFHS ad lib,0.028187
1,total_fat_pad,control ad lib,HFHS restriction,0.008502
2,total_fat_pad,control restriction,HFHS ad lib,0.005506
3,total_fat_pad,control restriction,HFHS restriction,0.004019
4,total_fat_pad,HFHS ad lib,control restriction,0.005506
5,total_abdominal_fat,control ad lib,HFHS ad lib,0.035868
6,total_abdominal_fat,control ad lib,HFHS restriction,0.010741
7,total_abdominal_fat,control restriction,HFHS ad lib,0.014885
8,total_abdominal_fat,control restriction,HFHS restriction,0.002348
9,total_abdominal_fat,HFHS ad lib,control ad lib,0.035868
