In [1]:
# Update all relevant packages with conda
!conda install statsmodels seaborn pandas scipy seaborn numpy matplotlib -y 

Channels:
 - defaults
Platform: osx-arm64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.



In [147]:
import sys
import pandas as pd
import numpy as np
import scipy.stats as ttest_ind
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd


In [82]:
print(sys.executable)

/opt/anaconda3/envs/ezpz_env/bin/python


In [83]:
# List the contents of the current directory
!ls
# Print the working directory
!pwd
# Check the Python version
!python --version


ezpz_analysis.ipynb
/Users/ecrespo/Documents/github_project_folder/ezpzmouseanalytics/notebooks
Python 3.8.19


# Import the data

In [5]:
file_path = '/Users/ecrespo/Documents/github_project_folder/ezpzmouseanalytics/data/raw/Rbp4_social.csv' 

# Initialize the class 

In [175]:
class EzPzMouseAnalytics:
    def __init__(self, file_path):
        # Main class code
        """
        Initialize the EzPzMouseAnalytics object with the path to the CSV file containing the data.
        :param file_path: Path to the CSV file containing the data.
        
        
        
        
        
        """
        self.file_path = file_path
        """string: Path to the CSV file containing the data."""
        self.data = None
        """pandas.DataFrame: The main dataframe containing all mouse data after initial processing.""" 
        self.genotype_nested_df = None
        """pandas.DataFrame: Grouped dataframe where each group represents a unique genotype."""
        self.genotype_sex_groups = None
        """pandas.DataFrame: Grouped dataframe where each group represents a unique combination of genotype and sex."""
        self.genotype_colors = {
            'Rbp4-LMO3': 'hotpink',
            'wt-LMO3': 'grey'}
        """dict: Mapping of genotypes to their respective plot colors."""

        
        
        # Load and process data
        self.extract_data()  # Load the data
        self.rename_columns()  # Rename columns as needed
        self.calculate_age_in_months() # Calculate age in months and add to DataFrame
        self.check_unique_tags() # Check for duplicate tag entries 
        self._initialize_dataframes()  # Create grouped DataFrames after renaming
        

        # Initialize the classes neeed for analysis and plotting
     
        self.genotype_comparison = self.GenotypeComparisonAnalysis(self)
        """GenotypeComparisonAnalysis: Object for comparing two genotypes."""
        
        self.analysis = self.Analysis(self)
        """Analysis: Object for performing statistical analysis on the data."""   
    
    def _initialize_dataframes(self):
        """
        Internal method to initialize the genotype and genotype-sex separated DataFrames
        after loading the data. Intiliazed during the __init__ method.
        """
        if self.data is None:
            self.extract_data()  # Load the data if not already loaded

        # Create separate DataFrames for each genotype
        self.genotype_nested_df = self.data.groupby('genotype').apply(lambda x: x.reset_index(drop=True))

        # Create separate DataFrames for each combination of genotype and sex
        self.genotype_sex_groups = self.data.groupby(['genotype', 'sex']).apply(lambda x: x.reset_index(drop=True))
        
        #print("Columns in genotype_nested_df:", self.genotype_nested_df.columns.tolist())
        #print("Columns in genotype_sex_groups:", self.genotype_sex_groups.columns.tolist())

    def rename_columns(self):
        """
        Rename specific columns in the self.data based on the strings in the column_map dictionary. 
        Intialized during the __init__ method.
        
        """
        if self.data is None:
            raise ValueError("Data not loaded. Please run extract_data() first.")
        
        print("rename_columns method called.")
        print("Columns before renaming:", self.data.columns.tolist())
      
       #create a mapping of the new names 
        column_map = {
            'summouse1': 'interactiontime_sum_familiar',
            'summouse2': 'interactiontime_sum_stranger',
            'avgmouse1': 'interactiontime_avg_familiar',
            'avgmouse2': 'interactiontime_avg_stranger',
            'boutmouse1': 'approach_sum_familiar',
            'boutmouse2': 'approach_sum_stranger'
        }
        
        # Rename the column names that are in the column_map
        self.data.rename(columns=column_map, inplace=True)
        print("Columns after renaming:", self.data.columns.tolist())
    
    def extract_data(self):
        """
        
        Extract data from the CSV file and store it in a DataFrame. But remove the Unamed: 15 and Unamed: 16 columns
        
        :return: DataFrame containing the loaded data.
        """
        self.data = pd.read_csv(self.file_path)
        self.data = self.data.drop(columns=['Unnamed: 15', 'Unnamed: 16'])

        return self.data

    def calculate_age_in_months(self):
        """
        Calculate the age of each mouse at the time of testing in months.
        Intialized during the __init__ method.
        :return: DataFrame with columns for DOB, datetested, and age in months.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Please run extract_data() first.")
        
        # Clean up any leading or trailing spaces in the column names
        self.data.columns = self.data.columns.str.strip()
        
        # Rename 'date tested' to 'datetested' if it exists
        if 'date tested' in self.data.columns:
            self.data.rename(columns={'date tested': 'datetested'}, inplace=True)
        else:
            raise KeyError("Column 'date tested' not found in the data. Please check the column names.")
        
        # Convert DOB and datetested to datetime objects
        self.data['DOB'] = pd.to_datetime(self.data['DOB'], format='%m/%d/%y')
        self.data['datetested'] = pd.to_datetime(self.data['datetested'], format='%m/%d/%y')
        
        # Calculate the difference in months
        self.data['age_in_months'] = ((self.data['datetested'] - self.data['DOB']).dt.days / 30.44).round(2)
        return self.data[['DOB', 'datetested', 'age_in_months']]

    def check_unique_tags(self):
        """
        Ensure that the 'tag' column contains unique identifiers for each mouse.
        
        :return: DataFrame with duplicate tag entries if any exist; otherwise, None.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Please run extract_data() first.")
        
        duplicate_tags = self.data[self.data.duplicated('tag', keep=False)]
        
        if not duplicate_tags.empty:
            print("Duplicate 'tag' values found.")
            return duplicate_tags[['tag', 'DOB', 'date tested', 'genotype', 'treatment']]
        else:
            return None

    class GenotypeComparisonAnalysis:
        # First nested clas
        def __init__(self, parent):
            """
            Initialize the GenotypeComparisonAnalysis object 
            """
            self.parent = parent           
            self.genotype1 = 'Rbp4-LMO3'
            """str: The first genotype to be compared."""
            self.genotype2 = 'wt-LMO3'
            """str: The second genotype to be compared."""

            self.data_genotype1 = None
            """pandas.DataFrame: Data subset for the first genotype (Rbp4-LMO3)."""
            self.data_genotype2 = None
            """pandas.DataFrame: Data subset for the second genotype (wt-LMO3)."""
            
            self._initialize_genotype_data()
        
        def _initialize_genotype_data(self):
            if self.data is None:
                raise ValueError("Data not loaded. Please run extract_data() first.")
            # Access the genotype-separated DataFrames through the parent
            if self.genotype1 in self.parent.genotype_nested_df.index.get_level_values('genotype'):
                self.data_genotype1 = self.parent.genotype_nested_df.xs(self.genotype1, level='genotype')
            else:
                raise ValueError(f"Genotype '{self.genotype1}' not found in the data.")
            
            if self.genotype2 in self.parent.genotype_nested_df.index.get_level_values('genotype'):
                self.data_genotype2 = self.parent.genotype_nested_df.xs(self.genotype2, level='genotype')
            else:
                raise ValueError(f"Genotype '{self.genotype2}' not found in the data.")


    class Analysis:
         # Second nested class
        def __init__(self, parent):
            self.parent = parent
            self.genotype_results = pd.DataFrame()
            """pandas.DataFrame: Stores results for genotype comparisons."""
            
            self.genotype_sex_results = pd.DataFrame()
            """pandas.DataFrame: Stores results for genotype-sex comparisons."""
            
            self.paired_metrics = [
            ('interactiontime_sum_familiar', 'interactiontime_sum_stranger'),
            ('interactiontime_avg_familiar', 'interactiontime_avg_stranger'),
            ('approach_sum_familiar', 'approach_sum_stranger')]

            """list of tuples: Pairs of metrics to be compared (familiar vs stranger)."""
            
            self.anova_results = None

        def compare_genotypes(self):
            results = []
            print(f"Paired metrics: {self.paired_metrics}")
            print(f"Genotypes: {self.parent.genotype_comparison.genotype1}, {self.parent.genotype_comparison.genotype2}")
            
            for familiar_metric, stranger_metric in self.paired_metrics:
                for genotype in [self.parent.genotype_comparison.genotype1, self.parent.genotype_comparison.genotype2]:
                    print(f"Processing genotype: {genotype}")
                    print(f"Metrics: {familiar_metric} vs {stranger_metric}")
                    
                    # Filter the data for the current genotype
                    data = self.parent.genotype_nested_df[self.parent.genotype_nested_df['genotype'] == genotype]
                    print(f"Data shape for {genotype}: {data.shape}")
                    print(f"Columns in data: {data.columns}")
                    
                    if familiar_metric not in data.columns or stranger_metric not in data.columns:
                        print(f"Error: Metrics not found in data columns for genotype {genotype}")
                        continue
                    
                    familiar_data = data[familiar_metric]
                    stranger_data = data[stranger_metric]
                    print(f"Familiar data: {familiar_data.head()}")
                    print(f"Stranger data: {stranger_data.head()}")
                    
                    if len(familiar_data) != len(stranger_data):
                        print(f"Error: Mismatched lengths for {genotype}")
                        continue
                    
                    try:
                        t_stat, p_value = stats.ttest_rel(familiar_data, stranger_data)
                        print(f"t-statistic: {t_stat}, p-value: {p_value}")
                    except Exception as e:
                        print(f"Error in t-test for {genotype}: {str(e)}")
                        continue
                    
                    results.append({
                        'genotype': genotype,
                        'familiar_metric': familiar_metric,
                        'stranger_metric': stranger_metric,
                        't_statistic': t_stat,
                        'p_value': p_value
                    })
            
            self.genotype_results = pd.DataFrame(results)
            print(f"Final results shape: {self.genotype_results.shape}")
            return self.genotype_results
        
        def get_genotype_results(self):
            if self.genotype_results is None:
                self.compare_genotypes()  # Calculate results if not already done
            return self.genotype_results

        def perform_two_way_anova(self):
            results = []

            for familiar_metric, stranger_metric in self.paired_metrics:
                # Extract the base metric name
                base_metric = familiar_metric.rsplit('_', 1)[0]  # Remove '_familiar' suffix

                # Reshape data for ANOVA
                data_familiar = self.parent.genotype_nested_df[[familiar_metric, 'genotype']].copy()
                data_familiar['condition'] = 'familiar'
                data_familiar.rename(columns={familiar_metric: 'value'}, inplace=True)

                data_stranger = self.parent.genotype_nested_df[[stranger_metric, 'genotype']].copy()
                data_stranger['condition'] = 'stranger'
                data_stranger.rename(columns={stranger_metric: 'value'}, inplace=True)

                data_combined = pd.concat([data_familiar, data_stranger])

                # Perform two-way ANOVA
                model = ols('value ~ C(genotype) + C(condition) + C(genotype):C(condition)', data=data_combined).fit()
                anova_table = sm.stats.anova_lm(model, typ=2)

                results.append({
                    'metric': base_metric,
                    'anova_table': anova_table
                })

            self.anova_results = results
            return results

        def print_anova_results(self):
            if self.anova_results is None:
                print("No ANOVA results available. Please run perform_two_way_anova() first.")
                return

            for result in self.anova_results:
                print(f"\nTwo-way ANOVA results for {result['metric']}:")
                print(result['anova_table'])
                print("\nInterpretation:")
                for effect in ['C(genotype)', 'C(condition)', 'C(genotype):C(condition)']:
                    p_value = result['anova_table'].loc[effect, 'PR(>F)']
                    if p_value < 0.05:
                        print(f"- Significant effect of {effect} (p = {p_value:.4f})")
                    else:
                        print(f"- No significant effect of {effect} (p = {p_value:.4f})")
                print("\n" + "="*50)
                
                
        def check_assumptions_and_perform_analysis(self):
                results = []

                for familiar_metric, stranger_metric in self.paired_metrics:
                    base_metric = familiar_metric.rsplit('_', 1)[0]
                    print(f"\nAnalyzing {base_metric}:")

                    # Prepare data
                    data = self.parent.genotype_nested_df.reset_index()
                    data_long = pd.melt(data, 
                                        id_vars=['genotype', 'index'], 
                                        value_vars=[familiar_metric, stranger_metric],
                                        var_name='condition', 
                                        value_name='value')
                    data_long['condition'] = data_long['condition'].map({familiar_metric: 'familiar', stranger_metric: 'stranger'})

                    # Check normality
                    _, p_value = stats.normaltest(data_long['value'])
                    print(f"Normality test (D'Agostino's K^2 test) p-value: {p_value:.4f}")
                    if p_value < 0.05:
                        print("Warning: Data may not be normally distributed.")

                    # Check homogeneity of variances
                    levene_statistic, levene_p_value = stats.levene(
                        *[group['value'].values for name, group in data_long.groupby(['genotype', 'condition'])]
                    )
                    print(f"Levene's test p-value: {levene_p_value:.4f}")
                    if levene_p_value < 0.05:
                        print("Warning: Variances may not be homogeneous.")

                    # Visualizations
                    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

                    # Q-Q plot
                    sm.qqplot(data_long['value'], line='45', ax=ax1)
                    ax1.set_title("Q-Q plot")

                    # Box plot
                    sns.boxplot(x='genotype', y='value', hue='condition', data=data_long, ax=ax2)
                    ax2.set_title("Box plot")
                    plt.tight_layout()
                    plt.show()

                    # Perform mixed-effects model analysis
                    model = mixedlm("value ~ genotype * condition", data_long, groups=data_long["index"])
                    model_fit = model.fit()

                    print("\nMixed-effects model results:")
                    print(model_fit.summary())

                    results.append({
                        'metric': base_metric,
                        'model_fit': model_fit
                    })

                self.mixed_model_results = results
                return results
            
        def print_analysis_summary(self):
                if self.mixed_model_results is None:
                    print("No analysis results available. Please run check_assumptions_and_perform_analysis() first.")
                    return

                for result in self.mixed_model_results:
                    print(f"\nSummary for {result['metric']}:")
                    print(result['model_fit'].summary())


In [176]:
# Initialize and process data
mouse_analysis = EzPzMouseAnalytics(file_path)

rename_columns method called.
Columns before renaming: ['cohort', 'tag', 'toe', 'sex', 'dam', 'DOB', 'genotype', 'treatment', 'summouse1', 'summouse2', 'avgmouse1', 'avgmouse2', 'boutmouse1', 'boutmouse2', 'date tested ']
Columns after renaming: ['cohort', 'tag', 'toe', 'sex', 'dam', 'DOB', 'genotype', 'treatment', 'interactiontime_sum_familiar', 'interactiontime_sum_stranger', 'interactiontime_avg_familiar', 'interactiontime_avg_stranger', 'approach_sum_familiar', 'approach_sum_stranger', 'date tested ']


In [177]:
mouse_analysis.genotype_nested_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cohort,tag,toe,sex,dam,DOB,genotype,treatment,interactiontime_sum_familiar,interactiontime_sum_stranger,interactiontime_avg_familiar,interactiontime_avg_stranger,approach_sum_familiar,approach_sum_stranger,datetested,age_in_months
genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Rbp4-LMO3,0,3,3427,3,M,3244,2023-06-10,Rbp4-LMO3,CTZ,26,42,2.6,4.666667,10,9.0,2023-11-29,5.65
Rbp4-LMO3,1,4,3435,3,M,3426,2023-06-20,Rbp4-LMO3,CTZ,12,36,2.4,7.2,5,5.0,2024-05-08,10.61
Rbp4-LMO3,2,6,3515,3,M,3377,2023-08-10,Rbp4-LMO3,CTZ,10,0,2.0,0.0,5,0.0,2024-06-03,9.79
Rbp4-LMO3,3,6,3518,4,M,3377,2023-08-10,Rbp4-LMO3,CTZ,22,17,2.444444,6.0,9,2.833333,2024-06-03,9.79
Rbp4-LMO3,4,7,3522,2,M,3378,2023-08-14,Rbp4-LMO3,CTZ,27,1,2.454545,1.0,11,1.0,2024-06-04,9.69


In [178]:
mouse_analysis.genotype_sex_groups.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cohort,tag,toe,sex,dam,DOB,genotype,treatment,interactiontime_sum_familiar,interactiontime_sum_stranger,interactiontime_avg_familiar,interactiontime_avg_stranger,approach_sum_familiar,approach_sum_stranger,datetested,age_in_months
genotype,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Rbp4-LMO3,F,0,3,3440,1,F,3244,2023-06-10,Rbp4-LMO3,CTZ,13,15,4.333333,3.75,3,4.0,2024-03-28,9.59
Rbp4-LMO3,F,1,4,3437,9,F,3246,2023-06-20,Rbp4-LMO3,CTZ,12,14,2.4,2.8,5,5.0,2024-05-07,10.58
Rbp4-LMO3,F,2,4,3439,7,F,3246,2023-06-20,Rbp4-LMO3,CTZ,22,34,2.75,3.4,8,10.0,2024-05-07,10.58
Rbp4-LMO3,F,3,6,3520,7,F,3377,2023-08-10,Rbp4-LMO3,CTZ,19,25,2.375,2.083333,8,12.0,2024-05-28,9.59
Rbp4-LMO3,F,4,7,3525,1,F,3378,2023-08-14,Rbp4-LMO3,CTZ,21,16,1.909091,3.2,11,5.0,2024-07-02,10.61


In [179]:
mouse_analysis.analysis.compare_genotypes()

Paired metrics: [('interactiontime_sum_familiar', 'interactiontime_sum_stranger'), ('interactiontime_avg_familiar', 'interactiontime_avg_stranger'), ('approach_sum_familiar', 'approach_sum_stranger')]
Genotypes: Rbp4-LMO3, wt-LMO3
Processing genotype: Rbp4-LMO3
Metrics: interactiontime_sum_familiar vs interactiontime_sum_stranger
Data shape for Rbp4-LMO3: (23, 16)
Columns in data: Index(['cohort', 'tag', 'toe', 'sex', 'dam', 'DOB', 'genotype', 'treatment',
       'interactiontime_sum_familiar', 'interactiontime_sum_stranger',
       'interactiontime_avg_familiar', 'interactiontime_avg_stranger',
       'approach_sum_familiar', 'approach_sum_stranger', 'datetested',
       'age_in_months'],
      dtype='object')
Familiar data: genotype    
Rbp4-LMO3  0    26
           1    12
           2    10
           3    22
           4    27
Name: interactiontime_sum_familiar, dtype: int64
Stranger data: genotype    
Rbp4-LMO3  0    42
           1    36
           2     0
           3    17
   

Unnamed: 0,genotype,familiar_metric,stranger_metric,t_statistic,p_value
0,Rbp4-LMO3,interactiontime_sum_familiar,interactiontime_sum_stranger,-0.786834,0.439773
1,wt-LMO3,interactiontime_sum_familiar,interactiontime_sum_stranger,-0.454879,0.653653
2,Rbp4-LMO3,interactiontime_avg_familiar,interactiontime_avg_stranger,-1.782366,0.088497
3,wt-LMO3,interactiontime_avg_familiar,interactiontime_avg_stranger,0.546735,0.590064
4,Rbp4-LMO3,approach_sum_familiar,approach_sum_stranger,0.48284,0.633979
5,wt-LMO3,approach_sum_familiar,approach_sum_stranger,-1.426808,0.167674


In [186]:
mouse_analysis.genotype_nested_df

Unnamed: 0_level_0,Unnamed: 1_level_0,cohort,tag,toe,sex,dam,DOB,genotype,treatment,interactiontime_sum_familiar,interactiontime_sum_stranger,interactiontime_avg_familiar,interactiontime_avg_stranger,approach_sum_familiar,approach_sum_stranger,datetested,age_in_months
genotype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Rbp4-LMO3,0,3,3427,3,M,3244,2023-06-10,Rbp4-LMO3,CTZ,26,42,2.6,4.666667,10,9.0,2023-11-29,5.65
Rbp4-LMO3,1,4,3435,3,M,3426,2023-06-20,Rbp4-LMO3,CTZ,12,36,2.4,7.2,5,5.0,2024-05-08,10.61
Rbp4-LMO3,2,6,3515,3,M,3377,2023-08-10,Rbp4-LMO3,CTZ,10,0,2.0,0.0,5,0.0,2024-06-03,9.79
Rbp4-LMO3,3,6,3518,4,M,3377,2023-08-10,Rbp4-LMO3,CTZ,22,17,2.444444,6.0,9,2.833333,2024-06-03,9.79
Rbp4-LMO3,4,7,3522,2,M,3378,2023-08-14,Rbp4-LMO3,CTZ,27,1,2.454545,1.0,11,1.0,2024-06-04,9.69
Rbp4-LMO3,5,7,3523,7,M,3378,2023-08-14,Rbp4-LMO3,CTZ,0,15,0.0,3.0,0,5.0,2024-06-04,9.69
Rbp4-LMO3,6,8,3554,4,M,3243,2023-08-28,Rbp4-LMO3,CTZ,10,15,2.5,3.0,4,5.0,2024-06-04,9.23
Rbp4-LMO3,7,9,3559,6,M,3377,2023-09-03,Rbp4-LMO3,CTZ,36,5,4.0,2.5,9,2.0,2024-06-06,9.1
Rbp4-LMO3,8,9,3561,2,M,3377,2023-09-03,Rbp4-LMO3,CTZ,8,40,2.666667,2.857143,3,14.0,2024-06-06,9.1
Rbp4-LMO3,9,10,3548,1,M,3378,2023-09-08,Rbp4-LMO3,CTZ,15,16,2.5,2.666667,6,6.0,2024-06-07,8.97


In [181]:
mouse_analysis.analysis.genotype_results

Unnamed: 0,genotype,familiar_metric,stranger_metric,t_statistic,p_value
0,Rbp4-LMO3,interactiontime_sum_familiar,interactiontime_sum_stranger,-0.786834,0.439773
1,wt-LMO3,interactiontime_sum_familiar,interactiontime_sum_stranger,-0.454879,0.653653
2,Rbp4-LMO3,interactiontime_avg_familiar,interactiontime_avg_stranger,-1.782366,0.088497
3,wt-LMO3,interactiontime_avg_familiar,interactiontime_avg_stranger,0.546735,0.590064
4,Rbp4-LMO3,approach_sum_familiar,approach_sum_stranger,0.48284,0.633979
5,wt-LMO3,approach_sum_familiar,approach_sum_stranger,-1.426808,0.167674


In [182]:
# Perform the two-way ANOVA
mouse_analysis.analysis.perform_two_way_anova()

# Print the results
mouse_analysis.analysis.print_anova_results()


Two-way ANOVA results for interactiontime_sum:
                                sum_sq    df          F    PR(>F)
C(genotype)                2142.782609   1.0  10.167415  0.001980
C(condition)                 96.043478   1.0   0.455722  0.501400
C(genotype):C(condition)      2.130435   1.0   0.010109  0.920142
Residual                  18546.000000  88.0        NaN       NaN

Interpretation:
- Significant effect of C(genotype) (p = 0.0020)
- No significant effect of C(condition) (p = 0.5014)
- No significant effect of C(genotype):C(condition) (p = 0.9201)


Two-way ANOVA results for interactiontime_avg:
                              sum_sq    df         F    PR(>F)
C(genotype)                18.063606   1.0  5.868435  0.017465
C(condition)                1.047563   1.0  0.340328  0.561132
C(genotype):C(condition)    4.599169   1.0  1.494160  0.224836
Residual                  270.872445  88.0       NaN       NaN

Interpretation:
- Significant effect of C(genotype) (p = 0.0175)
- No sig

In [185]:
# Check ANOVA assumptions
mouse_analysis.analysis.check_assumptions_and_perform_analysis()


Analyzing interactiontime_sum:


ValueError: cannot insert genotype, already exists