In [6]:
import pandas as pd 


In [1]:
# Environment check: print the path of the Python interpreter running this code.
import sys
print(sys.executable)

/opt/anaconda3/envs/ezpz_env/bin/python


In [8]:
# List the contents of the current directory
!ls
# Print the working directory
!pwd
# Check the Python version
# NOTE(review): `!python` is looked up by the shell, so it may not be the same
# interpreter as the kernel -- confirm against sys.executable.
!python --version


ezpz_analysis.ipynb
/Users/ecrespo/Documents/github_project_folder/ezpzmouseanalytics/notebooks
Python 3.8.19


In [9]:
class EzPzMouseAnalytics:
    """
    Load a CSV of per-mouse records and run basic derivations/checks on it.

    Call ``extract_data()`` first; ``calculate_age_in_months()`` and
    ``check_unique_tags()`` can then be called in either order.
    """

    # Divisor used to turn a day count into months (approximate mean month length).
    DAYS_PER_MONTH = 30.44
    # Layout of the DOB / test-date strings in the CSV, e.g. "11/29/23".
    DATE_FORMAT = '%m/%d/%y'
    # Name of the parsed test-date column written by calculate_age_in_months().
    DATE_TESTED_COLUMN = 'date tested'

    def __init__(self, file_path):
        """
        Initialize the EzPzMouseAnalytics object with the path to the data file.

        :param file_path: Path to the CSV file containing the data.
        """
        self.file_path = file_path
        # Populated by extract_data(); None means "nothing loaded yet".
        self.data = None

    def extract_data(self):
        """
        Extract data from the CSV file and store it in a DataFrame.

        :return: DataFrame containing the loaded data.
        """
        self.data = pd.read_csv(self.file_path)
        return self.data

    def _require_data(self):
        """
        Return the loaded DataFrame.

        :raises ValueError: if extract_data() has not been run yet.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Please run extract_data() first.")
        return self.data

    def _find_date_tested_column(self, prefer_parsed):
        """
        Locate the test-date column, tolerating stray whitespace in the header.

        The raw CSV header carries a trailing space ('date tested '), while
        calculate_age_in_months() writes the parsed dates to 'date tested'.
        Both may therefore be present at the same time.

        :param prefer_parsed: If True, prefer the exact 'date tested' label when
            it exists; if False, prefer the raw (whitespace-padded) header.
        :return: The actual column label to read from.
        :raises KeyError: if no column matches 'date tested' after stripping.
        """
        frame = self._require_data()
        exact = self.DATE_TESTED_COLUMN
        matches = [
            label for label in frame.columns
            if isinstance(label, str) and label.strip() == exact
        ]
        if not matches:
            raise KeyError(
                "No '{}' column found in the loaded data.".format(exact)
            )

        padded = [label for label in matches if label != exact]
        if prefer_parsed:
            return exact if exact in matches else padded[0]
        return padded[0] if padded else exact

    def calculate_age_in_months(self):
        """
        Calculate the age of each mouse at the time of testing in months.

        Converts 'DOB' to datetime in place, writes the parsed test date to
        'date tested' and adds an 'age_in_months' column (days / DAYS_PER_MONTH,
        rounded to 2 decimals) on ``self.data``.

        :return: DataFrame with columns for DOB, date tested, and age in months.
        :raises ValueError: if extract_data() has not been run yet.
        :raises KeyError: if the DOB or test-date column is missing.
        """
        frame = self._require_data()
        # Resolve the source column before mutating anything, so a missing
        # column does not leave the frame half-converted.
        source_column = self._find_date_tested_column(prefer_parsed=False)

        # Convert DOB and date tested to datetime objects
        frame['DOB'] = pd.to_datetime(frame['DOB'], format=self.DATE_FORMAT)
        frame[self.DATE_TESTED_COLUMN] = pd.to_datetime(
            frame[source_column], format=self.DATE_FORMAT
        )

        # Calculate the difference in months
        elapsed_days = (frame[self.DATE_TESTED_COLUMN] - frame['DOB']).dt.days
        frame['age_in_months'] = (elapsed_days / self.DAYS_PER_MONTH).round(2)
        return frame[['DOB', self.DATE_TESTED_COLUMN, 'age_in_months']]

    def check_unique_tags(self):
        """
        Ensure that the 'tag' column contains unique identifiers for each mouse.

        Works whether or not calculate_age_in_months() has been run: the test
        date is taken from the parsed 'date tested' column when present and from
        the raw CSV column otherwise, and is always reported as 'date tested'.

        :return: DataFrame with duplicate tag entries if any exist; otherwise, None.
        :raises ValueError: if extract_data() has not been run yet.
        """
        frame = self._require_data()
        duplicate_tags = frame[frame.duplicated('tag', keep=False)]

        if duplicate_tags.empty:
            print("All 'tag' values are unique.")
            return None

        date_column = self._find_date_tested_column(prefer_parsed=True)
        report = duplicate_tags[['tag', 'DOB', date_column, 'genotype', 'treatment']]
        # Keep the reported column label stable regardless of which source was used.
        return report.rename(columns={date_column: self.DATE_TESTED_COLUMN})

# Example usage:
# file_path = '/mnt/data/Rbp4_social.csv'
# mouse_analysis = EzPzMouseAnalytics(file_path)
# mouse_analysis.extract_data()
# mouse_analysis.calculate_age_in_months()
# mouse_analysis.check_unique_tags()



In [10]:
# Relative to the notebooks/ directory (the kernel's working directory), so the
# notebook is not tied to one user's home folder.
file_path = '../data/raw/Rbp4_social.csv'

In [11]:
# Run the pipeline end to end: load the CSV, derive ages, then check tags.
mouse_analysis = EzPzMouseAnalytics(file_path)
mouse_analysis.extract_data()
# Adds 'date tested' (parsed) and 'age_in_months' to mouse_analysis.data in place;
# the returned three-column view is not displayed here.
mouse_analysis.calculate_age_in_months()
# Last expression of the cell: shows the duplicate-tag rows, or prints a message
# and yields None when every tag is unique.
mouse_analysis.check_unique_tags()


All 'tag' values are unique.


In [12]:
# Display the full DataFrame, including the columns added by calculate_age_in_months().
mouse_analysis.data

Unnamed: 0,cohort,tag,toe,sex,dam,DOB,genotype,treatment,summouse1,summouse2,avgmouse1,avgmouse2,boutmouse1,boutmouse2,date tested,Unnamed: 15,Unnamed: 16,date tested.1,age_in_months
0,3,3427,3,M,3244,2023-06-10,Rbp4-LMO3,CTZ,26,42,2.6,4.666667,10,9.0,11/29/23,,,2023-11-29,5.65
1,3,3428,4,M,3244,2023-06-10,wt-LMO3,CTZ,51,52,5.1,5.777778,10,9.0,11/29/23,,,2023-11-29,5.65
2,4,3434,4,M,3246,2023-06-20,wt-LMO3,CTZ,42,60,10.5,4.615385,4,13.0,5/8/24,,,2024-05-08,10.61
3,4,3435,3,M,3426,2023-06-20,Rbp4-LMO3,CTZ,12,36,2.4,7.2,5,5.0,5/8/24,,,2024-05-08,10.61
4,4,3436,1,M,3246,2023-06-20,wt-LMO3,CTZ,47,65,5.875,5.909091,8,11.0,5/8/24,,,2024-05-08,10.61
5,5,3442,2,M,3426,2023-07-14,wt-LMO3,CTZ,74,47,8.222222,9.4,9,5.0,5/9/24,,,2024-05-09,9.86
6,6,3514,2,M,3377,2023-08-10,wt-LMO3,CTZ,23,5,2.875,1.666667,8,3.0,6/3/24,,,2024-06-03,9.79
7,6,3515,3,M,3377,2023-08-10,Rbp4-LMO3,CTZ,10,0,2.0,0.0,5,0.0,6/3/24,,,2024-06-03,9.79
8,6,3516,8,M,3377,2023-08-10,wt-LMO3,CTZ,20,16,2.875143,2.0,7,8.0,6/3/24,,,2024-06-03,9.79
9,6,3517,1,M,3377,2023-08-10,wt-LMO3,CTZ,23,16,2.555556,3.2,9,5.0,6/3/24,,,2024-06-03,9.79


Type:        Index
String form:
Index(['cohort', 'tag', 'toe', 'sex', 'dam', 'DOB', 'genotype', 'treatment',
           'summouse1',  <...> ed ', 'Unnamed: 15', 'Unnamed: 16',
           'date tested', 'age_in_months'],
           dtype='object')
Length:      19
File:        /opt/anaconda3/envs/ezpz_env/lib/python3.8/site-packages/pandas/core/indexes/base.py
Docstring:
Immutable sequence used for indexing and alignment.

The basic object storing axis labels for all pandas objects.

.. versionchanged:: 2.0.0

   Index can hold all numpy numeric dtypes (except float16). Previously only
   int64/uint64/float64 dtypes were accepted.

Parameters
----------
data : array-like (1-dimensional)
dtype : NumPy dtype (default: object)
    If dtype is None, we find the dtype that best fits the data.
    If an actual dtype is provided, we coerce to that dtype if it's safe.
    Otherwise, an error will be raised.
copy : bool
    Make a copy of i