# Animal profile

This file creates the profile output for a group of animals.

| ![](../images/profile.jpg) | 
|:--:| 
|Example of profile. 1 point per animal. Each color represents one cage|

This code computes the total time spent in each behaviour as well as the total number of occurrences of this behaviour for each individual. You can choose to compute this profile either over all the experiment or for each night separately. It provides a plot for the total duration of all the different events and a plot for the total number of occurrences of each behaviour as pdf files. It also provides a txt file with all individual values for each variable, as well as statistics (mixed model for each trait, uncorrected: trait as a function of genotype, with a group effect).


In [1]:
'''
Created on 13 sept. 2017

@author: Fabrice de Chaumont and Elodie Ey
'''

import sys
sys.path.insert(1, "../")

import sqlite3
from lmtanalysis.Animal import *
import numpy as np
import matplotlib.pyplot as plt
from lmtanalysis.Event import *
from lmtanalysis.Measure import *
import colorsys
from collections import Counter
import seaborn as sns

from tkinter.filedialog import askopenfilename
from lmtanalysis.Util import getMinTMaxTAndFileNameInput
from lmtanalysis.EventTimeLineCache import EventTimeLineCached
from lmtanalysis.FileUtil import getFilesToProcess
import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas
from datetime import datetime


def computeProfile(file, minT, maxT, night):
    
    connection = sqlite3.connect( file )
    
    pool = AnimalPool()
    pool.loadAnimals( connection )
    
    animalData = {} # will then be the 'profileData'
    # animalData[night] = night #Assigning the night

    for animal in pool.animalDictionnary.keys():
        print(f"computing individual animal:{animal}")
        rfid = pool.animalDictionnary[animal].RFID
        print(f"RFID:{rfid}")
        animalData[rfid]= {}        
        #store the animal
        animalData[rfid]["animal"] = pool.animalDictionnary[animal]
        animalData[rfid]["file"] = file
        
        genoA = None
        try:
            genoA=pool.animalDictionnary[animal].genotype
        except:
            pass
                    
        for behavEvent in behavioralEvents[:-2]:
            
            print(f"computing individual event: {behavEvent}")    
            
            behavEventTimeLine = EventTimeLineCached( connection, file, behavEvent, animal, minFrame=minT, maxFrame=maxT )
            
            totalEventDuration = behavEventTimeLine.getTotalLength()
            nbEvent = behavEventTimeLine.getNumberOfEvent(minFrame = minT, maxFrame = maxT )
            print( "total event duration: " , totalEventDuration )                
            animalData[rfid][behavEventTimeLine.eventName+" TotalLen"] = totalEventDuration
            animalData[rfid][behavEventTimeLine.eventName+" Nb"] = nbEvent
            
            print(behavEventTimeLine.eventName, genoA, behavEventTimeLine.idA, totalEventDuration, nbEvent)

    header = ["file","strain","sex","group","day","exp","RFID","genotype", "user1", "minTime","maxTime"]
    for name in header:
        text_file.write( "{}\t".format ( name ) ) 
    for kAnimal in animalData:    
        #identify the experiment where the animal comes from:
        animalData[kAnimal]["experiment"] = file  #HD: What does it do ?
        #compute the total distance traveled
        COMPUTE_TOTAL_DISTANCE = True #Change to 'True' OR 'False' if you want OR not to compute total Distance
        if COMPUTE_TOTAL_DISTANCE == True:
            animalData[kAnimal]["animal"].loadDetection( start=minT, end=maxT, lightLoad = True )
            animalData[kAnimal]["totalDistance"] = animalData[kAnimal]["animal"].getDistance( tmin=minT,tmax=maxT)/100
        else:
            animalData[kAnimal]["totalDistance"] = "totalDistance"
        
    # write event keys
    firstAnimalKey = next(iter(animalData))
    firstAnimal = animalData[firstAnimalKey]
    for k in firstAnimal.keys():
        text_file.write( "{}\t".format( k.replace(" ", "") ) )
    text_file.write("\n")
    
    for kAnimal in animalData:
        text_file.write( "{}\t".format( file ) )
        text_file.write( "{}\t".format( "strain" ) )
        text_file.write( "{}\t".format( "sex" ) )
        text_file.write( "{}\t".format( "group" ) )
        text_file.write( "{}\t".format( f"night-{night}" ) )
        text_file.write( "{}\t".format( "exp" ) )
        text_file.write( "{}\t".format( animalData[kAnimal]["animal"].RFID ) )
        text_file.write( "{}\t".format( animalData[kAnimal]["animal"].genotype ) )
        text_file.write( "{}\t".format( animalData[kAnimal]["animal"].user1 ) )
        text_file.write( "{}\t".format( minT ) )
        text_file.write( "{}\t".format( maxT ) )

        for kEvent in firstAnimal.keys():
            text_file.write( "{}\t".format( animalData[kAnimal][kEvent] ) )
        text_file.write( "\n" )
        
    connection.close() #Close the connection to the database (to save memory?)
    return animalData


def getProfileValues(profileData, night=0, event=None, nbNightsDic=None):

#     print(f"in getProfileValues, night is = {night}")
    dataDic = {}
    dataDic["genotype"] = []
    dataDic["value"] = []
    dataDic["exp"] = []
    
    for file in profileData.keys():
        if night == 0 or night <= nbNightsDic[file]: #Check if there are more nights in the file
            for animal in profileData[file][night]: # BUG WITH night=1 ?
                if (genoToRemove.lower() != "no") & (genoToRemove != ""): #Test that it is not "NO" or ""
                    if genoToRemove in profileData[file][night][animal]["animal"].genotype:  #SKIPS THE 'genoToRemove' ANIMALS !!!
                        continue
                dataDic["value"].append(profileData[file][night][animal][event])
                dataDic["exp"].append(profileData[file][night][animal]["experiment"])
                dataDic["genotype"].append(profileData[file][night][animal]["animal"].genotype)
        else:
            print(f"During night#{night}, file '{file}' EXLUDED !!!")
            continue
    return dataDic


def plotProfileDataDuration(profileData, night, valueCat, text_file_name, nbNightsDic=None):
    fig, axes = plt.subplots(nrows=5, ncols=6, figsize=(14, 12))
#     plt.xticks(rotation=45, horizontalalignment='right', fontweight='light') #X-axis 45° angle

    row=0
    col=0
    fig.suptitle(t=f"{valueCat} of events (night {night})", y=1.2, fontweight= 'bold')
    
    #plot the data for each behavioural event
    for behavEvent in behavioralEvents[:-2]: # HD: WHY '-2' ??? Not taking the Last Behavior (totalDist) ?
        event = behavEvent+valueCat
        print("event: ", event)

        profileValueDictionary = getProfileValues(profileData=profileData, night=night,
                                                  event=event, nbNightsDic=nbNightsDic)
        y = profileValueDictionary["value"]
        x = profileValueDictionary["genotype"]
        genotypeType = Counter(x)
        group = profileValueDictionary["exp"]
        
        print("y: ", y)
        print("x: ", x)
        print("group: ", group)
        experimentType = Counter(group)
        print("Nb of experiments: ", len(experimentType))
        
        axes[row,col].set_xlim(-0.5, 1.5)
        axes[row,col].set_ylim(min(y)-0.2*max(y), max(y)+0.2*max(y))
        sns.stripplot(x, y, jitter=True, hue=group, s=5, ax=axes[row,col] )
        axes[row,col].set_title(behavEvent)
        axes[row,col].set_ylabel("{} (frames)".format(valueCat))
        axes[row,col].legend().set_visible(False)
        axes[row,col].spines['right'].set_visible(False)
        axes[row,col].spines['top'].set_visible(False)
                
        if col<5:
            col+=1
            row=row
        else:
            col=0
            row+=1

    # Plot the data for the total distance traveled   
    print("plot Total Distance")
    profileValueDictionary = getProfileValues(profileData=profileData, night=night, 
                                              event="totalDistance", nbNightsDic=nbNightsDic)
    y = profileValueDictionary["value"]
    x = profileValueDictionary["genotype"]
    genotypeType = Counter(x)
    group = profileValueDictionary["exp"]
    
    print("y: ", y)
    print("x: ", x)
    print("group: ", group)
    experimentType = Counter(group)
    print("Nb of experiments: ", len(experimentType))
    
    axes[row,col].set_xlim(-0.5, 1.5)
    axes[row,col].set_ylim(min(y)-0.2*max(y), max(y)+0.2*max(y))
    sns.stripplot(x, y, jitter=True, hue=group, s=5, ax=axes[row,col] )
    axes[row,col].set_title("Activity")
    axes[row,col].set_ylabel("total distance (m)")
    axes[row,col].legend().set_visible(False)
    axes[row,col].spines['right'].set_visible(False)
    axes[row,col].spines['top'].set_visible(False)
    
    if col<7:
        col+=1
        row=row
    else:
        col=0
        row+=1
    
    #ROTATE THE X-AXIS OF ALL SUBPLOTS
    for ax in fig.axes:
        matplotlib.pyplot.sca(ax)
        plt.xticks(rotation=45)
        
    fig.tight_layout()
    figFileName = f"{text_file_name}_{valueCat}_night_{night}.png"
    print("Saving " , figFileName )
    fig.savefig( figFileName ,dpi=100)
    plt.close( fig )
    

def testProfileData(profileData=None, night=0, eventListNames=None, valueCat="", 
                    text_file=None, nbNightsDic=None):
    for behavEvent in eventListNames:
        event = behavEvent+valueCat
        print("event: ", event)
        text_file.write("Test for the event: {} night {}".format( event, night ) )
        
        profileValueDictionary = getProfileValues(profileData=profileData, night=night, 
                                                  event=event, nbNightsDic=nbNightsDic)
        
        dfData = pandas.DataFrame({'group': profileValueDictionary["exp"],
                                   'genotype': profileValueDictionary["genotype"],
                                   'value': profileValueDictionary["value"]})
        
        #pandas.DataFrame(dfData).info()
        
        #Mixed model: variable to explain: value; fixed factor = genotype; random effect: group
        model = smf.mixedlm("value ~ genotype", dfData, groups = dfData["group"])  #Creates the model 
        result = model.fit()  #Run model
        print(result.summary())  #Print summary
        text_file.write(result.summary().as_text())


if __name__ == '__main__':
    
    print("Code launched.")

    # datetime object containing current date and time
    now = datetime.now()
    print("Start @", now)
    
    files = getFilesToProcess()
    tmin, tmax, text_file, text_file_name = getMinTMaxTAndFileNameInput()

    profileData = {}
    nightComputation = input("Compute profile only during night events (Yes or No)? ")
    doStats = input("Do you want to do Stats on your data (Yes or No)? (Warning: MORE THAN ONE FILE REQUIRED): ")
    genoToRemove = ""
    while genoToRemove == "":
        genoToRemove = input("Is there a Genotype to exclude? (a 'keyword' or 'NO' if None) DON'T LEAVE BLANK!): ")
    nbMaxAnimal = int(input("What's the MAX number of Animals per LMT ? (1,2,3 or 4): "))
    
    ### List of events to be computed within the behavioral profile     ###
    ### and header for the computation of the total distance travelled. ###
    
    if nbMaxAnimal == 2:
        behavioralEvents = ["Contact", "Oral-oral Contact", "Oral-genital Contact", "Side by side Contact", 
                            "Side by side Contact, opposite way", "Social approach", "Get away", "Break contact", 
                            "Approach contact","Approach rear", "FollowZone Isolated", "Train2", "Group2", 
                            "Move isolated", "Move in contact", "Rear isolated", "Rear in contact", "Stop isolated", 
                            "WallJump","Water Zone","Fight","Gets to Fight","Won Fight","Lost Fight", "SAP",
                            "extra", "totalDistance"]
    if nbMaxAnimal == 3:
        behavioralEvents = ["Contact", "Oral-oral Contact", "Oral-genital Contact", "Side by side Contact", 
                            "Side by side Contact, opposite way", "Social approach", "Get away", "Approach contact", 
                            "Approach rear", "Break contact", "FollowZone Isolated", "Train2", "Group2", "Group3",
                            "Group 3 break", "Group 3 make", "Move isolated", "Move in contact", "Rear isolated", "SAP",
                            "Rear in contact", "Stop isolated", "WallJump","extra", "totalDistance"]
    if nbMaxAnimal == 4:
        behavioralEvents = ["Contact", "Oral-oral Contact", "Oral-genital Contact", "Side by side Contact", 
                            "Side by side Contact, opposite way", "Social approach", "Get away", "Approach contact", 
                            "Approach rear", "Break contact", "FollowZone Isolated", "Train2", "Group2", "Group3", "Group4",
                            "Group 3 break", "Group 3 make", "Group 4 break", "Group 4 make", "Move isolated", "SAP",
                            "Move in contact", "Rear isolated", "Rear in contact", "Stop isolated", 
                            "WallJump","extra", "totalDistance"]
    
#     behavioralEvents = ["Contact", "Oral-oral Contact", "Oral-genital Contact", "Side by side Contact", 
#                         "Side by side Contact, opposite way", "Social approach", "Get away", "Approach contact", 
#                         "Approach rear", "Break contact", "FollowZone Isolated", "Train2", "Group2", "Group3", "Group4",
#                         "Group 3 break", "Group 3 make", "Group 4 break", "Group 4 make", "Move isolated", 
#                         "Move in contact", "Rear isolated", "Rear in contact", "Stop isolated", "SAP",
#                         "WallJump","extra", "totalDistance"]
#     behavioralEvents = ["Fight","Gets to Fight","Won Fight","Lost Fight","extra", "totalDistance"]

    for file in files:
        print(file)
        connection = sqlite3.connect( file )
        profileData[file] = {}
    
        pool = AnimalPool( )
        pool.loadAnimals( connection )

        if nightComputation.lower() == "no":
#             print("no-1")
            minT = tmin
            maxT = tmax
            n = 0
            #Compute profile data and save them in a text file
            profileData[file][n] = computeProfile(file = file, minT=minT, maxT=maxT, night=n)
            text_file.write( "\n" )
            print("Profile data saved.")
            
        if nightComputation.lower() == "yes":
#             print("yes-1")
            nightEventTimeLine = EventTimeLineCached(connection, file, "night", minFrame=tmin, maxFrame=tmax )
            n = 1
            
            for eventNight in nightEventTimeLine.getEventList():
                minT = eventNight.startFrame
                maxT = eventNight.endFrame
                print("* * **   Night: ", n, "   ** * *")
                print(minT, maxT)
                #Compute profile data and save them in a text file
                profileData[file][n] = computeProfile(file=file, minT=minT, maxT=maxT, night=n)
                text_file.write( "\n" )
                print("-- Profile data saved. --")
                
                n+=1 #increase 'n' for the following nights
        else: #Neither 'yes' or 'no' for night computation
            print("There is a problem with your answer!")
            
        connection.close() #Close the connection with database (Saves memory?)

    text_file.write( "\n" )
    print("--- Write text before plotting ProfileData ---")    
    
    print('### Checking number of nights ###')
    nb_nights = {}
    for file in profileData:
        print(file)
        nb_nights[file]=len(profileData[file])
        print(len(profileData[file]))
    print(nb_nights)
    nb_max_nights = max(nb_nights.keys(), key=nb_nights.get)
    nb_min_nights = min(nb_nights.keys(), key=nb_nights.get)

    print(f'Maximum number of nights: {nb_nights[nb_max_nights]} in {nb_max_nights}')
    print(f'Minimum number of nights: {nb_nights[nb_min_nights]} in {nb_min_nights}')
    
    if nightComputation.lower() == "no":
#         print("no-2")
        n = 0

        #Plot profile data and save them in a png figure
        plotProfileDataDuration(profileData=profileData, night=n, 
                                valueCat=" TotalLen", text_file_name = text_file_name, nbNightsDic=None)
        plotProfileDataDuration(profileData=profileData, night=n, 
                                valueCat=" Nb", text_file_name = text_file_name, nbNightsDic=None)
        
        #Test profile data and save results in a text file
        text_file.write( "Statistical analysis: mixed linear models" )
        text_file.write( "{}\n" )
        testProfileData(profileData=profileData, night=n, eventListNames=behavioralEvents[:-2], 
                        valueCat=" TotalLen", text_file=text_file, nbNightsDic=None)
        testProfileData(profileData=profileData, night=n, eventListNames=behavioralEvents[:-2], 
                        valueCat=" Nb", text_file=text_file, nbNightsDic=None)
        print("test for total distance")
        testProfileData(profileData=profileData, night=n, eventListNames=["totalDistance"], 
                        valueCat="", text_file=text_file, nbNightsDic=None)
        
    if nightComputation.lower() == "yes":
        #Following line removed because 'nightEventTimeLine' is already computed before
#         nightEventTimeLine = EventTimeLineCached( connection, file, "night", minFrame=tmin, maxFrame=tmax )
        
        n = 1 #night counter
    
        for eventNight in nightEventTimeLine.getEventList():
            print("* * **  *** Night: ", n, " ***  ** * *")

            #Plot profile data and save them
            plotProfileDataDuration(profileData=profileData, night=n, valueCat=" TotalLen", 
                                    text_file_name = text_file_name, nbNightsDic=nb_nights)
            plotProfileDataDuration(profileData=profileData, night=n, valueCat=" Nb", 
                                    text_file_name = text_file_name, nbNightsDic=nb_nights)


#             plotProfileDataDuration(profileData=profileData, night=n, valueCat=" TotalLen")
#             plotProfileDataDuration(profileData=profileData, night=n, valueCat=" Nb")

            if doStats.lower() == "yes" :
                #Test profile data and save results in a text file
                print(" -- Now, doing some stats...")
                text_file.write( "Statistical analysis: mixed linear models" )
                text_file.write( "{}\n" )
                testProfileData(profileData=profileData, night=n, eventListNames=behavioralEvents[:-2], 
                                valueCat=" TotalLen", text_file=text_file, nbNightsDic=nb_nights)
                testProfileData(profileData=profileData, night=n, eventListNames=behavioralEvents[:-2], 
                                valueCat=" Nb", text_file=text_file, nbNightsDic=nb_nights)
                print("test for total distance")
                testProfileData(profileData=profileData, night=n, eventListNames=["totalDistance"], 
                                valueCat="", text_file=text_file, nbNightsDic=nb_nights)
            n+=1 #Implement the night counter
#             if n > nb_min_nights:
#                 print("Different number of nights in the databases provided, the computation stops here ...")

    print("") 
    print( "***************************************************")
    print ("Plots saved as png and analyses saved in text file.")
    text_file.close()

    print ("*** ALL JOBS DONE ***")
    # datetime object containing current date and time
    now = datetime.now()
    print("Ends @", now)
    

Code launched.
Start @ 2022-12-06 11:59:50.387266
Enter time information in frame. You can also set in days, hour, minutes
valid entries are: 100, 1d, 1.5d, 23.5h, 1d 2h 3m 4s 5f


Starting t :  0


Entry (in frame) : 0


Ending t :  115000


Entry (in frame) : 115000


Enter file name to save data (.txt will be added):  zefd
Compute profile only during night events (Yes or No)?  No
Is there a Genotype to exclude? (a 'keyword' or 'NO' if None) DON'T LEAVE BLANK!):  
Is there a Genotype to exclude? (a 'keyword' or 'NO' if None) DON'T LEAVE BLANK!):  No
What's the MAX number of Animals per LMT ? (1,2,3 or 4):  3


E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite
Loading animals ...
Fields available in lmtanalysis Z:  ['ID', 'RFID', 'GENOTYPE', 'NAME']
SQL Query: SELECT ID,RFID,NAME,GENOTYPE FROM ANIMAL ORDER BY GENOTYPE
Animal Id:1 Name:A RFID:000004849551 Genotype:NaCl User1:None
Animal Id:2 Name:B RFID:000004849390 Genotype:NaCl User1:None
Animal Id:3 Name:C RFID:000004849540 Genotype:NaCl User1:None
Loading animals ...
Fields available in lmtanalysis Z:  ['ID', 'RFID', 'GENOTYPE', 'NAME']
SQL Query: SELECT ID,RFID,NAME,GENOTYPE FROM ANIMAL ORDER BY GENOTYPE
Animal Id:1 Name:A RFID:000004849551 Genotype:NaCl User1:None
Animal Id:2 Name:B RFID:000004849390 Genotype:NaCl User1:None
Animal Id:3 Name:C RFID:000004849540 Genotype:NaCl User1:None
computing individual animal:1
RFID:000004849551
computing individual event: Contact
Contact  Id( 1 , None , None , None ) Min/maxFrame: ( 0 / 115000 ) Loaded ( 226  records loaded in  0.7250015735626221 S )
Caching event

  axes[row,col].set_ylim(min(y)-0.2*max(y), max(y)+0.2*max(y))


Saving  zefd.txt_ TotalLen_night_0.png
event:  Contact Nb
y:  [226, 248, 221]
x:  ['NaCl', 'NaCl', 'NaCl']
group:  ['E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite', 'E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite', 'E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite']
Nb of experiments:  1
event:  Oral-oral Contact Nb
y:  [193, 188, 186]
x:  ['NaCl', 'NaCl', 'NaCl']
group:  ['E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite', 'E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite', 'E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite']
Nb of experiments:  1
event:  Oral-genital Contact Nb
y:  [192, 376, 219]
x:  ['NaCl', 'NaCl', 'NaCl']
group:  ['E:/LMT + Amphetamine/LMT recordings (DBs rebuilt)/221021_Amphet_Cage2_weekend2.sqlite', 'E:/LMT + Amphetamine/LMT recordings (DB

  axes[row,col].set_ylim(min(y)-0.2*max(y), max(y)+0.2*max(y))


Saving  zefd.txt_ Nb_night_0.png
event:  Contact TotalLen
          Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: value       
No. Observations: 3       Method:             REML        
No. Groups:       1       Scale:              3519909.3333
Min. group size:  3       Log-Likelihood:     -18.4611    
Max. group size:  3       Converged:          Yes         
Mean group size:  3.0                                     
----------------------------------------------------------
             Coef.    Std.Err. z P>|z|   [0.025    0.975] 
----------------------------------------------------------
Intercept   12544.667    0.000   0.000 12544.667 12544.667
Group Var 3519909.333                                     

event:  Oral-oral Contact TotalLen
              Mixed Linear Model Regression Results
Model:                MixedLM    Dependent Variable:    value     
No. Observations:     3          Method:                REML      
No. Groups:           1

  sdf[0:self.k_fe, 2] = sdf[0:self.k_fe, 0] / sdf[0:self.k_fe, 1]
  sdf[0:self.k_fe, 2] = sdf[0:self.k_fe, 0] / sdf[0:self.k_fe, 1]


LinAlgError: Singular matrix