<a href="https://colab.research.google.com/github/jacobdavidson/zfish_mutants_analysis/blob/master/Data_and_calculation_minimal_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import scipy
import scipy.spatial.distance

In [None]:
# generate random data in order to test the format of the functions
numfish = 25
arenasize = 100
numtimesteps = 1000
randomseed = 0  # fixed seed so that re-running the notebook reproduces the same data
rng = np.random.default_rng(randomseed)
# empty template that defines the expected column layout
dfblank = pd.DataFrame(columns=['timestep','ID','x','y'])
# Build the whole table in one step instead of concatenating inside a loop
# (repeated concat is quadratic and can leave the columns with object dtype).
# Rows are ordered by timestep, then by fish ID within each timestep.
positions = rng.random(size=(numfish*numtimesteps, 2))*arenasize
df = pd.DataFrame(
    {
        'timestep': np.repeat(np.arange(numtimesteps), numfish),
        'ID': np.tile(np.arange(numfish), numtimesteps),
        'x': positions[:, 0],
        'y': positions[:, 1],
    },
    columns=dfblank.columns,
)

In [None]:
# show the generated table (long frames are rendered as a truncated preview)
df

Unnamed: 0,timestep,ID,x,y
0,0,0,33.029646,31.306476
1,0,1,50.976228,4.302006
2,0,2,73.540262,25.298484
3,0,3,90.020010,48.917747
4,0,4,45.598375,58.760542
...,...,...,...,...
24995,999,20,76.327538,75.582709
24996,999,21,77.840298,73.196207
24997,999,22,22.688061,13.492584
24998,999,23,4.274942,64.077875


In [None]:
## functions for calculating metrics

def distfns(dfsel):
    """Median inter-individual spacing and median nearest-neighbour distance.

    Parameters
    ----------
    dfsel : pandas.DataFrame
        Rows with the columns 'timestep', 'x' and 'y'; one row per individual
        per timestep.  The number of individuals may differ between timesteps.

    Returns
    -------
    medianspacing : float
        Median over all timesteps of every pairwise distance between two
        different individuals present at the same timestep.
    mediannndist : float
        Median over all timesteps and individuals of the distance to the
        closest other individual at the same timestep.

    Both values are NaN when no timestep contains at least two individuals.
    """
    pairwise = []  # off-diagonal distances, one flat array per timestep
    nearest = []   # nearest-neighbour distance of every individual, per timestep
    for _, frame in dfsel.groupby('timestep'):
        xy = frame[['x','y']].to_numpy(dtype=float)
        count = len(xy)
        if count < 2:
            # a lone individual has no neighbour, so it contributes no distances
            continue
        dist = scipy.spatial.distance.cdist(xy, xy)
        # size the diagonal mask from the data itself rather than from a
        # notebook-level global, so any group size is handled
        offdiag = ~np.eye(count, dtype=bool)
        pairwise.append(dist[offdiag])
        # blank the zero self-distances before taking the per-row minimum
        np.fill_diagonal(dist, np.nan)
        nearest.append(np.nanmin(dist, axis=1))
    if not pairwise:
        return np.nan, np.nan
    # get median of all distances
    medianspacing = np.nanmedian(np.concatenate(pairwise))
    # get median nearest distance
    mediannndist = np.median(np.concatenate(nearest))
    return medianspacing, mediannndist

def centroidspeedfn(dfsel):
    """Median distance travelled by the group centroid between timesteps.

    The centroid of a timestep is the mean 'x' and mean 'y' of all rows that
    share that 'timestep' value.  Consecutive centroids (in increasing
    timestep order) are differenced, and the median length of those
    displacement vectors is returned.
    """
    # one [mean x, mean y] pair per timestep; groupby yields timesteps in sorted order
    centroid_track = np.array(list(
        dfsel.groupby('timestep').apply(
            lambda frame: [np.mean(frame['x']), np.mean(frame['y'])]
        )
    ))
    # displacement of the centroid from each timestep to the next
    displacements = centroid_track[1:] - centroid_track[:-1]
    step_lengths = np.linalg.norm(displacements, axis=1)
    return np.median(step_lengths)

def speedfns(dfsel):
    """Median speed, speed inter-quartile range and mean polarization.

    Parameters
    ----------
    dfsel : pandas.DataFrame
        Rows with the columns 'timestep', 'ID', 'x' and 'y'.  Every ID must
        have the same number of rows, because the per-individual tracks are
        stacked into one regular array.

    Returns
    -------
    medianspeed : float
        Median, over all individuals and steps, of the distance moved between
        consecutive rows of the same ID.
    speedIQR : float
        Difference between the 0.75 and 0.25 quantiles of those distances.
    polarization : float
        Time average of the squared length of the mean unit-velocity vector.
        Individuals with zero (or undefined) speed at a step have no heading
        and are left out of that step's average instead of turning the whole
        result into NaN.

    All three values are NaN when the selection holds fewer than two rows per
    individual, since no velocity can be formed.
    """
    # displacement of each individual between consecutive timesteps; sort so the
    # result does not depend on the row order of the input
    tracks = [
        np.diff(
            track.sort_values('timestep', kind='stable')[['x','y']].to_numpy(dtype=float),
            axis=0,
        )
        for _, track in dfsel.groupby('ID')
    ]
    if not tracks:
        return np.nan, np.nan, np.nan
    vels = np.array(tracks).swapaxes(0,1)  # (step, individual, xy)
    speeds = np.linalg.norm(vels,axis=2)   # (step, individual)
    if speeds.size == 0:
        return np.nan, np.nan, np.nan
    medianspeed = np.median(speeds)
    lowerq, upperq = np.quantile(speeds,q=[0.25,0.75])
    speedIQR = upperq - lowerq
    ## calculate polarization coefficient at each time step
    moving = speeds > 0  # False for zero speed and for NaN speed
    safespeeds = np.where(moving, speeds, 1.0)  # avoid 0/0 for stationary individuals
    unitvel = np.where(moving[:,:,np.newaxis], vels/safespeeds[:,:,np.newaxis], 0.0)  # unit velocity vector
    nmoving = moving.sum(axis=1)
    hasheading = nmoving > 0  # steps where at least one individual moved
    meanheading = unitvel[hasheading].sum(axis=1)/nmoving[hasheading,np.newaxis]
    # polarization coefficient, cited in the original as Tunstrøm et al. (2013).
    # NOTE(review): this is the SQUARED magnitude of the mean heading; the cited
    # order parameter is presumably the magnitude itself - confirm that the
    # square is intended before comparing with published values.
    polarization = np.sum(meanheading**2,axis=1)
    meanpolarization = np.mean(polarization) if polarization.size else np.nan
    return medianspeed, speedIQR, meanpolarization

In [None]:
# example:  calculate these for time ranges in the dataset
timeinterval = 100  # number of timesteps to select for calculations
numintervals = np.ceil(np.max(df['timestep'])/timeinterval).astype(int)
metriccolumns = ['timeinterval','spacing','nearest-neighbor-distance','centroid-speed','median-speed','speedIQR','polarization']
# Collect one record per interval and build the frame once at the end; growing
# an empty frame row by row is slow and leaves every column with object dtype.
records = []
for t in range(numintervals):
    # rows whose timestep lies in the half-open window [t*timeinterval, (t+1)*timeinterval)
    dfsel = df[(df['timestep']>=t*timeinterval)&(df['timestep']<(t+1)*timeinterval)]
    # calculate metrics
    medianspacing, mediannndist = distfns(dfsel)
    centroidspeed = centroidspeedfn(dfsel)
    medianspeed,speedIQR, polarization = speedfns(dfsel)
    # store the metrics of this interval
    vals = {'timeinterval': t, 'spacing': medianspacing, 'nearest-neighbor-distance': mediannndist,
           'centroid-speed': centroidspeed, 'median-speed': medianspeed, 'speedIQR':speedIQR, 'polarization':polarization}
    records.append(vals)
dfmetrics = pd.DataFrame(records, columns=metriccolumns)


In [None]:
# show the table of metrics, one row per time interval
dfmetrics

Unnamed: 0,timeinterval,spacing,nearest-neighbor-distance,centroid-speed,median-speed,speedIQR,polarization
0,0,50.786551,10.214035,9.921035,50.053891,37.159707,0.043039
1,1,50.776641,9.908221,9.70782,51.125625,37.557135,0.045703
2,2,51.300425,9.988572,8.748844,50.728021,36.199595,0.037398
3,3,52.009494,9.973272,8.926851,50.633423,39.286217,0.034047
4,4,51.522673,10.150089,8.098391,51.72298,37.341742,0.036772
5,5,51.405082,9.940939,7.849329,51.367341,38.236185,0.030702
6,6,51.117999,9.988431,8.807941,49.767691,37.381087,0.035587
7,7,51.182528,10.10133,10.129722,50.513953,37.688059,0.043045
8,8,51.532409,10.019297,10.074272,50.859497,38.467924,0.038939
9,9,50.798292,9.884932,9.131003,51.217129,37.426673,0.03908
