# Individual Feature Determination

## Preparation

### Import

In [None]:
import pandas as pd
import numpy as np
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Allow up to 100 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)

## Function Definition

### Calculate Individual Features

#### Percentage of Members Showing Face

In [None]:
def members_show_face_percen(df_group, df_indiv):
    """Add the fraction of each group's members whose ``shown_face`` is True.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers; must contain a ``num_members`` column. It is
            modified in place.
        df_indiv: Individual-level DataFrame with ``group`` and
            ``shown_face`` columns.

    Returns:
        ``df_group`` with a new ``members_show_face_percen`` column. No
        intermediate count column is left behind on the frame.
    """
    shown_per_group = {
        group: data['shown_face'].value_counts().get(True, 0)
        for group, data in df_indiv.groupby('group')
    }

    # Groups that have no rows in df_indiv count as zero members shown.
    # The counts are taken as a plain array so the division is positional
    # with respect to df_group's rows.
    shown = df_group.index.map(shown_per_group).fillna(0).to_numpy()
    df_group['members_show_face_percen'] = shown / df_group['num_members']

    return df_group

#### Percentage Share Screen

In [None]:
def screen_share_percen(df_group):
    """Add the ratio of screen-share time to total recorded duration.

    Args:
        df_group: Group-level DataFrame with ``share_screen_time`` and
            ``total_duration_recorded`` columns. It is modified in place.

    Returns:
        The same ``df_group`` with a new ``screen_share_percen`` column.
    """
    shared_time = df_group['share_screen_time']
    recorded_time = df_group['total_duration_recorded']
    df_group['screen_share_percen'] = shared_time / recorded_time
    return df_group


#### Overall Duration Spoken

In [None]:
def total_duration_spoken(df_group, df_indiv):
    """Add the summed individual speaking time of every group.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame with ``group`` and
            ``indiv_spoken_time`` columns.

    Returns:
        The same ``df_group`` with a new ``total_duration_spoken`` column.
        Groups that have no rows in ``df_indiv`` get NaN.
    """
    spoken_by_group = {
        group: data['indiv_spoken_time'].sum()
        for group, data in df_indiv.groupby('group')
    }

    # Index.map already yields NaN for groups missing from the mapping, so
    # no explicit NaN fill is needed.
    df_group['total_duration_spoken'] = df_group.index.map(spoken_by_group)

    return df_group


#### Ratio Spoken-Recorded

In [None]:
def ratio_spoken_recorded(df_group, df_indiv):
    """Add the ratio of total spoken time to total recorded duration.

    Args:
        df_group: Group-level DataFrame with ``total_duration_spoken`` and
            ``total_duration_recorded`` columns. It is modified in place.
        df_indiv: Not used by this function; accepted so the signature
            matches the other feature functions.

    Returns:
        The same ``df_group`` with a new ``ratio_spoken_recorded`` column.
    """
    spoken_time = df_group['total_duration_spoken']
    recorded_time = df_group['total_duration_recorded']
    df_group['ratio_spoken_recorded'] = spoken_time / recorded_time
    return df_group


#### Average Meeting Turn Duration

In [None]:
def avg_meeting_turn_duration(df_group, df_indiv):
    """Add each group's turn-weighted average turn duration.

    For every group, each member's ``average_turn_duration`` is weighted by
    that member's share of the group's ``num_turns``. Rows where either
    value is missing are ignored.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame with ``group``, ``num_turns``
            and ``average_turn_duration`` columns.

    Returns:
        The same ``df_group`` with a new ``avg_meeting_turn_duration``
        column. Groups with no usable rows, or whose turns sum to zero,
        get NaN.
    """
    avg_durations = {}

    for group, data in df_indiv.groupby('group'):
        valid = data[['num_turns', 'average_turn_duration']].dropna()
        turns_in_group = valid['num_turns'].sum()
        # Without any turns the weights are undefined; leave the group as NaN
        # instead of dividing by zero.
        if valid.empty or turns_in_group == 0:
            continue
        avg_durations[group] = sum(
            turns / turns_in_group * duration
            for turns, duration in zip(valid['num_turns'],
                                       valid['average_turn_duration'])
        )

    # Index.map yields NaN for groups missing from the mapping.
    df_group['avg_meeting_turn_duration'] = df_group.index.map(avg_durations)

    return df_group

#### Total Turns Overall

In [None]:
def total_turns(df_group, df_indiv):
    """Add the summed number of turns of every group.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame with ``group`` and
            ``num_turns`` columns.

    Returns:
        The same ``df_group`` with a new ``total_turns`` column. Groups
        that have no rows in ``df_indiv`` get NaN.
    """
    turns_by_group = {
        group: data['num_turns'].sum()
        for group, data in df_indiv.groupby('group')
    }

    # Index.map already yields NaN for groups missing from the mapping, so
    # no explicit NaN fill is needed.
    df_group['total_turns'] = df_group.index.map(turns_by_group)

    return df_group


#### Turns per minute

In [None]:
def turns_in_a_min(df_group, df_indiv):
    """Add the turn rate: ``total_turns / total_duration_recorded * 60``.

    Args:
        df_group: Group-level DataFrame with ``total_turns`` and
            ``total_duration_recorded`` columns. It is modified in place.
        df_indiv: Not used by this function; accepted so the signature
            matches the other feature functions.

    Returns:
        The same ``df_group`` with a new ``turns_in_a_min`` column.
    """
    # NOTE(review): the factor 60 yields turns per minute only if
    # total_duration_recorded is expressed in seconds - confirm.
    turns_per_time_unit = df_group['total_turns'] / df_group['total_duration_recorded']
    df_group['turns_in_a_min'] = turns_per_time_unit * 60
    return df_group


#### Average meeting Words in Message

In [None]:
def average_meeting_words_message(df_group, df_indiv):
    """Add each group's word-weighted average of words per turn.

    For every group, each member's ``avg_words_turn`` is weighted by that
    member's share of the group's ``num_words``. Rows where either value is
    missing are ignored.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame with ``group``, ``num_words``
            and ``avg_words_turn`` columns.

    Returns:
        The same ``df_group`` with a new ``average_meeting_words_message``
        column. Groups with no usable rows, or whose words sum to zero,
        get NaN.
    """
    # NOTE(review): the weights here are num_words, whereas the analogous
    # turn-duration feature weights by num_turns - confirm this is intended.
    avg_words = {}

    for group, data in df_indiv.groupby('group'):
        valid = data[['num_words', 'avg_words_turn']].dropna()
        words_in_group = valid['num_words'].sum()
        # Without any words the weights are undefined; leave the group as NaN
        # instead of dividing by zero.
        if valid.empty or words_in_group == 0:
            continue
        avg_words[group] = sum(
            words / words_in_group * per_turn
            for words, per_turn in zip(valid['num_words'],
                                       valid['avg_words_turn'])
        )

    # Index.map yields NaN for groups missing from the mapping.
    df_group['average_meeting_words_message'] = df_group.index.map(avg_words)

    return df_group

#### Total Words in General

In [None]:
def total_words(df_group, df_indiv):
    """Add the summed number of words of every group.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame with ``group`` and
            ``num_words`` columns.

    Returns:
        The same ``df_group`` with a new ``total_words`` column. Groups
        that have no rows in ``df_indiv`` get NaN.
    """
    words_by_group = {
        group: data['num_words'].sum()
        for group, data in df_indiv.groupby('group')
    }

    # Index.map already yields NaN for groups missing from the mapping, so
    # no explicit NaN fill is needed.
    df_group['total_words'] = df_group.index.map(words_by_group)

    return df_group


#### Words per minute

In [None]:
def words_in_a_min(df_group, df_indiv):
    """Add the word rate: ``total_words / total_duration_recorded * 60``.

    Args:
        df_group: Group-level DataFrame with ``total_words`` and
            ``total_duration_recorded`` columns. It is modified in place.
        df_indiv: Not used by this function; accepted so the signature
            matches the other feature functions.

    Returns:
        The same ``df_group`` with a new ``words_in_a_min`` column.
    """
    # NOTE(review): the factor 60 yields words per minute only if
    # total_duration_recorded is expressed in seconds - confirm.
    words_per_time_unit = df_group['total_words'] / df_group['total_duration_recorded']
    df_group['words_in_a_min'] = words_per_time_unit * 60
    return df_group

### Calculate Descriptive Statistics Given a Column

In [None]:
def calc_descriptive_stadistics_column2(df_group, df_indiv, group_col, column):
    """Add per-group descriptive statistics of one individual-level column.

    For every group in ``df_indiv`` the mean, standard deviation, minimum,
    maximum and range (max - min) of ``column`` are computed and stored in
    ``df_group`` as ``avg_<column>``, ``stdev_<column>``, ``min_<column>``,
    ``max_<column>`` and ``diff_<column>``.

    Args:
        df_group: Group-level DataFrame whose index holds the group
            identifiers. It is modified in place.
        df_indiv: Individual-level DataFrame containing ``group_col`` and
            ``column``.
        group_col: Name of the column in ``df_indiv`` to group by.
        column: Name of the column in ``df_indiv`` to summarise.

    Returns:
        The same ``df_group`` with the five statistic columns added.
        Groups that have no rows in ``df_indiv`` get NaN.
    """
    statistics_list = ['avg', 'stdev', 'min', 'max', 'diff']
    per_stat = {stat: {} for stat in statistics_list}

    for group, data in df_indiv.groupby(group_col):
        values = data[column]
        lowest, highest = values.min(), values.max()
        per_stat['avg'][group] = values.mean()
        per_stat['stdev'][group] = values.std()
        per_stat['min'][group] = lowest
        per_stat['max'][group] = highest
        per_stat['diff'][group] = highest - lowest

    for stat in statistics_list:
        # Look the values up by group label so each statistic lands on the
        # row of the group it was computed for, whatever the row order.
        df_group[stat + "_" + column] = df_group.index.map(per_stat[stat])

    return df_group

In [None]:
def calc_descriptive_statistics_column(df_group, df_indiv, group_col, column):
    """Return ``df_group`` extended with per-group statistics of ``column``.

    For every group in ``df_indiv`` the mean, standard deviation, minimum,
    maximum and range (max - min) of ``column`` are computed. They are
    appended column-wise as ``avg_<column>``, ``stdev_<column>``,
    ``min_<column>``, ``max_<column>`` and ``diff_<column>``.

    Args:
        df_group: Group-level DataFrame; not modified.
        df_indiv: Individual-level DataFrame containing ``group_col`` and
            ``column``.
        group_col: Name of the column in ``df_indiv`` to group by.
        column: Name of the column in ``df_indiv`` to summarise.

    Returns:
        A new DataFrame: ``df_group`` concatenated along the columns with
        the statistics table, which is indexed by group.
    """
    reducers = {
        'avg': lambda values: values.mean(),
        'stdev': lambda values: values.std(),
        'min': lambda values: values.min(),
        'max': lambda values: values.max(),
        'diff': lambda values: values.max() - values.min(),
    }

    # Materialise the groups once so every statistic walks the same chunks.
    chunks = [(key, chunk[column]) for key, chunk in df_indiv.groupby(group_col)]

    # One inner dict per output column, keyed by group label.
    table = {
        stat_name + "_" + column: {key: reduce_fn(values) for key, values in chunks}
        for stat_name, reduce_fn in reducers.items()
    }
    new_data = pd.DataFrame.from_dict(table)

    return pd.concat([df_group, new_data], axis=1)


### Add Descriptive Statistics Given a Range

In [None]:
def add_descriptive_stadistics_range(df_group, df_indiv, start_col, end_col):
    """Add descriptive statistics for a contiguous range of columns.

    Every column of ``df_indiv`` from ``start_col`` to ``end_col`` (both
    inclusive, in ``df_indiv``'s column order) is summarised per ``"group"``
    with ``calc_descriptive_statistics_column``.

    Args:
        df_group: Group-level DataFrame to extend.
        df_indiv: Individual-level DataFrame holding the columns to
            summarise and a ``group`` column.
        start_col: Name of the first column of the range.
        end_col: Name of the last column of the range.

    Returns:
        The DataFrame produced after all columns of the range were added.
    """
    all_columns = df_indiv.columns
    first_pos = all_columns.get_loc(start_col)
    last_pos = all_columns.get_loc(end_col)

    # +1 because the end column itself belongs to the range.
    for name in all_columns[first_pos:last_pos + 1].tolist():
        df_group = calc_descriptive_statistics_column(df_group, df_indiv, "group", name)

    return df_group

### Calculate Group Features

In [None]:
def calc_group_features(indiv_features_file, group_features, file_out):
    """Compute all group-level features and write them to ``file_out``.

    Args:
        indiv_features_file: Path of the Excel file with the individual
            features (first column is used as index).
        group_features: Path of the Excel file with the hand-filled group
            features (first column is used as index). The sheet is
            transposed after loading.
        file_out: Path the result is written to. The file is written with
            ``DataFrame.to_csv`` (UTF-8 with BOM, no index column)
            regardless of the extension of the path.

    Returns:
        The resulting DataFrame: the first four rows of the transposed
        group sheet followed by the feature rows, with a fresh 0..n-1 index.
    """
    df_indiv = pd.read_excel(indiv_features_file, index_col=0)
    # Read from the function argument, not from a same-named notebook global.
    df_group = pd.read_excel(group_features, index_col=0)
    df_group = df_group.transpose()

    # The first 4 rows of the transposed group sheet are set aside and put
    # back on top of the result at the end (presumably descriptive rows such
    # as types and formulas - confirm against the input file).
    stripped_rows = df_group.iloc[:4]
    # Copy so the feature functions, which assign new columns in place,
    # operate on an independent frame rather than on a slice.
    df_group = df_group.iloc[4:].copy()
    # The first 3 rows of the individual sheet are skipped as well.
    df_indiv = df_indiv.iloc[3:]

    # Order matters: the ratio / per-minute features read columns created by
    # the total_* functions that run before them.
    df_group = members_show_face_percen(df_group, df_indiv)
    df_group = screen_share_percen(df_group)
    df_group = total_duration_spoken(df_group, df_indiv)
    df_group = ratio_spoken_recorded(df_group, df_indiv)
    df_group = avg_meeting_turn_duration(df_group, df_indiv)
    df_group = total_turns(df_group, df_indiv)
    df_group = turns_in_a_min(df_group, df_indiv)
    df_group = average_meeting_words_message(df_group, df_indiv)
    df_group = total_words(df_group, df_indiv)
    df_group = words_in_a_min(df_group, df_indiv)

    df_group = add_descriptive_stadistics_range(
        df_group, df_indiv, "indiv_spoken_time", "text_sadness"
    )

    df_group = pd.concat([stripped_rows, df_group]).reset_index(drop=True)
    df_group.to_csv(file_out, encoding='utf-8-sig', index=False)

    return df_group

## Use of Function

In [None]:
# Input workbooks (individual and hand-filled group features) and output path.
# NOTE(review): file_out ends in .xlsx, but calc_group_features writes it with
# DataFrame.to_csv, so the file content is CSV - confirm the intended extension.
indiv_features_file = r'/content/drive/MyDrive/Projects/tps/data/12. features/4_individual_features_final_ratio.xlsx'
group_features_file = r'/content/drive/MyDrive/Projects/tps/data/12. features/6_group_meetings_features_byhand_filled.xlsx'
file_out= r'/content/drive/MyDrive/Projects/tps/data/12. features/7_group_meetings_features_final.xlsx'

In [None]:
# Run the whole pipeline: read both workbooks, compute the group features and
# write the result to file_out. The call returns the resulting DataFrame.
calc_group_features(indiv_features_file, group_features_file, file_out)


Unnamed: 0,group_name,num_members,num_recordings,total_duration_recorded,share_screen_time,members_show_face_percen,screen_share_percen,total_duration_spoken,ratio_spoken_recorded,avg_meeting_turn_duration,total_turns,turns_in_a_min,average_meeting_words_message,total_words,words_in_a_min,avg_indiv_spoken_time,stdev_indiv_spoken_time,min_indiv_spoken_time,max_indiv_spoken_time,diff_indiv_spoken_time,avg_indiv_spoken_time_ratio,stdev_indiv_spoken_time_ratio,min_indiv_spoken_time_ratio,max_indiv_spoken_time_ratio,diff_indiv_spoken_time_ratio,avg_average_turn_duration,stdev_average_turn_duration,min_average_turn_duration,max_average_turn_duration,diff_average_turn_duration,avg_average_turn_duration_ratio,stdev_average_turn_duration_ratio,min_average_turn_duration_ratio,max_average_turn_duration_ratio,diff_average_turn_duration_ratio,avg_avg_time_without_speaking,stdev_avg_time_without_speaking,min_avg_time_without_speaking,max_avg_time_without_speaking,diff_avg_time_without_speaking,avg_avg_time_without_speaking_ratio,stdev_avg_time_without_speaking_ratio,min_avg_time_without_speaking_ratio,max_avg_time_without_speaking_ratio,diff_avg_time_without_speaking_ratio,avg_max_time_without_speaking,stdev_max_time_without_speaking,min_max_time_without_speaking,max_max_time_without_speaking,diff_max_time_without_speaking,...,avg_max_words_turn,stdev_max_words_turn,min_max_words_turn,max_max_words_turn,diff_max_words_turn,avg_max_words_turn_ratio,stdev_max_words_turn_ratio,min_max_words_turn_ratio,max_max_words_turn_ratio,diff_max_words_turn_ratio,avg_speech_neu,stdev_speech_neu,min_speech_neu,max_speech_neu,diff_speech_neu,avg_speech_ang,stdev_speech_ang,min_speech_ang,max_speech_ang,diff_speech_ang,avg_speech_hap,stdev_speech_hap,min_speech_hap,max_speech_hap,diff_speech_hap,avg_speech_sad,stdev_speech_sad,min_speech_sad,max_speech_sad,diff_speech_sad,avg_text_joy,stdev_text_joy,min_text_joy,max_text_joy,diff_text_joy,avg_text_anger,stdev_text_anger,min_text_anger,max_text_ang
er,diff_text_anger,avg_text_fear,stdev_text_fear,min_text_fear,max_text_fear,diff_text_fear,avg_text_sadness,stdev_text_sadness,min_text_sadness,max_text_sadness,diff_text_sadness
0,string,int,int,int (seg),int (seg),float,float,int(seg),float,float,int,float,float,int,float,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,input,input,input,input,Σ len(sec) if\nscreen_share=True,count (shown_face==true)/\nnum_members,time_share_screen /\ntotal_duration_recorded,Σ indiv_time_spoken,total_duration_spoken/\ntotal_duration_recorded,Σ each probability *\naverage_turn_duration,Σ num_turns,total_turns * 60\n/ total_duration_recorded,num_words/ num_turns,Σ avg_words_turn*\neach probability,total_words * 60\n/ total_duration_recorded,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,name of\nthe group,number of\nintegrants,number of\nrecorded meetings,duration of all\nof the recordings,duration of\nscreen share,percentage of members\nthat turn on camera,ratio between the\nscreen has ben shared,duration of the overall\nspoken time,ratio between the spoken\nand recorded time,most common duration\nof a turn in the convers...,total turns in\nthe conversation,how many turns are\nin a minute,most common number of\nwords in a meeting message,total words in\nthe conversation,how many words are\nin a minute,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,by_hand,by_hand,by_hand,by_hand,by_hand,members_show_face_percen(),screen_share_percen(),total_duration_spoken(),ratio_spoken_recorded(),avg_meeting_turn_duration(),total_turns(),turns_in_a_min(),average_meeting_words_message(),total_words(),words_in_a_min(),,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,GM BEAUVOIR,5,2,4366,0,0.0,0.0,3546,0.812185,17.554455,202,2.775996,37.814332,5726,78.689876,709.2,1097.958424,105.0,2648.0,2543.0,0.2,0.309633,0.029611,0.746757,0.717146,12.376541,12.103957,5.526316,33.948718,28.422402,0.2,0.195595,0.089303,0.548598,0.459295,67.741075,53.227632,21.791169,159.4216,137.630431,0.2,0.15715,0.064337,0.470679,0.406343,341.632,308.4794,109.24,836.46,727.22,...,173.0,251.782644,43.0,623.0,580.0,0.2,0.291078,0.049711,0.720231,0.67052,0.775378,0.046539,0.712697,0.832695,0.119998,0.168865,0.025321,0.137502,0.196289,0.058787,0.048247,0.01947,0.026347,0.077861,0.051514,0.00751,0.004729,0.003074,0.013153,0.010079,0.298636,0.00387,0.29295,0.303593,0.010643,0.237141,0.001936,0.235307,0.239695,0.004388,0.231551,0.002363,0.227768,0.233832,0.006064,0.232672,0.004448,0.227198,0.239587,0.012389
5,GM CHOMSKY,5,2,4580,3133,1.0,0.684061,3986,0.870306,9.184332,434,5.68559,29.150447,12230,160.218341,797.2,538.290535,276.0,1369.0,1093.0,0.2,0.135045,0.069242,0.343452,0.27421,9.171113,2.048187,6.272727,11.461538,5.188811,0.2,0.044666,0.136793,0.249949,0.113156,56.055844,60.303227,19.887848,162.1672,142.279352,0.2,0.215154,0.070957,0.578592,0.507634,764.432,661.163226,169.82,1529.54,1359.72,...,197.0,75.640598,78.0,257.0,179.0,0.2,0.076792,0.079188,0.260914,0.181726,0.752244,0.029104,0.710042,0.782264,0.072222,0.208808,0.017429,0.18186,0.228609,0.046749,0.033932,0.022035,0.01221,0.066733,0.054523,0.005017,0.003165,0.001577,0.009187,0.00761,0.297056,0.002517,0.293694,0.300151,0.006457,0.237285,0.002522,0.233183,0.239446,0.006263,0.228015,0.002088,0.225465,0.230322,0.004857,0.237644,0.001313,0.236344,0.239646,0.003302
6,GM DESCARTES,6,3,1888,1888,0.0,1.0,1597,0.845869,10.303226,155,4.925847,24.082221,3292,104.618644,266.166667,343.914767,21.0,943.0,922.0,0.166667,0.215351,0.01315,0.590482,0.577332,7.907124,4.658233,3.0,16.258621,13.258621,0.166667,0.098186,0.063234,0.3427,0.279466,129.171568,133.198998,30.855517,385.191429,354.335911,0.166667,0.171863,0.039812,0.497002,0.45719,1066.68,543.265572,62.02,1465.98,1403.96,...,83.333333,71.486129,16.0,217.0,201.0,0.166667,0.142972,0.032,0.434,0.402,0.868442,0.02158,0.84247,0.900639,0.05817,0.118787,0.028246,0.071002,0.148914,0.077913,0.011698,0.007564,0.007156,0.026979,0.019823,0.001073,0.000646,0.00025,0.001907,0.001657,0.26211,0.004434,0.257801,0.268266,0.010465,0.263759,0.004913,0.257601,0.269587,0.011987,0.24427,0.001356,0.242891,0.246609,0.003717,0.229862,0.003307,0.223415,0.232435,0.009021
7,GM FOUCAULT,4,3,3525,3525,0.0,1.0,2862,0.811915,9.508306,301,5.123404,25.489195,6890,117.276596,715.5,463.244716,281.0,1357.0,1076.0,0.25,0.16186,0.098183,0.474144,0.375961,8.848999,3.656503,5.403846,13.989691,8.585845,0.25,0.103303,0.152668,0.395234,0.242565,50.02965,19.876986,29.030928,75.503846,46.472918,0.25,0.099326,0.145069,0.377295,0.232227,769.865,47.936757,727.56,828.7,101.14,...,102.75,32.221887,66.0,142.0,76.0,0.25,0.078399,0.160584,0.345499,0.184915,0.755367,0.01558,0.740058,0.769652,0.029594,0.212465,0.014632,0.19692,0.230877,0.033958,0.028399,0.014506,0.013313,0.047375,0.034062,0.003768,0.00262,0.00064,0.006897,0.006258,0.29677,0.004588,0.291022,0.301269,0.010247,0.237875,0.00273,0.235008,0.241411,0.006402,0.228296,0.000565,0.227542,0.228907,0.001365,0.237059,0.001938,0.235355,0.238811,0.003457
8,GM KANT,5,2,9802,9802,0.0,1.0,7880,0.803918,24.702194,319,1.952663,55.835245,10333,63.250357,1576.0,1781.610648,128.0,3950.0,3822.0,0.2,0.226093,0.016244,0.501269,0.485025,22.007453,24.644022,4.571429,61.673469,57.102041,0.2,0.223961,0.041544,0.560478,0.518934,145.393566,80.843432,45.003906,270.042143,225.038237,0.2,0.111206,0.061906,0.371464,0.309557,1613.936,1353.842423,392.5,3633.44,3240.94,...,172.6,232.543114,20.0,565.0,545.0,0.2,0.269459,0.023175,0.654693,0.631518,0.810359,0.068063,0.714929,0.900524,0.185595,0.160466,0.043031,0.08993,0.199756,0.109825,0.023756,0.029038,0.005262,0.074852,0.06959,0.00542,0.008637,0.000153,0.02077,0.020617,0.301995,0.008765,0.287437,0.310425,0.022989,0.234743,0.006113,0.228331,0.244249,0.015918,0.229425,0.002118,0.227561,0.232449,0.004888,0.233838,0.00429,0.228794,0.240463,0.011668
9,GM MINSKY,5,1,591,0,0.0,0.0,478,0.808799,6.547945,73,7.411168,23.815146,1588,161.218274,95.6,47.851855,23.0,131.0,108.0,0.2,0.100108,0.048117,0.274059,0.225941,6.298049,2.87843,2.090909,9.214286,7.123377,0.2,0.091407,0.066399,0.292608,0.226209,30.061178,9.184407,22.052857,45.156364,23.103506,0.2,0.061105,0.14672,0.30043,0.15371,96.856,48.498749,58.68,155.86,97.18,...,56.0,29.89147,13.0,84.0,71.0,0.2,0.106755,0.046429,0.3,0.253571,0.824194,0.040583,0.770457,0.881454,0.110997,0.152385,0.036157,0.101825,0.198485,0.09666,0.021847,0.009797,0.010692,0.033656,0.022963,0.001574,0.001605,0.000234,0.004333,0.004099,0.295422,0.006673,0.287701,0.304738,0.017038,0.23885,0.004848,0.23074,0.243578,0.012838,0.228804,0.003632,0.22471,0.234092,0.009382,0.236924,0.003703,0.23043,0.23924,0.008811
