# Individual Feature Determination

## Preparation

### Import

In [1]:
# pandas / numpy are used by the ratio helpers defined in the cells below.
import pandas as pd
import numpy as np
# Colab-only dependency: mounting makes Google Drive available under /content/drive,
# which is where the input and output paths used later in this notebook point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Function Definition

### Add Ratio given a column

In [3]:
def add_ratio_to_column(df, id_col, group_col, column, start_row):
    """Insert a ``<column>_ratio`` column with each row's share of its group total.

    The frame is expected to carry three metadata rows (type, description,
    generating script) immediately before ``start_row``; every row from
    ``start_row`` onwards is treated as data. Rows are addressed by position,
    so the function does not depend on the frame having a default index.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend. It is modified in place and also returned.
    id_col : str
        Unused; kept only so existing callers keep working.
    group_col : str
        Column whose values define the groups the totals are computed over.
    column : str
        Column to turn into ratios. The new column is inserted right after it.
    start_row : int
        Position of the first data row; must be at least 3 because the three
        rows before it receive the metadata cells.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now containing ``column + "_ratio"``.

    Raises
    ------
    ValueError
        If ``start_row`` is smaller than 3.

    Notes
    -----
    Data cells of ``column`` are converted with ``pd.to_numeric``, which raises
    on values it cannot parse. A group whose total is 0 yields NaN/inf ratios.
    """
    if start_row < 3:
        raise ValueError("start_row must be >= 3 (three metadata rows precede the data)")

    ratio_column = column + "_ratio"
    column_pos = df.columns.get_loc(column)

    # The column mixes strings (metadata rows) and floats (data rows), so it is
    # created with object dtype up front. Creating it as float and writing
    # strings afterwards relies on silent upcasting, which pandas deprecated.
    df.insert(column_pos + 1, ratio_column,
              pd.Series(np.nan, index=df.index, dtype=object))
    ratio_pos = column_pos + 1

    # Metadata cells: same type label as the source column, a derived
    # description, and the name of the function that produced the values.
    source_cells = df[column]
    df.iloc[start_row - 3:start_row, ratio_pos] = [
        source_cells.iloc[start_row - 3],
        "ratio of " + str(source_cells.iloc[start_row - 2]),
        "add_ratio_to_column()",
    ]

    # Ratios are computed on the data rows only, after numeric coercion, so the
    # metadata strings never take part in the group totals.
    data_rows = df.iloc[start_row:]
    values = pd.to_numeric(data_rows[column])
    group_total = values.groupby(data_rows[group_col].to_numpy()).transform("sum")
    df.iloc[start_row:, ratio_pos] = (values / group_total).to_numpy()

    return df



### Add Ratio given a Range

In [4]:
def add_ratios_range(df, start_col, end_col, id_col="ID", group_col="group", start_row=3):
    """Add a ratio column after every column from ``start_col`` to ``end_col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend; passed through ``add_ratio_to_column`` once per column.
    start_col, end_col : str
        First and last column (inclusive) of the contiguous range to process.
        If ``end_col`` is positioned before ``start_col`` the range is empty
        and ``df`` is returned unchanged.
    id_col : str, default "ID"
        Forwarded to ``add_ratio_to_column``.
    group_col : str, default "group"
        Column that defines the groups used for the ratio denominators.
    start_row : int, default 3
        Position of the first data row, forwarded to ``add_ratio_to_column``.

    Returns
    -------
    pandas.DataFrame
        The frame returned by the last ``add_ratio_to_column`` call.
    """
    first_pos = df.columns.get_loc(start_col)
    last_pos = df.columns.get_loc(end_col)

    # Take the names as a list before looping: each call inserts a new column,
    # which would otherwise shift the positions of the remaining ones.
    cols_range = df.columns[first_pos:last_pos + 1].tolist()

    for col in cols_range:
        df = add_ratio_to_column(df, id_col, group_col, col, start_row)

    return df

### Add Ratios to Features

In [5]:
def add_ratios_to_features(indiv_features_file, file_out):
    """Read the individual-features workbook, add ratio columns, and save the result.

    Ratio columns are added for every column from ``"indiv_spoken_time"`` to
    ``"max_words_turn"`` (inclusive) via ``add_ratios_range``.

    Parameters
    ----------
    indiv_features_file : str or path-like
        Excel file read with ``pd.read_excel``.
    file_out : str or path-like
        Destination file. A name ending in ``.xlsx`` or ``.xlsm`` is written as
        a real Excel workbook; any other name is written as UTF-8 (with BOM) CSV.

    Returns
    -------
    pandas.DataFrame
        The frame that was written to ``file_out``.
    """
    df = pd.read_excel(indiv_features_file)
    df = add_ratios_range(df, "indiv_spoken_time", "max_words_turn")

    # Match the on-disk format to the extension: writing CSV text into a file
    # named *.xlsx produces something spreadsheet tools cannot open as a workbook.
    if str(file_out).lower().endswith((".xlsx", ".xlsm")):
        df.to_excel(file_out, index=False)
    else:
        df.to_csv(file_out, encoding='utf-8-sig', index=False)

    return df

## Use of Function

In [6]:
# Input workbook with the individual features (read with pd.read_excel).
indiv_features_file = r'/content/drive/MyDrive/Projects/tps/data/12. features/3_individual_features_final.xlsx'
# Destination for the ratio-augmented table.
# NOTE(review): the extension is .xlsx; confirm that the file written by
# add_ratios_to_features really is in that format.
file_out= r'/content/drive/MyDrive/Projects/tps/data/12. features/4_individual_features_final_ratio.xlsx'
# NOTE(review): `df` is reassigned by the add_ratios_to_features call in the next
# cell, which reads the same file again, so this load looks redundant.
df= pd.read_excel(indiv_features_file)

In [7]:
# Build the ratio columns, write the result to file_out, and keep the returned frame.
df= add_ratios_to_features(indiv_features_file, file_out)
# Preview the first rows (the leading rows hold type/description/script metadata).
df.head()

Unnamed: 0,ID,group,shown_face,indiv_spoken_time,indiv_spoken_time_ratio,average_turn_duration,average_turn_duration_ratio,avg_time_without_speaking,avg_time_without_speaking_ratio,max_time_without_speaking,...,max_words_turn,max_words_turn_ratio,speech_neu,speech_ang,speech_hap,speech_sad,text_joy,text_anger,text_fear,text_sadness
0,Type,string,boolean,int (seg),int (seg),float (seg),float (seg),int (seg),int (seg),int (seg),...,int (words),int (words),float,float,float,float,float,float,float,float
1,Description,id of the group,activated the camera\nat the meeting?,total speaking time\nof each individual,ratio of total speaking time\nof each individual,average time of\nspoken intervention,ratio of average time of\nspoken intervention,average time without\nspeaking,ratio of average time without\nspeaking,max time without\nspeaking,...,maximum words\nused per turn,ratio of maximum words\nused per turn,average neutral\nemotion in voice,average anger\nemotion in voice,average happy\nemotion in voice,average sad\nemotion in voice,average joy\nemotion in text,average anger\nemotion in text,average fear\nemotion in text,average sad\nemotion in text
2,Script,byhand_input,byhand_input,calc_individual_time_spoken(),add_ratio_to_column(),avg_turn_duration(),add_ratio_to_column(),max_and_avg_time_without_speaking(),add_ratio_to_column(),,...,max_words_turn(),add_ratio_to_column(),calc_avg_emotions,,,,,,,
3,1,1,False,526,0.148336,7.850746,0.126865,56.210746,0.165958,451.54,...,68,0.078613,0.832695,0.137502,0.026347,0.003456,0.298809,0.236814,0.232361,0.232015
4,2,1,False,2648,0.746757,33.948718,0.548598,21.791169,0.064337,109.24,...,623,0.720231,0.795958,0.155654,0.045315,0.003074,0.29295,0.239695,0.227768,0.239587
