In [101]:
import pandas as pd

In [102]:
def read_in_data(header_names):
    return pd.read_csv('../data/genderedCorpus.csv', sep=',', names=header_names)

def reciever_and_caller_column(df, header_names, gender_and_position):
    call_id = get_all_call_ids(df)
    new_df = pd.DataFrame(columns=header_names)
    for call in call_id:
        temp_df = get_call_df(df, call)
        for gp in gender_and_position:
            if temp_df['person_and_type'].str.contains(gp).any():
                if "receiver" in gp:
                    temp_df["receiver"].fillna(gp, inplace=True)
                else:
                    temp_df["caller"].fillna(gp, inplace=True)
                
        new_df = pd.concat([new_df, temp_df])
    return new_df

        
        

def get_conversation_topic_df(df, topic):
    """
    Gets the rows that match the conversation topic specified.
    """
    return df.loc[df['conversation_topic'] == topic]

def remove_conversation_topic_df(df, topic):
    """
    Gets the rows that do not contain a specific topic. Can pass a string containing topics to remove, delimited by a comma
    """
    for individual_topic in topic.split(","):
        df = df.loc[df['conversation_topic'] != individual_topic]
    return df

def get_call_df(df, call_id):
    """
    This gets the rows containing the call_id. 
    """
    return df.loc[df['call'] == call_id] 

def get_all_call_ids(df):
    """
    Returns an array of the names of the call ids
    """
    return df.call.unique()

def get_all_conversation_topics(df):
    """
    Returns array of all the names of the conversation topics
    """
    return df.conversation_topic.unique()
header_names = ["call", "conversation_topic", "person_and_type","start", "end", "caller", "receiver"]
gender_and_position = {"caller_F", "receiver_F", "caller_M", "receiver_M"} 
gender_types = dict.fromkeys(gender_and_position,0)
cue_types = {"laughter": 0, "silence": 0, "filler": 0, "bc": 0}
df = read_in_data(header_names)

In [103]:
def occurrence_of_each_event(df, types):
    """
    This takes in a dataframe and produces the total number of each conversation cue occurred.
    """
    occurrences = df['person_and_type'].value_counts()
    for occurrence_name in occurrences.index:
        for cue_type in types:            
            if occurrence_name.count(cue_type) > 1:
                types[cue_type] += (occurrences[occurrence_name] * 2)
            elif occurrence_name.count(cue_type) == 1:
                types[cue_type] += occurrences[occurrence_name]
    return types

def total_time_of_each_event(df, types):
    """
    This calculates the total time of each cue in the dataset, returning a dictionary of each cue
    """
    for cue in types:
        new_df = df[df["person_and_type"].str.contains(cue)] # for each cue, get df of all rows containing cue
        
        types[cue] = (new_df["end"] - new_df["start"]).sum(axis=0) # end time - start time and sum them all
    return types

def mean_time_of_each_event(df, types):
    """
    Gets the mean of each conversation cue. Returns a dictionary.
    """
    for cue in types:
        new_df = df[df["person_and_type"].str.contains(cue)]
        types[cue] = (new_df["end"] - new_df["start"]).mean(axis=0)
    return types

def mean_call_length(df):
    total = 0
    ids = get_all_call_ids(df)
    for call in ids:
        new_df = df[df["call"].str.match(call)]
        total += new_df.iloc[-1]["end"]
    return total / len(ids)

In [104]:
occ_count = occurrence_of_each_event(df, cue_types.copy())
time_total = total_time_of_each_event(df, cue_types.copy())
mean_time = mean_time_of_each_event(df, cue_types.copy())
overlap_occurrences = occurrence_of_each_event(df, gender_types.copy())
overlap_total_time = total_time_of_each_event(df, gender_types.copy())
overlap_mean_time = mean_time_of_each_event(df, gender_types.copy())
df = reciever_and_caller_column(df, header_names, gender_and_position)
print(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


      call conversation_topic         person_and_type    start      end  \
0      F01              other                 silence    0.000    1.273   
1      F01              other              receiver_M    1.273    1.581   
2      F01              other                 silence    1.581    1.975   
3      F01              other                caller_F    1.975    2.479   
4      F01              other                 silence    2.479    3.534   
...    ...                ...                     ...      ...      ...   
29854  F60              other    caller_F laughter_rF  339.157  339.622   
29855  F60              other             laughter_cF  339.622  340.007   
29856  F60              other  laughter_cF receiver_F  340.007  340.340   
29857  F60              other                caller_F  340.340  341.091   
29858  F60                end                     end  341.091  344.127   

         caller    receiver  
0      caller_F  receiver_M  
1      caller_F  receiver_M  
2      ca