# Unify all Meetings csv into a single File

## Preparation

### Import

In [8]:
import pandas as pd
import os
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Function Definition

### Adjust Times in the Concatenated Dataframes

In [9]:
def adjust_time_concatenated_dataframes(df):
    
    # find all the numbers that are smaller than the following number
    indices = []
    for index, row in df.iterrows():
        if index < len(df) - 1:
            if row['Start time (s)'] > df.loc[index+1, 'Start time (s)']:
                indices.append(index)
    
    # add 1 to all the numbers below each found number
    for i in indices:
        df.loc[i+1:, 'Start time (s)'] += df.loc[i, 'End time (s)']
        df.loc[i+1:, 'End time (s)'] += df.loc[i, 'End time (s)']
    
    return df

### Concatenate all the given files

In [10]:
def unify_meetings(*filepaths, directory_out):
    dfs = []
    for filepath in filepaths:
        if not filepath.endswith(".csv"):
            continue
        df = pd.read_csv(filepath, encoding="utf-8")
        dfs.append(df)

    # Concatenate every dataset in the list and adjust time
    cleaned_df = adjust_time_concatenated_dataframes(pd.concat(dfs, axis=0, ignore_index=True))

    ##### Make unify same speaker nexts

    # Output the csv
    file_out = os.path.join(directory_out, str((os.path.basename(filepaths[0])).split(".")[0]) + "_speech.csv")
    cleaned_df.to_csv(file_out, index=False)

    # Print the name of the output file with a counter
    print(f"Outputting file {file_out}")

### Concatenate all the csv in a directory

In [11]:
def concatenate_csv(directory_path):
    
    # Create an empty list to store all dataframes
    df_list = []
    documents_read = 0
    # Loop through all files in the folder
    for file_name in os.listdir(directory_path):
        # Check if the file is a .csv file
        if file_name.endswith('.csv'):
            # Read the .csv file into a dataframe
            file_path = os.path.join(directory_path, file_name)
            df = pd.read_csv(file_path)
            # Append the dataframe to the list
            df_list.append(df)
            documents_read = documents_read+1

    # Concatenate every dataset in the list and adjust time
    cleaned_df= adjust_time_concatenated_dataframes(pd.concat(df_list, axis=0, ignore_index=True))
    
    ##### Make unify same speaker nexts
    
    # Sanity stats
    cleaned_df.head(1000)
    print("\n"+str(documents_read)+" files processed")
    
    # Output the csv
    file_out = directory_path + "\\" + os.path.basename(directory_path) + "_unified.csv"
    cleaned_df.to_csv(file_out, encoding='utf-8-sig', index=False)

### Group Meetings of the same Team

In [12]:
def group_files(directory_in, directory_out):
    files_dict = {}
    for filename in os.listdir(directory_in):
        if filename.endswith('.csv'):
            file_number = filename.split('.')[0]
            if file_number not in files_dict:
                files_dict[file_number] = []
            files_dict[file_number].append(os.path.join(directory_in, filename))
    for file_list in files_dict.values():
        unify_meetings(*file_list, directory_out=directory_out)

## Use of Function

In [13]:
directory_in = r'/content/drive/MyDrive/Projects/tps/data/9. speech_emotions'
directory_out = r'/content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated'

In [15]:
group_files(directory_in,directory_out)

Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/1_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/10_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/12_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/4_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/2_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/3_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/11_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/6_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/5_speech.csv
Outputting file /content/drive/MyDrive/Projects/tps/data/10. speech_meetings_aggregated/