In [1]:
import os
import pandas as pd

In [2]:
# Function to parse filename and extract label
def parse_label(file_name, dataset):
    """Extract the raw emotion token from an audio file name.

    Where the token sits depends on the dataset's naming scheme:

    * ``"TESS"``    -- last ``_``-separated field, cut at the first ``.``
      (``YAF_youth_fear.wav`` -> ``fear``).
    * ``"RAVDESS"`` -- third ``-``-separated field
      (``03-01-05-01-01-01-01.wav`` -> ``05``).
    * ``"CREMA-D"`` -- third ``_``-separated field
      (``1001_DFA_ANG_XX.wav`` -> ``ANG``).

    Args:
        file_name: Name of the audio file (no directory part expected).
        dataset: One of ``"TESS"``, ``"RAVDESS"`` or ``"CREMA-D"``.

    Returns:
        The emotion token exactly as written in the file name; no
        normalisation or mapping is applied here.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
        IndexError: If the file name has too few fields for the scheme.
    """
    if dataset == "TESS":
        # Drop the extension by cutting at the first dot of the last field.
        return file_name.split('_')[-1].split('.')[0]
    if dataset == "RAVDESS":
        return file_name.split('-')[2]
    if dataset == "CREMA-D":
        return file_name.split('_')[2]
    # Previously an unknown dataset fell through to `return emotion` with the
    # variable unbound; fail with an explicit message instead.
    raise ValueError(
        f"Unsupported dataset {dataset!r}; expected 'TESS', 'RAVDESS' or 'CREMA-D'"
    )

def create_dataframe(dataset_path, dataset_name):
    """Index every ``.wav`` file under a dataset folder.

    The folder is walked recursively. For each file whose name ends with
    ``".wav"`` (case-sensitive) the emotion token is taken from the file name
    via ``parse_label``.

    Args:
        dataset_path: Root directory of the dataset.
        dataset_name: Dataset identifier; passed to ``parse_label`` and stored
            in the ``dataset`` column.

    Returns:
        A DataFrame with columns ``file_path``, ``emotion`` and ``dataset``,
        one row per ``.wav`` file. The columns are present even when no file
        matches, so column lookups on the result never fail.
    """
    columns = ["file_path", "emotion", "dataset"]
    records = [
        {
            "file_path": os.path.join(root, file_name),
            "emotion": parse_label(file_name, dataset_name),
            "dataset": dataset_name,
        }
        for root, _dirs, files in os.walk(dataset_path)
        for file_name in files
        if file_name.endswith(".wav")
    ]
    # Passing `columns` keeps the schema stable for an empty/missing folder;
    # without it an empty record list yields a frame with no columns at all.
    return pd.DataFrame(records, columns=columns)

In [3]:
# Paths to datasets.
# Raw strings keep the Windows backslashes literal; the plain-string form only
# worked because "\E", "\R", "\C", "\A" and "\T" are not recognised escapes,
# which newer Python versions warn about.
ravdess_path = r"C:\EMOTION\Ravdess Set"
cremad_path = r"C:\EMOTION\Crema D Set\AudioWAV"
tess_path = r"C:\EMOTION\Tess Set\TESS Toronto emotional speech set data"

In [4]:
# Index each corpus in turn (RAVDESS, then CREMA-D, then TESS).
df_ravdess, df_cremad, df_tess = (
    create_dataframe(corpus_path, corpus_name)
    for corpus_path, corpus_name in (
        (ravdess_path, "RAVDESS"),
        (cremad_path, "CREMA-D"),
        (tess_path, "TESS"),
    )
)

# Stack the three tables into one, renumbering the rows from 0.
df = pd.concat(objs=[df_ravdess, df_cremad, df_tess], ignore_index=True)

# Quick look at the first rows of the combined table.
print(df.head())


                                           file_path emotion  dataset
0  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...      01  RAVDESS
1  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...      01  RAVDESS
2  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...      01  RAVDESS
3  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...      01  RAVDESS
4  C:\EMOTION\Ravdess Set\Actor_01\03-01-02-01-01...      02  RAVDESS


In [5]:
# Persist the combined file table. With pandas' default index=True the row
# index is written as an extra, unnamed first column.
df.to_csv("datasetwithemotion.csv")

In [6]:
# Count the rows of the combined table that are tagged as TESS.
tess_mask = df["dataset"] == "TESS"
TESS = tess_mask.sum()
print(TESS)


2800


In [7]:
# Count the rows of the combined table that are tagged as RAVDESS.
ravdess_mask = df["dataset"] == "RAVDESS"
RAVDESS = ravdess_mask.sum()
print(RAVDESS)


1440


In [8]:
# Count the rows of the combined table that are tagged as CREMA-D.
crema_mask = df["dataset"] == "CREMA-D"
CREMA = crema_mask.sum()
print(CREMA)


7442


In [9]:
# CREMA-D three-letter emotion codes -> unified label names.
crema_mapping = dict(
    ANG='Anger',
    DIS='Disgust',
    FEA='Fear',
    HAP='Happiness',
    NEU='Neutral',
    SAD='Sadness',
)


In [10]:
# RAVDESS two-digit emotion codes ('01'..'08') -> unified label names.
# The labels are listed in code order; the key is the 1-based position,
# zero-padded to two digits.
ravdess_mapping = {
    f"{code:02d}": label
    for code, label in enumerate(
        [
            'Neutral',
            'Neutral',    # '02' is 'Calm', treated as 'Neutral' here; use a separate 'Calm' label to keep it distinct
            'Happiness',
            'Sadness',
            'Anger',
            'Fear',
            'Disgust',
            'Surprise',
        ],
        start=1,
    )
}


In [11]:
# Keep only the rows whose dataset tag is TESS.
tess_df = df.loc[df["dataset"] == "TESS"]


In [12]:
# Distinct emotion tokens found in the TESS rows, in order of first appearance.
unique_labels = tess_df.loc[:, "emotion"].unique()

In [13]:
# Display the distinct TESS emotion tokens collected above.
unique_labels

array(['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad'],
      dtype=object)

In [14]:
# TESS file-name tokens -> unified label names.
# Tokens seen in the data: ['angry', 'disgust', 'fear', 'happy', 'neutral', 'ps', 'sad']
tess_mapping = dict(
    angry='Anger',
    disgust='Disgust',
    fear='Fear',
    happy='Happiness',
    neutral='Neutral',
    ps='Surprise',  # "ps" stands for "Pleasant Surprise"; folded into "Surprise"
    sad='Sadness',
)


In [15]:
# Independent snapshot of the table taken before the emotion column is relabelled.
dataset_before_mapping_new_label_names = df.copy(deep=True)

In [16]:
# Save the pre-mapping snapshot. With pandas' default index=True the row index
# is written as an extra, unnamed first column.
dataset_before_mapping_new_label_names.to_csv("dataset_before_giving_labelname.csv")

In [17]:
# Preview the first rows of the pre-mapping snapshot.
dataset_before_mapping_new_label_names.head()

Unnamed: 0,file_path,emotion,dataset
0,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...,1,RAVDESS
1,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...,1,RAVDESS
2,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...,1,RAVDESS
3,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...,1,RAVDESS
4,C:\EMOTION\Ravdess Set\Actor_01\03-01-02-01-01...,2,RAVDESS


In [18]:
# Preview the first rows of the working table (still holding raw emotion codes here).
df.head()

Unnamed: 0,file_path,emotion,dataset
0,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...,1,RAVDESS
1,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...,1,RAVDESS
2,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...,1,RAVDESS
3,C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...,1,RAVDESS
4,C:\EMOTION\Ravdess Set\Actor_01\03-01-02-01-01...,2,RAVDESS


In [19]:
def map_emotions(row):
    """Translate one row's dataset-specific emotion code to the shared label set.

    Args:
        row: A mapping-like row exposing ``'dataset'`` and ``'emotion'`` entries.

    Returns:
        The unified label from the table belonging to the row's dataset
        (``crema_mapping``, ``ravdess_mapping`` or ``tess_mapping``). The
        original emotion value is returned unchanged when the dataset is not
        one of those three or the code has no entry in its table.
    """
    tables_by_dataset = {
        'CREMA-D': crema_mapping,
        'RAVDESS': ravdess_mapping,
        'TESS': tess_mapping,
    }
    source = row['dataset']
    raw_label = row['emotion']

    table = tables_by_dataset.get(source)
    if table is None:
        # Unknown dataset: leave the value as it is.
        return raw_label
    # Unknown code within a known dataset also passes through untouched.
    return table.get(raw_label, raw_label)

# Relabel row by row, so each row's dataset decides which table is consulted.
df['emotion'] = df.apply(map_emotions, axis="columns")

# Peek at the first rows to confirm the relabelling.
print(df.head())


                                           file_path  emotion  dataset
0  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...  Neutral  RAVDESS
1  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-01...  Neutral  RAVDESS
2  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...  Neutral  RAVDESS
3  C:\EMOTION\Ravdess Set\Actor_01\03-01-01-01-02...  Neutral  RAVDESS
4  C:\EMOTION\Ravdess Set\Actor_01\03-01-02-01-01...  Neutral  RAVDESS


In [20]:

# Distinct values of the emotion column after the mapping step.
df["emotion"].unique()

array(['Neutral', 'Happiness', 'Sadness', 'Anger', 'Fear', 'Disgust',
       'Surprise'], dtype=object)

In [21]:
# Save the relabelled file table. With pandas' default index=True the row index
# is written as an extra, unnamed first column.
df.to_csv("dataset_with_label.csv")

In [22]:
# Column names of the labelled file table.
df.columns

Index(['file_path', 'emotion', 'dataset'], dtype='object')

In [25]:
import os
import pandas as pd
import subprocess
import uuid

# Input: `df`, the labelled file table built by the earlier cells; this cell
# reads its `file_path` and `emotion` columns and does not modify it.

# Path to OpenSMILE, its configuration file, and the folder for the ARFF output
output_dir = r'C:\EMOTION\Emotion\generated_files'
opensmile_path = r'C:\opensmile-3.0.2-windows-x86_64\bin\SMILExtract.exe'
config_file = r'C:\opensmile-3.0.2-windows-x86_64\config\is09-13\IS10_paraling_compat.conf'

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# (file_path, exit code) for every input SMILExtract did not finish cleanly.
# NOTE(review): the saved outputs show 2800 + 1440 + 7442 = 11682 input files,
# but the combined feature table ends at index 11678, so a few extractions may
# have failed silently before exit codes were checked -- confirm.
failed_extractions = []

for index, row in df.iterrows():
    file_path = row['file_path']
    label = row["emotion"]
    uuid_ = str(uuid.uuid1())
    # Output name layout: <input name up to its first dot>__<uuid>__<label>.arff
    # The ARFF-loading cell splits on "__" to recover the name and the label.
    # NOTE: the UUID is new on every run, so re-running this cell adds another
    # ARFF per input instead of overwriting; empty `output_dir` first.
    stem = os.path.basename(file_path).split('.')[0]
    newfilename = stem + "__" + uuid_ + "__" + label
    # Define output ARFF file
    output_file = os.path.join(output_dir, f"{newfilename}.arff")

    command = [
        opensmile_path,
        "-C", config_file,          # Configuration file
        "-I", file_path,            # Input .wav file
        "-O", output_file           # Output ARFF file
    ]
    # Keep going on failure (best effort), but remember which inputs failed.
    result = subprocess.run(command)
    if result.returncode != 0:
        failed_extractions.append((file_path, result.returncode))

# Surface failures instead of letting them disappear from the feature table.
if failed_extractions:
    print(f"openSMILE failed for {len(failed_extractions)} of {len(df)} files:")
    for failed_path, exit_code in failed_extractions:
        print(f"  exit code {exit_code}: {failed_path}")

    


In [28]:
import arff
import pandas as pd
import glob
import os

# Path to the directory containing the ARFF files
arff_dir = r'C:\EMOTION\Emotion\generated_files'

# List all ARFF files in the directory. glob returns matches in arbitrary
# order, so sort them to make the row order of the combined table reproducible.
arff_files = sorted(glob.glob(os.path.join(arff_dir, '*.arff')))

# Initialize an empty list to store DataFrames
dfs = []

def arff_to_df(arff_file):
    """Load one ARFF file into a DataFrame.

    Args:
        arff_file: Path of the ARFF file to read.

    Returns:
        A DataFrame holding the file's data rows, with one column per ARFF
        attribute, named after the attribute.
    """
    # Parse with liac-arff; the result exposes 'data' and 'attributes' entries.
    with open(arff_file, 'r') as handle:
        parsed = arff.load(handle)
    rows = parsed['data']
    # The first element of each attribute entry is used as the column name.
    column_names = [attribute[0] for attribute in parsed['attributes']]
    frame = pd.DataFrame(rows, columns=column_names)
    return frame

# Convert each ARFF file to a DataFrame and append to the list.
# A dedicated name is used for the per-file frame so the notebook-level `df`
# (the labelled file table the extraction cell iterates over) is not clobbered.
for arff_file in arff_files:
    features_df = arff_to_df(arff_file)
    # File names look like <name>__<uuid>__<label>.arff
    base_name = os.path.basename(arff_file)  # Get file name with extension
    base_name_list = base_name.split("__")
    name = base_name_list[0]
    label = base_name_list[-1].split(".")[0]
    # Set the identifying columns.
    # NOTE(review): in the combined table 'name' is the first column and
    # 'class' the last, which suggests the ARFF files already carry attributes
    # with these names and the assignments overwrite them -- confirm.
    features_df['name'] = name
    features_df['class'] = label
    dfs.append(features_df)

# pd.concat raises a generic ValueError on an empty list; say what went wrong.
if not dfs:
    raise ValueError(f"No ARFF files found in {arff_dir!r}; nothing to combine.")

# Concatenate all DataFrames into one
combined_df = pd.concat(dfs, ignore_index=True)


# Path to the output CSV file
# output_csv = 'D:\VocalCoach\Project_Roger_Love\arff_files\combined_features.csv'

# # Save combined DataFrame to CSV
# combined_df.to_csv(output_csv, index=False)

# print(f"Combined CSV saved to: {output_csv}")


In [29]:
# Display the combined feature table.
combined_df

Unnamed: 0,name,pcm_loudness_sma_maxPos,pcm_loudness_sma_minPos,pcm_loudness_sma_amean,pcm_loudness_sma_linregc1,pcm_loudness_sma_linregc2,pcm_loudness_sma_linregerrA,pcm_loudness_sma_linregerrQ,pcm_loudness_sma_stddev,pcm_loudness_sma_skewness,...,shimmerLocal_sma_de_quartile3,shimmerLocal_sma_de_iqr1-2,shimmerLocal_sma_de_iqr2-3,shimmerLocal_sma_de_iqr1-3,shimmerLocal_sma_de_percentile99.0,shimmerLocal_sma_de_upleveltime75,shimmerLocal_sma_de_upleveltime90,F0final__Turn_numOnsets,F0final__Turn_duration,class
0,03-01-01-01-01-01-01,111.0,0.0,0.057230,1.474657e-05,0.054864,0.066691,0.006702,0.081877,1.456113,...,0.035178,0.033296,0.036901,0.070197,0.209470,0.054945,0.010989,7.0,3.24,Neutral
1,03-01-01-01-01-01-02,143.0,0.0,0.075299,7.177335e-06,0.074028,0.085198,0.012052,0.109786,1.669832,...,0.021368,0.024951,0.020953,0.045904,0.099754,0.084034,0.050420,7.0,3.57,Neutral
2,03-01-01-01-01-01-03,158.0,3.0,0.077504,7.861485e-05,0.064375,0.081762,0.010437,0.102442,1.506416,...,0.035573,0.036146,0.038547,0.074694,0.169679,0.058252,0.009709,8.0,3.37,Neutral
3,03-01-01-01-01-01-04,148.0,0.0,0.057218,3.307781e-05,0.051909,0.065319,0.006417,0.080163,1.471950,...,0.020052,0.021583,0.019544,0.041126,0.105876,0.099237,0.015267,8.0,3.24,Neutral
4,03-01-01-01-01-01-05,114.0,0.0,0.045955,7.809049e-07,0.045818,0.046514,0.003756,0.061286,1.919757,...,0.014323,0.017729,0.017090,0.034819,0.133458,0.053191,0.021277,8.0,3.54,Neutral
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11675,YAF_youth_fear,115.0,0.0,0.641203,2.084736e-05,0.639483,0.326869,0.166363,0.407878,0.725198,...,0.006354,0.005625,0.007331,0.012956,0.040863,0.031496,0.007874,6.0,1.68,Fear
11676,YAF_youth_happy,97.0,196.0,0.694067,-9.787022e-04,0.792916,0.421081,0.277997,0.530365,1.039440,...,0.005358,0.008652,0.005816,0.014468,0.073950,0.026316,0.006579,4.0,2.05,Happiness
11677,YAF_youth_neutral,107.0,0.0,0.365873,-7.653548e-04,0.444705,0.180104,0.047840,0.223455,0.529938,...,0.006715,0.006540,0.007431,0.013971,0.131697,0.018987,0.012658,2.0,2.09,Neutral
11678,YAF_youth_ps,29.0,167.0,0.507129,2.123787e-04,0.489289,0.231371,0.083157,0.288555,0.807366,...,0.012068,0.009116,0.013140,0.022256,0.076742,0.027778,0.006944,8.0,1.71,Surprise


In [30]:
# Distinct class labels present in the combined feature table.
combined_df["class"].unique()

array(['Neutral', 'Happiness', 'Sadness', 'Anger', 'Fear', 'Disgust',
       'Surprise'], dtype=object)

In [32]:
# Save the combined feature table. With pandas' default index=True the row
# index is written as an extra, unnamed first column.
combined_df.to_csv("labelled_dataset.csv")

In [33]:
# Column index of the combined feature table.
combined_df.columns

Index(['name', 'pcm_loudness_sma_maxPos', 'pcm_loudness_sma_minPos',
       'pcm_loudness_sma_amean', 'pcm_loudness_sma_linregc1',
       'pcm_loudness_sma_linregc2', 'pcm_loudness_sma_linregerrA',
       'pcm_loudness_sma_linregerrQ', 'pcm_loudness_sma_stddev',
       'pcm_loudness_sma_skewness',
       ...
       'shimmerLocal_sma_de_quartile3', 'shimmerLocal_sma_de_iqr1-2',
       'shimmerLocal_sma_de_iqr2-3', 'shimmerLocal_sma_de_iqr1-3',
       'shimmerLocal_sma_de_percentile99.0',
       'shimmerLocal_sma_de_upleveltime75',
       'shimmerLocal_sma_de_upleveltime90', 'F0final__Turn_numOnsets',
       'F0final__Turn_duration', 'class'],
      dtype='object', length=1584)

In [36]:
# Preview the first rows of the combined feature table.
combined_df.head()

Unnamed: 0,name,pcm_loudness_sma_maxPos,pcm_loudness_sma_minPos,pcm_loudness_sma_amean,pcm_loudness_sma_linregc1,pcm_loudness_sma_linregc2,pcm_loudness_sma_linregerrA,pcm_loudness_sma_linregerrQ,pcm_loudness_sma_stddev,pcm_loudness_sma_skewness,...,shimmerLocal_sma_de_quartile3,shimmerLocal_sma_de_iqr1-2,shimmerLocal_sma_de_iqr2-3,shimmerLocal_sma_de_iqr1-3,shimmerLocal_sma_de_percentile99.0,shimmerLocal_sma_de_upleveltime75,shimmerLocal_sma_de_upleveltime90,F0final__Turn_numOnsets,F0final__Turn_duration,class
0,03-01-01-01-01-01-01,111.0,0.0,0.05723,1.474657e-05,0.054864,0.066691,0.006702,0.081877,1.456113,...,0.035178,0.033296,0.036901,0.070197,0.20947,0.054945,0.010989,7.0,3.24,Neutral
1,03-01-01-01-01-01-02,143.0,0.0,0.075299,7.177335e-06,0.074028,0.085198,0.012052,0.109786,1.669832,...,0.021368,0.024951,0.020953,0.045904,0.099754,0.084034,0.05042,7.0,3.57,Neutral
2,03-01-01-01-01-01-03,158.0,3.0,0.077504,7.861485e-05,0.064375,0.081762,0.010437,0.102442,1.506416,...,0.035573,0.036146,0.038547,0.074694,0.169679,0.058252,0.009709,8.0,3.37,Neutral
3,03-01-01-01-01-01-04,148.0,0.0,0.057218,3.307781e-05,0.051909,0.065319,0.006417,0.080163,1.47195,...,0.020052,0.021583,0.019544,0.041126,0.105876,0.099237,0.015267,8.0,3.24,Neutral
4,03-01-01-01-01-01-05,114.0,0.0,0.045955,7.809049e-07,0.045818,0.046514,0.003756,0.061286,1.919757,...,0.014323,0.017729,0.01709,0.034819,0.133458,0.053191,0.021277,8.0,3.54,Neutral
