In [5]:
import opensmile
import pandas as pd

In [6]:
# Load the dataset index; the rest of the notebook reads its 'file_path' and 'emotion' columns.
df = pd.read_csv('dataset.csv')

# First audio file, used below to probe how many features each openSMILE set yields.
example_path = df['file_path'].iloc[0]

# Keep the preview as the cell's last expression, otherwise Jupyter does not render it.
df.head()

### Exploring the number of features

In [3]:
# eGeMAPSv02 functionals: extract from the example file and report the feature-matrix shape.
smile = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.Functionals,
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
)
y = smile.process_file(example_path)
y.shape

(1, 88)

In [4]:
# GeMAPSv01b functionals: extract from the example file and report the feature-matrix shape.
smile = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.Functionals,
    feature_set=opensmile.FeatureSet.GeMAPSv01b,
)
y = smile.process_file(example_path)
y.shape

(1, 62)

In [5]:
# emobase functionals: extract from the example file and report the feature-matrix shape.
smile = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.Functionals,
    feature_set=opensmile.FeatureSet.emobase,
)
y = smile.process_file(example_path)
y.shape

(1, 988)

In [6]:
# ComParE_2016 functionals: extract from the example file and report the feature-matrix shape.
smile = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.Functionals,
    feature_set=opensmile.FeatureSet.ComParE_2016,
)
y = smile.process_file(example_path)
y.shape

(1, 6373)

### Process with eGeMAPSv02

In [7]:
# Extractor used for the first full pass over the dataset: eGeMAPSv02 functionals.
smile = opensmile.Smile(
    feature_level=opensmile.FeatureLevel.Functionals,
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
)
# Run it once on the example file; the result is kept in `y`.
y = smile.process_file(example_path)

In [8]:
from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)

In [9]:
# Extract features for every file in the dataset with the currently configured `smile` extractor.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [03:45<00:00,  8.78it/s]


In [10]:
# Distinct emotion labels; the array is sorted in place.
labels = df['emotion'].unique()
labels.sort()

# Use one cluster per distinct emotion label.
num_clusters = labels.shape[0]

In [11]:
import os

# Set before scikit-learn is imported.
# Replace "4" with the number of cores you want to use.
os.environ["LOKY_MAX_CPU_COUNT"] = "4"

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Cluster the raw (unscaled) feature matrix, one cluster per emotion label.
X_train = df_processed.values
kmeans = KMeans(random_state=42, n_clusters=num_clusters).fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Compare the cluster assignments against the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)

Adjusted Rand Index (ARI): 0.03947090366749895


In [12]:
# First 20 ground-truth emotion labels, for an eyeball comparison with the cluster ids.
df['emotion'][:20].values

array(['happy', 'neutral', 'angry', 'happy', 'neutral', 'sad', 'angry',
       'angry', 'fearful', 'happy', 'boredom', 'neutral', 'sad', 'angry',
       'fearful', 'happy', 'neutral', 'sad', 'angry', 'angry'],
      dtype=object)

In [13]:
# Cluster ids K-Means assigned to the same first 20 samples.
cluster_labels[:20]

array([2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2])

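An ARI of about 0.04 is close to 0, the value expected for random cluster assignments, so K-Means on the 88 unscaled eGeMAPSv02 features barely agrees with the emotion labels.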

### Try scaling df_processed

In [14]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the extracted features, then apply it to the same frame.
scaler = StandardScaler().fit(df_processed)
df_scaled = scaler.transform(df_processed)

In [15]:
from sklearn.metrics import adjusted_rand_score

# Same clustering as before, this time on the standardised features.
X_train = df_scaled
kmeans = KMeans(random_state=42, n_clusters=num_clusters).fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

Adjusted Rand Index (ARI): 0.09575353194440772

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [4 3 8 8 3 3 8 8 8 8 3 3 2 8 4 4 3 2 8 8]


### Results
1. Scaling improves the K-Means ARI from about 0.04 to about 0.10; however, 0.10 is still a very poor score.

### Check processing

In [16]:
# Display the extracted feature table to sanity-check the extraction.
df_processed

Unnamed: 0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,31.188166,0.152560,27.766502,31.412098,36.340343,8.573841,34.608952,11.036856,91.834801,43.389225,...,-0.012351,0.010897,0.218922,4.255319,3.278689,0.156667,0.084591,0.108571,0.036422,-21.647932
1,25.022938,0.148540,20.637066,25.118477,28.660353,8.023287,54.014923,38.583805,27.074680,0.593055,...,-0.020058,0.007849,0.450747,6.250000,3.870968,0.101667,0.055202,0.115714,0.049816,-18.010019
2,34.292320,0.102067,30.504089,33.584766,37.343616,6.839527,35.548016,5.873569,5.734786,25.473356,...,0.014669,0.012394,0.388366,4.838710,3.314917,0.150000,0.073030,0.111429,0.060339,-17.855310
3,37.576572,0.108607,34.083504,37.371193,41.233379,7.149876,30.146019,14.919344,38.302853,20.924076,...,-0.007576,0.015585,0.243682,4.522613,2.577320,0.246000,0.210960,0.100000,0.042032,-18.062223
4,25.040190,0.116655,21.225212,26.198669,27.725224,6.500011,19.461380,10.744596,27.572844,6.934962,...,-0.035085,0.005547,0.444158,6.338028,3.649635,0.148000,0.130138,0.086667,0.026247,-16.109831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1970,44.332615,0.118625,38.580246,43.328392,49.545036,10.964790,29.118887,7.991307,57.634918,27.472385,...,0.118151,0.008159,0.009002,2.064897,0.898204,0.420000,0.351852,0.502500,0.467834,-37.611149
1971,43.539474,0.138447,38.441360,41.650421,50.412521,11.971161,104.980247,99.727791,299.075012,410.541626,...,0.086890,0.008436,0.032158,2.040816,1.550388,0.248333,0.151263,0.321429,0.430429,-33.897774
1972,45.137974,0.160381,38.204502,43.318497,54.491749,16.287247,297.005066,482.484802,80.743454,36.427807,...,0.101797,0.006154,0.036091,2.020202,1.278772,0.284000,0.136029,0.396667,0.468496,-35.436405
1973,43.194508,0.098176,40.304451,42.676632,45.354191,5.049740,155.292587,257.793976,401.537354,709.714722,...,0.068428,0.005634,0.067544,1.912568,1.385042,0.272000,0.211603,0.356667,0.347499,-32.892555


In [17]:
# Re-extract the first file on its own to compare with row 0 of df_processed.
y = smile.process_file(df['file_path'][0])
y

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
data/03a01Fa.wav,0 days,0 days 00:00:01.898250,31.188166,0.15256,27.766502,31.412098,36.340343,8.573841,34.608952,11.036856,91.834801,43.389225,...,-0.012351,0.010897,0.218922,4.255319,3.278689,0.156667,0.084591,0.108571,0.036422,-21.647932


In [18]:
# Re-extract the second file on its own to compare with row 1 of df_processed.
y = smile.process_file(df['file_path'][1])
y

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
data/03a01Nc.wav,0 days,0 days 00:00:01.611250,25.022938,0.14854,20.637066,25.118477,28.660353,8.023287,54.014923,38.583805,27.07468,0.593055,...,-0.020058,0.007849,0.450747,6.25,3.870968,0.101667,0.055202,0.115714,0.049816,-18.010019


In [19]:
# Re-extract the file at index 1973 on its own to compare with row 1973 of df_processed.
y = smile.process_file(df['file_path'][1973])
y

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
data/Actor_24_03-01-08-02-02-01-24.wav,0 days,0 days 00:00:03.670333333,43.194508,0.098176,40.304451,42.676632,45.354191,5.04974,155.292587,257.793976,401.537354,709.714722,...,0.068428,0.005634,0.067544,1.912568,1.385042,0.272,0.211603,0.356667,0.347499,-32.892555


In [20]:
# Re-extract the file at index 1974 on its own to compare with row 1974 of df_processed.
y = smile.process_file(df['file_path'][1974])
y

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
data/Actor_24_03-01-08-02-02-02-24.wav,0 days,0 days 00:00:03.636958333,44.371799,0.112749,39.802952,44.422417,48.187508,8.384556,188.801086,277.191193,392.898987,663.038086,...,0.057204,0.003921,0.04565,2.486188,1.680672,0.213333,0.178668,0.358333,0.419659,-33.3745


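The processing is correct: the spot-checked rows of `df_processed` match the features extracted directly from the individual files, so the poor clustering is not caused by a data-processing error.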

### Process with GeMAPSv01b

In [21]:
# Switch the notebook-level extractor to GeMAPSv01b functionals.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.GeMAPSv01b,
    feature_level=opensmile.FeatureLevel.Functionals,
)

from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)

In [22]:
# Re-run the extraction over every file; `smile` is now the GeMAPSv01b extractor.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [03:31<00:00,  9.35it/s]


In [23]:
from sklearn.preprocessing import StandardScaler
import os

# One cluster per distinct emotion label, derived from the data instead of a
# hard-coded count so it cannot drift from the dataset.
num_clusters = len(df['emotion'].unique())

# Set LOKY_MAX_CPU_COUNT environment variable
os.environ["LOKY_MAX_CPU_COUNT"] = "4"  # Replace "4" with the number of cores you want to use

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

# Standardise the extracted features before clustering.
scaler = StandardScaler()
scaler.fit(df_processed)
df_scaled = scaler.transform(df_processed)

X_train = df_scaled
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

Adjusted Rand Index (ARI): 0.1022215714666477

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [6 6 4 4 6 8 4 4 6 6 6 6 8 4 6 6 6 8 6 4]


### Process with emobase

In [24]:
# Switch the notebook-level extractor to emobase functionals.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.emobase,
    feature_level=opensmile.FeatureLevel.Functionals,
)

from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)

In [25]:
# Re-run the extraction over every file; `smile` is now the emobase extractor.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [02:04<00:00, 15.90it/s]


In [26]:
from sklearn.metrics import adjusted_rand_score

# Standardise the emobase features (fit and transform on the same frame).
scaler = StandardScaler().fit(df_processed)
df_scaled = scaler.transform(df_processed)

# Cluster the standardised features.
X_train = df_scaled
kmeans = KMeans(random_state=42, n_clusters=num_clusters).fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

Adjusted Rand Index (ARI): 0.061585928865284405

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [4 2 4 4 2 2 4 6 4 4 2 2 2 4 4 4 2 3 4 4]


### Process with ComParE_2016

In [27]:
# Switch the notebook-level extractor to ComParE_2016 functionals.
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)

In [28]:
# Re-run the extraction over every file; `smile` is now the ComParE_2016 extractor.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [04:50<00:00,  6.80it/s]


In [29]:
from sklearn.metrics import adjusted_rand_score

# Standardise the ComParE_2016 features (fit and transform on the same frame).
scaler = StandardScaler().fit(df_processed)
df_scaled = scaler.transform(df_processed)

# Cluster the standardised features.
X_train = df_scaled
kmeans = KMeans(random_state=42, n_clusters=num_clusters).fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

Adjusted Rand Index (ARI): 0.06293955713719294

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [3 3 4 3 3 6 3 4 3 4 6 4 6 4 3 3 3 6 4 4]


### Conclusion for Unsupervised Learning

1. K-Means clustering does not work well on the standard openSMILE feature sets: the ARI stays between roughly 0.06 and 0.10 even when scaling is applied.
2. This may be due to the overwhelming number of features (62, 88, 988 and 6373), which may include a lot of redundant features.
3. It would be difficult to identify the important features among so many in an unsupervised setting. Hence, we will explore the use of state-of-the-art models such as wav2vec2.

### Using ANOVA to reduce the number of features

In [7]:
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import numpy as np

# Emobase feature set
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.emobase,
    feature_level=opensmile.FeatureLevel.Functionals,
)

from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)


# Extract the emobase features for every file in the dataset.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [02:35<00:00, 12.67it/s]


NameError: name 'StandardScaler' is not defined

In [19]:
import os
from sklearn.preprocessing import StandardScaler

# Standardise all extracted features before scoring them.
scaler = StandardScaler()
scaler.fit(df_processed)
df_scaled = scaler.transform(df_processed)

X = df_scaled
y = df['emotion']

# Keep the top 25% of features ranked by the ANOVA F-score against the emotion labels.
# NOTE(review): the selection uses the ground-truth labels, and the ARI below is
# computed against those same labels, so this step is not purely unsupervised
# and the resulting score is optimistic.
# NOTE(review): the "f = msb / msw" warning in this cell's output presumably comes
# from features with zero within-group variance - confirm and consider dropping
# constant columns first.
selector = SelectPercentile(score_func=f_classif, percentile=25)
X_selected = selector.fit_transform(X, y)

# Map the selected column positions back to feature names.
selected_indices = selector.get_support(indices=True)
selected_features_names = df_processed.columns[selected_indices]
print(selected_features_names)

# Re-standardise using only the selected columns.
df_selected = df_processed[selected_features_names]
scaler = StandardScaler()
scaler.fit(df_selected)
df_scaled = scaler.transform(df_selected)

# One cluster per distinct emotion label, derived from the data instead of a
# hard-coded count so it cannot drift from the dataset.
num_clusters = len(df['emotion'].unique())

# Set LOKY_MAX_CPU_COUNT environment variable
os.environ["LOKY_MAX_CPU_COUNT"] = "4"  # Replace "4" with the number of cores you want to use

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

X_train = df_scaled
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

  f = msb / msw


Index(['pcm_intensity_sma_max', 'pcm_intensity_sma_range',
       'pcm_intensity_sma_amean', 'pcm_intensity_sma_linregc1',
       'pcm_intensity_sma_linregc2', 'pcm_intensity_sma_linregerrA',
       'pcm_intensity_sma_linregerrQ', 'pcm_intensity_sma_stddev',
       'pcm_intensity_sma_quartile1', 'pcm_intensity_sma_quartile2',
       ...
       'lspFreq_sma_de[6]_quartile3', 'lspFreq_sma_de[6]_iqr1-2',
       'lspFreq_sma_de[6]_iqr2-3', 'lspFreq_sma_de[6]_iqr1-3',
       'lspFreq_sma_de[7]_kurtosis', 'lspFreq_sma_de[7]_quartile1',
       'lspFreq_sma_de[7]_quartile3', 'lspFreq_sma_de[7]_iqr1-2',
       'lspFreq_sma_de[7]_iqr2-3', 'lspFreq_sma_de[7]_iqr1-3'],
      dtype='object', length=247)
Adjusted Rand Index (ARI): 0.08578289142855984

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [7 5 7 7 5 1 7 6 7 7 5 5 1 7 7 7 5 1 7 7]


In [23]:
from sklearn.feature_selection import SelectPercentile, f_classif, SelectKBest
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import numpy as np

# GeMAPSv01b
smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.GeMAPSv01b,
    feature_level=opensmile.FeatureLevel.Functionals,
)

from tqdm import tqdm


def generate_df(file_paths, extractor=None):
    """Extract features for every audio file and stack the results row-wise.

    Parameters
    ----------
    file_paths : iterable
        Paths passed one at a time to ``extractor.process_file``.
    extractor : optional
        Object exposing ``process_file(path)`` that returns a DataFrame.
        When omitted, the notebook-level ``smile`` extractor is looked up at
        call time, so existing ``generate_df(paths)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in input order. The per-file index
        is discarded (``ignore_index=True``), so rows are numbered 0..n-1.

    Raises
    ------
    ValueError
        If ``file_paths`` yields no paths.
    """
    if extractor is None:
        # Fall back to whichever extractor the notebook currently has configured.
        extractor = smile

    frames = [
        extractor.process_file(path)
        for path in tqdm(file_paths, desc="Processing files")
    ]

    if not frames:
        # pd.concat would raise a ValueError here as well; this message names the cause.
        raise ValueError("generate_df received no file paths to process")

    return pd.concat(frames, ignore_index=True)


# Extract the GeMAPSv01b features for every file in the dataset.
df_processed = generate_df(df['file_path'])

Processing files: 100%|████████████████████████████████████████████████████████████| 1975/1975 [02:48<00:00, 11.69it/s]


In [27]:
import os
from sklearn.preprocessing import StandardScaler

X = df_processed.values
y = df['emotion']

# Keep the 30 features with the highest ANOVA F-score against the emotion labels.
# NOTE(review): the selection uses the ground-truth labels, and the ARI below is
# computed against those same labels, so this step is not purely unsupervised
# and the resulting score is optimistic.
selector = SelectKBest(score_func=f_classif, k=30)
X_selected = selector.fit_transform(X, y)

# Map the selected column positions back to feature names.
selected_indices = selector.get_support(indices=True)
selected_features_names = df_processed.columns[selected_indices]
print(selected_features_names)

# Standardise using only the selected columns.
df_selected = df_processed[selected_features_names]
scaler = StandardScaler()
scaler.fit(df_selected)
df_scaled = scaler.transform(df_selected)

# One cluster per distinct emotion label, derived from the data instead of a
# hard-coded count so it cannot drift from the dataset.
num_clusters = len(df['emotion'].unique())

# Set LOKY_MAX_CPU_COUNT environment variable
os.environ["LOKY_MAX_CPU_COUNT"] = "4"  # Replace "4" with the number of cores you want to use

from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

X_train = df_scaled
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
kmeans.fit(X_train)

# Cluster id assigned to each sample.
cluster_labels = kmeans.labels_

# Agreement between the cluster assignments and the ground-truth emotion labels.
ari = adjusted_rand_score(df['emotion'], cluster_labels)
print("Adjusted Rand Index (ARI):", ari)
print()
print('Actual Labels:',df['emotion'][:20].values)
print()
print('Unsupervised Labels', cluster_labels[:20])

Index(['F0semitoneFrom27.5Hz_sma3nz_amean',
       'F0semitoneFrom27.5Hz_sma3nz_percentile20.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile50.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile80.0',
       'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2', 'loudness_sma3_amean',
       'loudness_sma3_stddevNorm', 'loudness_sma3_percentile20.0',
       'loudness_sma3_percentile50.0', 'loudness_sma3_percentile80.0',
       'loudness_sma3_pctlrange0-2', 'loudness_sma3_meanRisingSlope',
       'loudness_sma3_stddevRisingSlope', 'loudness_sma3_meanFallingSlope',
       'loudness_sma3_stddevFallingSlope', 'jitterLocal_sma3nz_amean',
       'shimmerLocaldB_sma3nz_stddevNorm', 'logRelF0-H1-A3_sma3nz_amean',
       'F1frequency_sma3nz_amean', 'F1amplitudeLogRelF0_sma3nz_amean',
       'F1amplitudeLogRelF0_sma3nz_stddevNorm', 'F2frequency_sma3nz_amean',
       'F2amplitudeLogRelF0_sma3nz_amean',
       'F2amplitudeLogRelF0_sma3nz_stddevNorm',
       'F3amplitudeLogRelF0_sma3nz_amean',
       'F3amp



Adjusted Rand Index (ARI): 0.08951302793663957

Actual Labels: ['happy' 'neutral' 'angry' 'happy' 'neutral' 'sad' 'angry' 'angry'
 'fearful' 'happy' 'boredom' 'neutral' 'sad' 'angry' 'fearful' 'happy'
 'neutral' 'sad' 'angry' 'angry']

Unsupervised Labels [6 7 6 6 3 7 6 1 6 6 3 3 7 6 6 6 7 7 6 6]
