# Speech Emotion Detection using wav files

## Preparation

### Install

In [1]:
!pip install transformers
!pip install librosa
!pip install --upgrade numba

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


### Import

In [2]:
from transformers import pipeline
import pandas as pd
import os
import librosa
from google.colab import drive
# Mount Google Drive at /content/drive so the data folders used further down
# can be accessed through ordinary file paths.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Download Model

In [3]:
# Build the audio-classification pipeline from the
# "superb/wav2vec2-base-superb-er" checkpoint. This is a module-level object:
# extract_emotions() below reads it as a global.
classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")



## Function Definitions

### Extract emotions given a Range

In [4]:
def extract_emotions(audio, sr, start, end):
    """Classify the emotion of one time window of an audio signal.

    Args:
        audio: Sliceable sequence of audio samples.
        sr: Sampling rate (samples per second), used to turn seconds into
            sample indices.
        start: Window start, in seconds.
        end: Window end, in seconds.

    Returns:
        The result of calling the module-level ``classifier`` on the window
        with ``top_k=5``.
    """
    # Seconds -> sample offsets; int() truncates toward zero.
    first_sample = int(start * sr)
    last_sample = int(end * sr)

    window = audio[first_sample:last_sample]
    return classifier(window, top_k=5)

### Append Emotions to a CSV File

In [5]:
def speech_emotions(csv_file_path, wav_file_path, out_file_path):
    """Append per-segment speech-emotion scores to a segment CSV.

    Reads ``csv_file_path``, keeps the rows whose segment lasts at least
    0.1 s, runs ``extract_emotions`` on the matching slice of the audio in
    ``wav_file_path`` (loaded at 16 kHz) and writes the kept rows plus four
    score columns to ``out_file_path``.

    Args:
        csv_file_path: CSV containing "Start time (s)" and "End time (s)"
            columns.
        wav_file_path: Audio file the segment times refer to.
        out_file_path: Destination CSV (written as UTF-8 with BOM, without
            the index column).

    Added columns: ``speech_neu``, ``speech_ang``, ``speech_hap``,
    ``speech_sad``.
    """
    emotion_labels = ("neu", "ang", "hap", "sad")
    min_duration_s = 0.1

    # Open the CSV file
    segments = pd.read_csv(csv_file_path, encoding="utf8")

    # Drop rows that are too short to be classified. The duration is compared
    # as a float: truncating it with int() first would turn every segment
    # shorter than one second into 0 and discard it. Rows with a missing
    # start/end time compare as False and are dropped as well.
    durations = segments["End time (s)"] - segments["Start time (s)"]
    segments = segments[durations >= min_duration_s].copy()

    # Load Audio
    audio, sr = librosa.load(wav_file_path, sr=16000)

    # One list of scores per emotion, filled in row order.
    score_columns = {label: [] for label in emotion_labels}
    for start, end in zip(segments["Start time (s)"], segments["End time (s)"]):
        predictions = extract_emotions(audio, sr, start, end)

        # The pipeline orders its results by descending score, not by class,
        # so each score has to be looked up through its "label" rather than
        # through its position in the list.
        score_by_label = {p["label"]: p["score"] for p in predictions}
        for label in emotion_labels:
            # NaN marks a label the classifier did not report.
            # NOTE(review): assumes the model's labels are exactly
            # neu/ang/hap/sad - confirm against the model config.
            score_columns[label].append(score_by_label.get(label, float("nan")))

    for label in emotion_labels:
        segments[f"speech_{label}"] = score_columns[label]

    # Export the new csv
    segments.to_csv(out_file_path, index=False, encoding="utf-8-sig")

### Append Emotions in Batch

In [6]:
def speech_emotions_batch(csv_directory_path, wav_directory_path, directory_out):
    """Run ``speech_emotions`` for every CSV/WAV pair found in two folders.

    For each ``<stem>_dia_trans_users.csv`` in ``csv_directory_path`` the
    audio file ``<stem>.wav`` is looked up in ``wav_directory_path`` and the
    result is written to ``<stem>_speech.csv`` in ``directory_out``. A CSV is
    skipped when its audio file is missing or its output file already exists.

    Args:
        csv_directory_path: Folder scanned for ``*_dia_trans_users.csv`` files.
        wav_directory_path: Folder containing the matching ``.wav`` files.
        directory_out: Folder receiving the ``*_speech.csv`` files; created
            if it does not exist yet.
    """
    csv_suffix = "_dia_trans_users.csv"

    # Writing the output CSV fails when the target folder is missing, so
    # create it up front. An empty string means "current directory" and
    # needs no creation.
    if directory_out:
        os.makedirs(directory_out, exist_ok=True)

    counter = 1
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order (and the printed counter) reproducible between runs.
    for filename in sorted(os.listdir(csv_directory_path)):
        # Check if the file is one of the expected CSV files
        if not filename.endswith(csv_suffix):
            continue

        # Strip the suffix by its actual length instead of a hard-coded 20.
        stem = filename[:-len(csv_suffix)]
        csv_file_path = os.path.join(csv_directory_path, filename)
        wav_file_path = os.path.join(wav_directory_path, stem + ".wav")
        out_file_path = os.path.join(directory_out, stem + "_speech.csv")

        # Skip entries without audio and entries that were already processed.
        if not os.path.exists(wav_file_path) or os.path.exists(out_file_path):
            continue

        # Call the processing function with the CSV and WAV data
        speech_emotions(csv_file_path, wav_file_path, out_file_path)

        # Print the name of the processed file
        print(f"Processed file {counter}: {filename}")
        counter += 1

## Usage

In [7]:
# All inputs and outputs live under one data folder on the mounted Drive.
DATA_ROOT = '/content/drive/MyDrive/Projects/tps/data'

# Folder scanned for the input CSV files.
csv_directory_path = f'{DATA_ROOT}/8. dia_trans_verified_users'
# Folder holding the matching .wav recordings.
wav_directory_path = f'{DATA_ROOT}/2. wav'
# Folder receiving the generated *_speech.csv files.
directory_out = f'{DATA_ROOT}/9. speech_emotions'

In [8]:
# Process every CSV in csv_directory_path that has a matching .wav file.
# CSVs whose output file already exists in directory_out are skipped, so
# re-running this cell only handles the files that are still missing.
speech_emotions_batch(csv_directory_path, wav_directory_path, directory_out)

Processed file 1: 9.2_dia_trans_users.csv
Processed file 2: 7.3_dia_trans_users.csv
