In [4]:
import torch
import librosa as lb
import numpy as np

# I am running this in Jupyter, so DNN is defined again here.
# Running it in Jupyter is not recommended.
# If you use VS Code or PyCharm, just use 'from train import DNN' instead.
class DNN(torch.nn.Module):
    """Feed-forward classifier with a single hidden layer.

    The network applies Linear -> ReLU -> Linear and returns the raw scores
    of the final linear layer (no softmax is applied here).

    Args:
        input_size: Size of the last dimension of the input tensor.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of scores produced for each input vector.
    """

    def __init__(self, input_size=40, hidden_size=128, output_size=8):
        super().__init__()
        # The attribute names are also the state_dict key prefixes
        # ("hidden.*", "output.*"), so they must not be renamed if saved
        # checkpoints are to keep loading.
        self.hidden = torch.nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = torch.nn.ReLU()
        self.output = torch.nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the output-layer scores for ``x``."""
        activated = self.relu(self.hidden(x))
        return self.output(activated)

    
    
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model on the selected device
model = DNN().to(device)

# Load the saved weights. map_location remaps tensors that were saved from a
# GPU onto the selected device, so a CUDA checkpoint also loads on a CPU-only
# machine.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load('/path/to/your/emotion_model.pth', map_location=device))
model.eval()  # set the model to evaluation mode

print("Model loaded and ready for inference")

# Function to extract audio features; it is the same as in get_data.ipynb
def audio_features(wav_file_path, mfcc=True, chroma=False, mel=False, sample_rate=22050):
    """Build one averaged feature vector for an audio file.

    The file is loaded with librosa at ``sample_rate``. Each enabled feature
    matrix is transposed and averaged along axis 0, and the resulting vectors
    are concatenated in the order MFCC, chroma, mel.

    Args:
        wav_file_path: Path of the audio file to load.
        mfcc: Include the means of 40 MFCCs.
        chroma: Include chroma means computed from the STFT magnitude.
        mel: Include the means of a 40-band mel spectrogram
            (``fmin=0``, ``fmax=sample_rate // 2``).
        sample_rate: Value passed as ``sr`` to ``librosa.load``.

    Returns:
        A 1-D ``numpy.ndarray`` holding the selected features (empty when
        every flag is off), or ``None`` if the loaded signal is not
        one-dimensional.
    """
    signal, sample_rate = lb.load(wav_file_path, sr=sample_rate)
    if signal.ndim != 1:
        return None

    # Seed with an empty array so the result is still an (empty) float array
    # when no feature is requested.
    pieces = [np.array([])]

    if mfcc:
        mfcc_matrix = lb.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
        pieces.append(np.mean(mfcc_matrix.T, axis=0))

    if chroma:
        magnitude = np.abs(lb.stft(signal))
        chroma_matrix = lb.feature.chroma_stft(S=magnitude, sr=sample_rate)
        pieces.append(np.mean(chroma_matrix.T, axis=0))

    if mel:
        mel_matrix = lb.feature.melspectrogram(
            y=signal, sr=sample_rate, n_mels=40, fmin=0, fmax=sample_rate // 2
        )
        pieces.append(np.mean(mel_matrix.T, axis=0))

    return np.hstack(pieces)

# Load and preprocess an audio file
wav_file_path = "/path/to/your/audio.wav"
features = audio_features(wav_file_path)
if features is None:
    # audio_features returns None when the loaded audio is not 1-D; fail with
    # a clear message instead of an obscure tensor-construction error below.
    raise ValueError(f"Could not extract features from {wav_file_path!r}")

# Convert the features to a tensor on whatever device the model lives on
# (GPU or CPU), so the input and the weights always agree.
model_device = next(model.parameters()).device
features_tensor = torch.tensor(features, dtype=torch.float).to(model_device)

# Prediction: add a batch dimension and take the index of the highest score
with torch.no_grad():
    outputs = model(features_tensor.unsqueeze(0))
    predicted_label = outputs.argmax(1).item()

ravdess_label_dict = {
    "01": "neutral", "02": "calm", "03": "happy", "04": "sad",
    "05": "angry", "06": "fear", "07": "disgust", "08": "surprise"
}
# The predicted index selects the label by the dict's insertion order
# (index 0 -> "neutral", ..., index 7 -> "surprise").
predicted_emotion = list(ravdess_label_dict.values())[predicted_label]

print(f"Predicted emotion: {predicted_emotion}")


Model loaded and ready for inference
Predicted emotion: happy
