### Set the corresponding values in the cell below, then run the remaining cells in order

In [20]:
import pathlib

# --- User configuration: edit these values before running the rest of the notebook ---

# Dataset root folder
dataset_root_path = pathlib.Path('datasets')

# Path of the CSV file listing the class labels
labels_csv_path = 'datasets/home_labels.csv'

# Path of the CSV file the results are written to
output_csv_path = 'outputs/clap_results.csv'

### Import libraries 

In [21]:
from msclap import CLAP
import torch.nn.functional as F
import numpy as np
import torch
import os
from moviepy.editor import VideoFileClip

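### Get audio from video

Audio tracks are extracted from `.mp4` files inside the CLAP inference cell below and saved as `.wav` files under the `audios` folder of the dataset.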

### Read class Labels

In [22]:
import csv


def _read_class_labels(csv_path):
    """Return the unique class names found in the first column of `csv_path`.

    Names are returned in file order. Blank rows are skipped and repeated names
    are kept only once, so the position of a name in the returned list can be
    used directly as its class id.
    """
    names = []
    seen = set()
    # newline='' is what the csv module expects when it is handed a file object.
    with open(csv_path, mode='r', newline='') as file:
        for row in csv.reader(file):
            # Skip empty lines (e.g. a trailing newline) instead of failing on row[0].
            if not row or not row[0].strip():
                continue
            class_name = row[0]
            if class_name in seen:
                continue
            seen.add(class_name)
            names.append(class_name)
    return names


class_labels = _read_class_labels(labels_csv_path)

# Both maps are derived from the same ordered list, so an id is always the
# index of its label in `class_labels` (which is what the model's top-k
# indices refer to).
label2id = {class_name: i for i, class_name in enumerate(class_labels)}
id2label = {i: class_name for i, class_name in enumerate(class_labels)}

print(f"{len(class_labels)} Unique classes: {class_labels}.")

70 Unique classes: ['No people is in the room', 'Studying', 'Typing', 'Using computer', 'Making phone calls', 'Play with phone/tablet', 'Playing with pets', 'Sitting down', 'Reading', 'Writing', 'Setting table', 'Eating food', 'Dining', 'Cooking', 'Sleeping', 'Laying down', 'Picking up objects', 'Brushing teeth', 'Showering', 'Playing music', 'TV', 'Cleaning', 'Doing laundry', 'Mopping floor', 'Vacumning', 'Organizing space', 'Sewing', 'Knitting', 'Decorating', 'Party', 'Chatting', 'Talking', 'Singing', 'Laughing', 'Speaking', 'Dancing', 'Drinking', 'Meditating', 'Drawing', 'Painting', 'Playing board games', 'Playing video games', 'Taking photos', 'Potluck', 'Working', 'Exercising', 'Walking', 'Running', 'Celebrating', 'Physical altercations', 'Verbal confrontations', 'Using drug', 'Theft or vandalism', 'Fighting', 'Domestic violence', 'Break in', 'Glass breaking', 'Unattended cooking', 'Smoke or Fire or Open flame', 'Smoking', 'Gunshot', 'Making noise', 'Falling down', 'Tripping', 'Cr

### CLAP inference with given audio

In [23]:
from tqdm import tqdm
from collections import defaultdict
import csv

# Number of ranked predictions written per file. The CSV header below has
# exactly this many "topN pred"/"confidence" column pairs.
TOP_K = 5
# File types the loop can handle: videos get their audio track extracted,
# audio files are passed to the model as they are.
SUPPORTED_SUFFIXES = {".mp4", ".wav"}


def _extract_audio(video_path):
    """Save the audio track of `video_path` as a .wav file and return its path.

    The .wav is written to `<dataset_root_path>/audios/<path relative to the
    dataset root>`, with the extension replaced by `.wav`. Returns the .wav
    path as a POSIX-style string, or None when the video has no audio track.
    """
    video = VideoFileClip(video_path.as_posix())
    try:
        if video.audio is None:
            return None
        # Mirror the dataset layout under "audios" without assuming how long
        # the dataset root's name is.
        relative_wav = video_path.relative_to(dataset_root_path).with_suffix(".wav")
        wav_path = dataset_root_path / "audios" / relative_wav
        wav_path.parent.mkdir(parents=True, exist_ok=True)
        video.audio.write_audiofile(wav_path.as_posix(), verbose=False)
        return wav_path.as_posix()
    finally:
        # Release the reader held by the clip; otherwise one is leaked per video.
        video.close()


train_paths = list(dataset_root_path.glob("train/*/*"))

# Load model (Choose between versions '2022' or '2023')
# The model weight will be downloaded automatically if `model_fp` is not specified
clap_model = CLAP(version = '2023', use_cuda=True)

# The text prompts are identical for every file, so embed them once up front
# instead of once per loop iteration.
with torch.no_grad():
    text_embeddings = clap_model.get_text_embeddings([f"This is a sound of {c}" for c in class_labels])

# topk() raises if asked for more entries than there are classes.
top_k = min(TOP_K, len(class_labels))

correct = defaultdict(int)
total = defaultdict(int)

# Make sure the output folder exists so open() does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_csv_path) or ".", exist_ok=True)

with open(output_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["file path", "ground truth", "top1 pred", "confidence", "top2 pred", "confidence", "top3 pred", "confidence", "top4 pred", "confidence", "top5 pred", "confidence"])

    for path in tqdm(train_paths):
        # Sanity format check: report and skip anything that is not a supported file.
        suffix = path.suffix.lower()
        if suffix not in SUPPORTED_SUFFIXES:
            print(f"ERROR: {path} is not a supported file type")
            continue

        # The ground-truth class is the name of the folder containing the file.
        # Using the path object keeps this independent of the OS path separator.
        label = path.parent.name
        out_row = [path, label2id[label]]
        # NOTE(review): files without audio are still counted in `total`, as before.
        total[label] += 1

        posix_path = path.as_posix()
        if suffix == ".mp4":
            audio_path = _extract_audio(path)
            if audio_path is None:
                print(f"This video {posix_path} has no audio!")
                out_row.extend([-1, 0] * TOP_K) # use -1 if no audio
                writer.writerow(out_row)
                continue
        else:
            audio_path = posix_path

        # make CLAP predictions
        with torch.no_grad():
            # Extract audio embeddings
            audio_embeddings = clap_model.get_audio_embeddings([audio_path])

            # Compute similarity between audio and text embeddings
            similarities = clap_model.compute_similarity(audio_embeddings, text_embeddings)

            similarity = F.softmax(similarities, dim=1)

        values, indices = similarity[0].topk(top_k)
        pred_index = indices[0].item()

        # Output the results
        print("\nFile:", posix_path)
        print("Ground Truth: {}".format(label))
        print("Top predictions:")
        for value, index in zip(values, indices):
            class_index = index.item()
            confidence = value.item()
            print(f"{class_labels[class_index]:>16s}: {100 * confidence:.2f}%")
            out_row.append(class_index)
            out_row.append(round(confidence, 6))
        # Keep every row as wide as the header when there are fewer than TOP_K classes.
        out_row.extend([-1, 0] * (TOP_K - top_k))
        print("")
        writer.writerow(out_row)

        if label == class_labels[pred_index]:
            correct[label] += 1
        


  0%|          | 0/422 [00:00<?, ?it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/9q8mf6GUiiE_000009_000019.wav


  0%|          | 0/422 [00:00<?, ?it/s]

MoviePy - Done.


  0%|          | 1/422 [00:00<02:45,  2.54it/s]


File: datasets/train/Crying/9q8mf6GUiiE_000009_000019.mp4
Ground Truth: Crying
Top predictions:
     Suffocating: 17.15%
         Singing: 11.24%
No people is in the room: 11.22%
      Meditating: 10.67%
     Celebrating: 6.50%



  0%|          | 1/422 [00:00<02:45,  2.54it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/9s4bfiyc9Yc_000195_000205.wav


  0%|          | 2/422 [00:00<02:36,  2.68it/s]

MoviePy - Done.

File: datasets/train/Crying/9s4bfiyc9Yc_000195_000205.mp4
Ground Truth: Crying
Top predictions:
          Crying: 80.42%
     Suffocating: 13.81%
Playing with pets: 2.14%
  Brushing teeth: 0.64%
Domestic violence: 0.55%



  0%|          | 2/422 [00:00<02:36,  2.68it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a4xHx59uFgk_000003_000013.wav


  1%|          | 3/422 [00:01<02:26,  2.87it/s]

MoviePy - Done.

File: datasets/train/Crying/a4xHx59uFgk_000003_000013.mp4
Ground Truth: Crying
Top predictions:
          Crying: 40.74%
No people is in the room: 17.49%
Playing with pets: 5.88%
     Suffocating: 5.76%
         Talking: 4.18%



  1%|          | 3/422 [00:01<02:26,  2.87it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a50yimv7Lqg_000026_000036.wav


  1%|          | 4/422 [00:01<02:25,  2.88it/s]

MoviePy - Done.

File: datasets/train/Crying/a50yimv7Lqg_000026_000036.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 46.58%
     Celebrating: 17.09%
Playing with pets: 15.59%
         Singing: 7.55%
     Eating food: 2.33%



  1%|          | 4/422 [00:01<02:25,  2.88it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Afhkn2h-wLE_000000_000010.wav


  1%|          | 5/422 [00:01<02:23,  2.90it/s]

MoviePy - Done.

File: datasets/train/Crying/Afhkn2h-wLE_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 39.94%
Playing with pets: 24.78%
     Suffocating: 15.53%
     Celebrating: 2.77%
        Laughing: 2.47%



  1%|          | 5/422 [00:01<02:23,  2.90it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/aMAJ-DRt6bU_000116_000126.wav


  1%|▏         | 6/422 [00:02<02:18,  3.00it/s]

MoviePy - Done.

File: datasets/train/Crying/aMAJ-DRt6bU_000116_000126.mp4
Ground Truth: Crying
Top predictions:
        Sleeping: 43.88%
          Crying: 23.86%
        Laughing: 7.62%
     Suffocating: 6.99%
         Choking: 3.44%



  1%|▏         | 6/422 [00:02<02:18,  3.00it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a_yBdlVHrK8_000000_000010.wav


  2%|▏         | 7/422 [00:02<02:23,  2.89it/s]

MoviePy - Done.

File: datasets/train/Crying/a_yBdlVHrK8_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 84.82%
     Suffocating: 12.62%
Domestic violence: 0.40%
Playing with pets: 0.30%
         Choking: 0.28%



  2%|▏         | 7/422 [00:02<02:23,  2.89it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/fKrcKrs-o_w_000000_000010.wav


  2%|▏         | 8/422 [00:02<02:18,  2.99it/s]

MoviePy - Done.

File: datasets/train/Crying/fKrcKrs-o_w_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 57.49%
        Sleeping: 8.54%
     Suffocating: 6.99%
      Meditating: 2.81%
Making phone calls: 2.55%



  2%|▏         | 8/422 [00:02<02:18,  2.99it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/fLzGu07zA-I_000008_000018.wav


  2%|▏         | 9/422 [00:03<02:14,  3.07it/s]

MoviePy - Done.

File: datasets/train/Crying/fLzGu07zA-I_000008_000018.mp4
Ground Truth: Crying
Top predictions:
          Crying: 90.20%
     Suffocating: 6.23%
Making phone calls: 1.03%
  Brushing teeth: 0.48%
Domestic violence: 0.44%



  2%|▏         | 9/422 [00:03<02:14,  3.07it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/FM06rkKxFms_000010_000020.wav


  2%|▏         | 10/422 [00:03<02:14,  3.07it/s]

MoviePy - Done.

File: datasets/train/Crying/FM06rkKxFms_000010_000020.mp4
Ground Truth: Crying
Top predictions:
          Crying: 57.20%
     Suffocating: 21.99%
Playing with pets: 11.32%
     Celebrating: 1.58%
Play with phone/tablet: 1.13%



  2%|▏         | 10/422 [00:03<02:14,  3.07it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/G6QENvmNkRE_000000_000010.wav


  3%|▎         | 11/422 [00:03<02:11,  3.13it/s]

MoviePy - Done.

File: datasets/train/Crying/G6QENvmNkRE_000000_000010.mp4
Ground Truth: Crying
Top predictions:
Playing with pets: 37.03%
          Crying: 15.16%
        Laughing: 11.43%
     Suffocating: 8.12%
No people is in the room: 4.35%



  3%|▎         | 11/422 [00:03<02:11,  3.13it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Ga9lFu4gurw_000015_000025.wav


  3%|▎         | 12/422 [00:03<02:07,  3.21it/s]

MoviePy - Done.

File: datasets/train/Crying/Ga9lFu4gurw_000015_000025.mp4
Ground Truth: Crying
Top predictions:
          Crying: 74.48%
     Suffocating: 13.48%
Domestic violence: 5.36%
Playing with pets: 2.47%
Making phone calls: 1.06%



  3%|▎         | 12/422 [00:04<02:07,  3.21it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/GnIgOUPiMqE_000023_000033.wav


  3%|▎         | 13/422 [00:04<02:06,  3.22it/s]

MoviePy - Done.

File: datasets/train/Crying/GnIgOUPiMqE_000023_000033.mp4
Ground Truth: Crying
Top predictions:
          Crying: 82.58%
     Suffocating: 7.74%
Making phone calls: 1.86%
      Meditating: 1.30%
Playing with pets: 1.23%



  3%|▎         | 13/422 [00:04<02:06,  3.22it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/gpaANcmFUOI_000003_000013.wav


  3%|▎         | 14/422 [00:04<02:11,  3.10it/s]

MoviePy - Done.

File: datasets/train/Crying/gpaANcmFUOI_000003_000013.mp4
Ground Truth: Crying
Top predictions:
          Crying: 79.12%
     Suffocating: 11.54%
Playing with pets: 2.21%
No people is in the room: 1.08%
     Celebrating: 0.93%



  3%|▎         | 14/422 [00:04<02:11,  3.10it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/G_-Hx6u40nE_000032_000042.wav


  4%|▎         | 15/422 [00:04<02:11,  3.10it/s]

MoviePy - Done.

File: datasets/train/Crying/G_-Hx6u40nE_000032_000042.mp4
Ground Truth: Crying
Top predictions:
          Crying: 76.35%
     Suffocating: 10.14%
No people is in the room: 1.99%
Domestic violence: 1.93%
Playing with pets: 1.70%



  4%|▎         | 15/422 [00:05<02:11,  3.10it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/H85OllZBK70_000013_000023.wav


  4%|▍         | 16/422 [00:05<02:08,  3.15it/s]

MoviePy - Done.

File: datasets/train/Crying/H85OllZBK70_000013_000023.mp4
Ground Truth: Crying
Top predictions:
          Crying: 58.44%
     Suffocating: 26.12%
      Meditating: 5.30%
     Celebrating: 1.54%
No people is in the room: 1.46%



  4%|▍         | 16/422 [00:05<02:08,  3.15it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Ha3vwLcjMAs_000157_000167.wav


  4%|▍         | 17/422 [00:05<02:07,  3.18it/s]

MoviePy - Done.

File: datasets/train/Crying/Ha3vwLcjMAs_000157_000167.mp4
Ground Truth: Crying
Top predictions:
   Doing laundry: 21.83%
     Suffocating: 20.89%
          Crying: 16.19%
  Brushing teeth: 14.14%
No people is in the room: 4.40%



  4%|▍         | 17/422 [00:05<02:07,  3.18it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/l5UWczmUVeY_000019_000029.wav


  4%|▍         | 18/422 [00:05<02:06,  3.20it/s]

MoviePy - Done.

File: datasets/train/Crying/l5UWczmUVeY_000019_000029.mp4
Ground Truth: Crying
Top predictions:
          Crying: 57.49%
     Suffocating: 24.86%
        Laughing: 4.96%
        Sleeping: 2.47%
Playing with pets: 1.77%



  4%|▍         | 18/422 [00:06<02:06,  3.20it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/oH_BDY4bWp4_000000_000010.wav


  5%|▍         | 19/422 [00:06<02:04,  3.23it/s]

MoviePy - Done.

File: datasets/train/Crying/oH_BDY4bWp4_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 85.35%
     Suffocating: 6.82%
Domestic violence: 1.35%
Making phone calls: 1.27%
  Brushing teeth: 1.10%



  5%|▍         | 19/422 [00:06<02:04,  3.23it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Oi7DvmU-tF0_000000_000010.wav


  5%|▍         | 20/422 [00:06<02:07,  3.16it/s]

MoviePy - Done.

File: datasets/train/Crying/Oi7DvmU-tF0_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 89.67%
     Suffocating: 7.61%
Domestic violence: 0.92%
Making phone calls: 0.26%
  Brushing teeth: 0.23%



  5%|▍         | 20/422 [00:06<02:07,  3.16it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/oNiWBhu1-JQ_000000_000010.wav


  5%|▍         | 21/422 [00:06<02:09,  3.11it/s]

MoviePy - Done.

File: datasets/train/Crying/oNiWBhu1-JQ_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 75.05%
     Suffocating: 13.51%
        Sleeping: 2.46%
        Laughing: 1.26%
Playing with pets: 1.12%



  5%|▍         | 21/422 [00:07<02:09,  3.11it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Pa4kyZAUTXQ_000017_000027.wav


  5%|▌         | 22/422 [00:07<02:05,  3.19it/s]

MoviePy - Done.

File: datasets/train/Crying/Pa4kyZAUTXQ_000017_000027.mp4
Ground Truth: Crying
Top predictions:
          Crying: 79.31%
     Suffocating: 13.20%
      Meditating: 1.43%
        Sleeping: 1.18%
Playing with pets: 0.96%



  5%|▌         | 22/422 [00:07<02:05,  3.19it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/pg4HbzuxCIE_000022_000032.wav


  5%|▌         | 23/422 [00:07<02:05,  3.18it/s]

MoviePy - Done.

File: datasets/train/Crying/pg4HbzuxCIE_000022_000032.mp4
Ground Truth: Crying
Top predictions:
          Crying: 70.26%
     Suffocating: 12.69%
Playing with pets: 4.17%
        Laughing: 2.06%
         Choking: 1.66%



  5%|▌         | 23/422 [00:07<02:05,  3.18it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/pHgqoFKTBDc_000004_000014.wav


  6%|▌         | 24/422 [00:07<02:04,  3.19it/s]

MoviePy - Done.

File: datasets/train/Crying/pHgqoFKTBDc_000004_000014.mp4
Ground Truth: Crying
Top predictions:
          Crying: 88.09%
     Suffocating: 4.46%
      Meditating: 1.18%
        Sleeping: 1.08%
Playing with pets: 0.66%



  6%|▌         | 24/422 [00:07<02:04,  3.19it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/pZxdg1Stvb8_000000_000010.wav


  6%|▌         | 25/422 [00:08<02:02,  3.23it/s]

MoviePy - Done.

File: datasets/train/Crying/pZxdg1Stvb8_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 87.16%
     Suffocating: 6.35%
Domestic violence: 1.48%
Making phone calls: 0.96%
  Brushing teeth: 0.48%



  6%|▌         | 25/422 [00:08<02:02,  3.23it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/QApH290EHcU_000035_000045.wav


  6%|▌         | 26/422 [00:08<02:01,  3.26it/s]

MoviePy - Done.

File: datasets/train/Crying/QApH290EHcU_000035_000045.mp4
Ground Truth: Crying
Top predictions:
          Crying: 22.64%
        Laughing: 21.84%
Playing with pets: 20.11%
     Suffocating: 10.30%
         Talking: 6.05%



  6%|▌         | 26/422 [00:08<02:01,  3.26it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/qFOg87dNSh4_000001_000011.wav


  6%|▋         | 27/422 [00:08<02:00,  3.26it/s]

MoviePy - Done.

File: datasets/train/Crying/qFOg87dNSh4_000001_000011.mp4
Ground Truth: Crying
Top predictions:
          Crying: 89.50%
     Suffocating: 7.72%
Playing with pets: 0.56%
Domestic violence: 0.41%
        Laughing: 0.37%



  6%|▋         | 27/422 [00:08<02:00,  3.26it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/quf-iuFOT4s_000000_000010.wav


  7%|▋         | 28/422 [00:09<02:08,  3.07it/s]

MoviePy - Done.

File: datasets/train/Crying/quf-iuFOT4s_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 20.18%
     Celebrating: 12.17%
         Dancing: 10.62%
Playing with pets: 10.07%
          Crying: 7.78%



  7%|▋         | 28/422 [00:09<02:08,  3.07it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/rCwNyAjkGPQ_000031_000041.wav


  7%|▋         | 29/422 [00:09<02:04,  3.15it/s]

MoviePy - Done.

File: datasets/train/Crying/rCwNyAjkGPQ_000031_000041.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 77.91%
          Crying: 6.13%
         Singing: 4.24%
         Talking: 1.89%
Making phone calls: 1.59%



  7%|▋         | 29/422 [00:09<02:04,  3.15it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/sjZsedv-jG8_000008_000018.wav


  7%|▋         | 30/422 [00:09<02:03,  3.18it/s]

MoviePy - Done.

File: datasets/train/Crying/sjZsedv-jG8_000008_000018.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 76.16%
         Singing: 4.91%
         Talking: 3.78%
          Crying: 3.23%
     Suffocating: 2.77%



  7%|▋         | 30/422 [00:09<02:03,  3.18it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/tIm0yA_u5Qc_000000_000010.wav


  7%|▋         | 31/422 [00:09<02:05,  3.12it/s]

MoviePy - Done.

File: datasets/train/Crying/tIm0yA_u5Qc_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 46.80%
Playing with pets: 27.45%
     Celebrating: 12.94%
     Suffocating: 4.91%
No people is in the room: 1.73%



  7%|▋         | 31/422 [00:10<02:05,  3.12it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/tlY2Cy-VH_g_000005_000015.wav


  8%|▊         | 32/422 [00:10<02:02,  3.18it/s]

MoviePy - Done.

File: datasets/train/Crying/tlY2Cy-VH_g_000005_000015.mp4
Ground Truth: Crying
Top predictions:
          Crying: 53.60%
     Suffocating: 37.49%
        Sleeping: 1.49%
      Meditating: 1.15%
No people is in the room: 0.98%



  8%|▊         | 32/422 [00:10<02:02,  3.18it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/TNl9NjfWZ1o_000084_000094.wav


  8%|▊         | 33/422 [00:10<02:01,  3.21it/s]

MoviePy - Done.

File: datasets/train/Crying/TNl9NjfWZ1o_000084_000094.mp4
Ground Truth: Crying
Top predictions:
          Crying: 46.93%
     Suffocating: 29.32%
Playing with pets: 13.10%
Domestic violence: 2.10%
      Meditating: 1.44%



  8%|▊         | 33/422 [00:10<02:01,  3.21it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/u-oc-Eln_yc_000003_000013.wav


  8%|▊         | 34/422 [00:10<02:02,  3.17it/s]

MoviePy - Done.

File: datasets/train/Crying/u-oc-Eln_yc_000003_000013.mp4
Ground Truth: Crying
Top predictions:
          Crying: 46.80%
     Suffocating: 24.82%
No people is in the room: 10.44%
Playing with pets: 3.91%
Domestic violence: 3.88%



  8%|▊         | 34/422 [00:11<02:02,  3.17it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Ux2iImyAiL4_000001_000011.wav


  8%|▊         | 35/422 [00:11<02:00,  3.21it/s]

MoviePy - Done.

File: datasets/train/Crying/Ux2iImyAiL4_000001_000011.mp4
Ground Truth: Crying
Top predictions:
Playing with pets: 34.68%
        Sleeping: 25.18%
          Crying: 16.00%
     Suffocating: 7.29%
     Eating food: 5.25%



  8%|▊         | 35/422 [00:11<02:00,  3.21it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/VcHysqkJPSE_000002_000012.wav


  9%|▊         | 36/422 [00:11<01:59,  3.22it/s]

MoviePy - Done.

File: datasets/train/Crying/VcHysqkJPSE_000002_000012.mp4
Ground Truth: Crying
Top predictions:
     Celebrating: 20.69%
     Suffocating: 10.26%
No people is in the room: 9.56%
          Crying: 8.91%
      Meditating: 8.73%



  9%|▊         | 36/422 [00:11<01:59,  3.22it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/X3G7b1JNAfs_000000_000010.wav


  9%|▉         | 37/422 [00:11<01:59,  3.22it/s]

MoviePy - Done.

File: datasets/train/Crying/X3G7b1JNAfs_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 57.08%
     Celebrating: 11.23%
Playing with pets: 10.17%
         Talking: 3.51%
          Crying: 3.15%



  9%|▉         | 37/422 [00:12<01:59,  3.22it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/XBNgVxsrqo4_000010_000020.wav


  9%|▉         | 38/422 [00:12<01:58,  3.24it/s]

MoviePy - Done.

File: datasets/train/Crying/XBNgVxsrqo4_000010_000020.mp4
Ground Truth: Crying
Top predictions:
          Crying: 42.76%
     Suffocating: 20.78%
Playing with pets: 8.42%
Domestic violence: 6.35%
No people is in the room: 4.08%



  9%|▉         | 38/422 [00:12<01:58,  3.24it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/xDLo1xt8P6U_000014_000024.wav


  9%|▉         | 39/422 [00:12<02:00,  3.17it/s]

MoviePy - Done.

File: datasets/train/Crying/xDLo1xt8P6U_000014_000024.mp4
Ground Truth: Crying
Top predictions:
          Crying: 59.05%
     Suffocating: 22.47%
No people is in the room: 6.03%
Domestic violence: 3.37%
      Meditating: 3.11%



  9%|▉         | 39/422 [00:12<02:00,  3.17it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/yAgm_XnqBec_000010_000020.wav


  9%|▉         | 40/422 [00:12<02:03,  3.08it/s]

MoviePy - Done.

File: datasets/train/Crying/yAgm_XnqBec_000010_000020.mp4
Ground Truth: Crying
Top predictions:
          Crying: 62.57%
     Suffocating: 17.85%
Domestic violence: 10.56%
Playing with pets: 3.57%
No people is in the room: 2.93%



  9%|▉         | 40/422 [00:12<02:03,  3.08it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/yDD0alN95O8_000015_000025.wav


 10%|▉         | 41/422 [00:13<02:02,  3.11it/s]

MoviePy - Done.

File: datasets/train/Crying/yDD0alN95O8_000015_000025.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 44.80%
     Suffocating: 24.76%
Domestic violence: 7.64%
          Crying: 6.36%
Playing with pets: 4.05%



 10%|▉         | 41/422 [00:13<02:02,  3.11it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/yDKz5An0qd0_000024_000034.wav


 10%|▉         | 42/422 [00:13<02:00,  3.15it/s]

MoviePy - Done.

File: datasets/train/Crying/yDKz5An0qd0_000024_000034.mp4
Ground Truth: Crying
Top predictions:
          Crying: 60.22%
        Laughing: 19.60%
     Suffocating: 4.71%
        Sleeping: 2.14%
  Brushing teeth: 1.49%



 10%|▉         | 42/422 [00:13<02:00,  3.15it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/zQWdo_Er87M_000008_000018.wav


 10%|█         | 43/422 [00:13<02:03,  3.07it/s]

MoviePy - Done.

File: datasets/train/Crying/zQWdo_Er87M_000008_000018.mp4
Ground Truth: Crying
Top predictions:
          Crying: 89.96%
     Suffocating: 2.94%
Making phone calls: 1.92%
Domestic violence: 1.21%
        Laughing: 1.11%



 10%|█         | 43/422 [00:13<02:03,  3.07it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/ZrFW1iPMnig_000060_000070.wav


 10%|█         | 44/422 [00:14<02:01,  3.11it/s]

MoviePy - Done.

File: datasets/train/Crying/ZrFW1iPMnig_000060_000070.mp4
Ground Truth: Crying
Top predictions:
          Crying: 44.43%
        Laughing: 14.61%
Playing with pets: 14.38%
     Suffocating: 13.04%
  Brushing teeth: 2.60%



 10%|█         | 44/422 [00:14<02:01,  3.11it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/zwOBqeFTgiU_000012_000022.wav


 11%|█         | 45/422 [00:14<01:59,  3.15it/s]

MoviePy - Done.

File: datasets/train/Crying/zwOBqeFTgiU_000012_000022.mp4
Ground Truth: Crying
Top predictions:
          Crying: 52.80%
     Suffocating: 24.21%
Playing with pets: 11.30%
Domestic violence: 5.17%
No people is in the room: 1.09%



 11%|█         | 45/422 [00:14<01:59,  3.15it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_ceBK5pQTrs_000033_000043.wav


 11%|█         | 46/422 [00:14<01:58,  3.19it/s]

MoviePy - Done.

File: datasets/train/Crying/_ceBK5pQTrs_000033_000043.mp4
Ground Truth: Crying
Top predictions:
          Crying: 80.84%
     Suffocating: 11.79%
Playing with pets: 1.58%
Domestic violence: 1.18%
No people is in the room: 0.79%



 11%|█         | 46/422 [00:14<01:58,  3.19it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_fVz9qyplBc_000078_000088.wav


 11%|█         | 47/422 [00:15<02:03,  3.03it/s]

MoviePy - Done.

File: datasets/train/Crying/_fVz9qyplBc_000078_000088.mp4
Ground Truth: Crying
Top predictions:
          Crying: 80.42%
     Suffocating: 14.58%
Playing with pets: 1.72%
Domestic violence: 0.97%
      Meditating: 0.39%



 11%|█         | 47/422 [00:15<02:03,  3.03it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_k0fnTiKEMc_000077_000087.wav


 11%|█▏        | 48/422 [00:15<02:02,  3.06it/s]

MoviePy - Done.

File: datasets/train/Crying/_k0fnTiKEMc_000077_000087.mp4
Ground Truth: Crying
Top predictions:
          Crying: 52.78%
     Suffocating: 19.16%
Playing with pets: 11.17%
        Laughing: 3.88%
No people is in the room: 3.41%



 11%|█▏        | 48/422 [00:15<02:02,  3.06it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_uPh9i-xaaE_000094_000104.wav


 12%|█▏        | 49/422 [00:15<01:58,  3.14it/s]

MoviePy - Done.

File: datasets/train/Crying/_uPh9i-xaaE_000094_000104.mp4
Ground Truth: Crying
Top predictions:
          Crying: 89.69%
     Suffocating: 7.86%
Playing with pets: 0.46%
Making phone calls: 0.44%
      Meditating: 0.29%



 12%|█▏        | 49/422 [00:15<01:58,  3.14it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_wRQiJdk2Rw_000004_000014.wav


 12%|█▏        | 50/422 [00:16<01:56,  3.19it/s]

MoviePy - Done.

File: datasets/train/Crying/_wRQiJdk2Rw_000004_000014.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 51.36%
     Eating food: 16.42%
          Crying: 8.97%
        Laughing: 4.01%
   Doing laundry: 2.38%

This video datasets/train/Falling down/fall-01-cam0-rgb.mp4 has no audio!


 12%|█▏        | 52/422 [00:16<01:15,  4.90it/s]

This video datasets/train/Falling down/fall-02-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-03-cam0-rgb.mp4 has no audio!


 13%|█▎        | 54/422 [00:16<00:56,  6.52it/s]

This video datasets/train/Falling down/fall-04-cam0-rgb.mp4 has no audio!


 13%|█▎        | 56/422 [00:16<00:45,  7.98it/s]

This video datasets/train/Falling down/fall-05-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-06-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-07-cam0-rgb.mp4 has no audio!


 14%|█▎        | 58/422 [00:16<00:39,  9.20it/s]

This video datasets/train/Falling down/fall-08-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-09-cam0-rgb.mp4 has no audio!


 14%|█▍        | 60/422 [00:16<00:35, 10.14it/s]

This video datasets/train/Falling down/fall-10-cam0-rgb.mp4 has no audio!


 15%|█▍        | 62/422 [00:16<00:33, 10.83it/s]

This video datasets/train/Falling down/fall-11-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-12-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-13-cam0-rgb.mp4 has no audio!


 15%|█▌        | 64/422 [00:17<00:31, 11.38it/s]

This video datasets/train/Falling down/fall-14-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-15-cam0-rgb.mp4 has no audio!


 16%|█▌        | 66/422 [00:17<00:30, 11.81it/s]

This video datasets/train/Falling down/fall-16-cam0-rgb.mp4 has no audio!


 16%|█▌        | 68/422 [00:17<00:29, 12.17it/s]

This video datasets/train/Falling down/fall-17-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-18-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-19-cam0-rgb.mp4 has no audio!


 17%|█▋        | 70/422 [00:17<00:28, 12.41it/s]

This video datasets/train/Falling down/fall-20-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-21-cam0-rgb.mp4 has no audio!


 17%|█▋        | 72/422 [00:17<00:27, 12.56it/s]

This video datasets/train/Falling down/fall-22-cam0-rgb.mp4 has no audio!


 18%|█▊        | 74/422 [00:17<00:27, 12.65it/s]

This video datasets/train/Falling down/fall-23-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-24-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-25-cam0-rgb.mp4 has no audio!


 18%|█▊        | 76/422 [00:18<00:27, 12.75it/s]

This video datasets/train/Falling down/fall-26-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-27-cam0-rgb.mp4 has no audio!


 18%|█▊        | 78/422 [00:18<00:26, 12.79it/s]

This video datasets/train/Falling down/fall-28-cam0-rgb.mp4 has no audio!


 19%|█▉        | 80/422 [00:18<00:26, 12.82it/s]

This video datasets/train/Falling down/fall-29-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-30-cam0-rgb.mp4 has no audio!


 19%|█▉        | 80/422 [00:18<00:26, 12.82it/s]

datasets/audios/train/Glass breaking
MoviePy - Writing audio in datasets/audios/train/Glass breaking/breaking_glass_1.wav


 19%|█▉        | 80/422 [00:18<00:26, 12.82it/s]

MoviePy - Done.

File: datasets/train/Glass breaking/breaking_glass_1.mp4
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 88.73%
Furniture Collapse: 6.38%
Theft or vandalism: 4.26%
        Break in: 0.42%
    Falling down: 0.06%



 19%|█▉        | 80/422 [00:19<00:26, 12.82it/s]

datasets/audios/train/Glass breaking
MoviePy - Writing audio in datasets/audios/train/Glass breaking/breaking_glass_2.wav


 19%|█▉        | 82/422 [00:19<01:04,  5.25it/s]

MoviePy - Done.

File: datasets/train/Glass breaking/breaking_glass_2.mp4
Ground Truth: Glass breaking
Top predictions:
Furniture Collapse: 71.62%
  Glass breaking: 25.65%
    Falling down: 1.95%
        Break in: 0.57%
Theft or vandalism: 0.07%


File: datasets/train/Glass breaking/glass_break_1.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 86.63%
        Break in: 8.08%
Theft or vandalism: 3.44%
Furniture Collapse: 0.77%
Picking up objects: 0.67%



 20%|█▉        | 84/422 [00:19<00:54,  6.23it/s]


File: datasets/train/Glass breaking/glass_break_2.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 92.30%
        Break in: 7.41%
Furniture Collapse: 0.17%
Theft or vandalism: 0.06%
    Falling down: 0.02%


File: datasets/train/Glass breaking/glass_break_3.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 98.19%
        Break in: 1.75%
Furniture Collapse: 0.02%
Theft or vandalism: 0.01%
     Celebrating: 0.00%



 20%|██        | 86/422 [00:19<00:46,  7.19it/s]


File: datasets/train/Glass breaking/glass_break_4.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 91.25%
        Break in: 6.78%
Theft or vandalism: 1.56%
Furniture Collapse: 0.21%
      Decorating: 0.04%



 21%|██        | 88/422 [00:19<00:41,  8.11it/s]


File: datasets/train/Glass breaking/glass_break_5.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 97.23%
        Break in: 2.48%
Theft or vandalism: 0.13%
Furniture Collapse: 0.13%
    Falling down: 0.01%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_007_d5cb28f674c6a53e8553499a58d776cf.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 89.98%
         Talking: 3.65%
        Chatting: 2.47%
         Working: 0.63%
     Eating food: 0.49%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_010_d39ba26fef0985d0d8673eb0c3a20680.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 33.05%
        Knitting: 8.79%
   Taking photos: 8.56%
      Meditating: 7.44%
    Sitting down: 6.75%



 21%|██▏       | 90/422 [00:19<00:37,  8.89it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_021_ed591cef512b508455115a2aef138cff.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 97.80%
        Break in: 0.65%
Furniture Collapse: 0.65%
No people is in the room: 0.31%
   Setting table: 0.31%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_029_809d27b667b151eca0bd0eddd987d1c4.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 64.50%
   Doing laundry: 26.22%
         Working: 2.33%
  Using computer: 0.98%
   Mopping floor: 0.55%



 22%|██▏       | 92/422 [00:20<00:34,  9.56it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_031_3c02e8e5d8e6b1418d99db6d315538e9.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 38.20%
         Working: 14.43%
         Walking: 11.90%
   Doing laundry: 11.25%
     Eating food: 6.18%



 22%|██▏       | 94/422 [00:20<00:32, 10.17it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_041_d33afde0de2acf62af3ccffbaef6b2a3.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 75.33%
   Doing laundry: 15.48%
         Walking: 2.24%
         Talking: 1.52%
   Taking photos: 0.77%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_084_b09f52a98a49dd2b15e973503f7a0c7a.wav
Ground Truth: Glass breaking
Top predictions:
      Decorating: 53.89%
   Setting table: 21.87%
No people is in the room: 6.25%
         Cooking: 2.18%
       Showering: 2.03%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_085_d3439ccb762e3ca64658fb718ad759e6.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 96.75%
         Working: 0.85%
    Sitting down: 0.58%
   Doing laundry: 0.27%
     Eating food: 0.25%



 23%|██▎       | 96/422 [00:20<00:30, 10.70it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_106_c0350d3ea60fe895d434678f8e5e93c5.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 99.15%
    Sitting down: 0.30%
         Working: 0.15%
     Eating food: 0.07%
   Setting table: 0.05%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_123_b2f92db15d0488c29eabcb31a0c19b0e.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 90.14%
   Taking photos: 1.94%
         Talking: 1.69%
   Doing laundry: 1.50%
         Walking: 0.49%



 23%|██▎       | 98/422 [00:20<00:29, 11.10it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_133_47478d4e1e57ffcfdafc0f0d44afc1f6.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 63.48%
         Walking: 13.09%
     Eating food: 5.81%
         Talking: 4.61%
        Drinking: 3.19%



 24%|██▎       | 100/422 [00:20<00:28, 11.39it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_134_f0e8808559de51919375259ae6bcabd1.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 89.54%
         Smoking: 2.83%
    Making noise: 1.85%
      Meditating: 1.23%
      Exercising: 0.49%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_152_b6fbf1dcf100211ff7160d6fe55ae1da.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 91.59%
          Dining: 1.90%
         Talking: 1.54%
        Chatting: 1.39%
     Eating food: 0.89%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_159_f83fd055358af544642b5fc3ece49e88.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 77.04%
   Setting table: 8.75%
      Decorating: 3.87%
     Eating food: 3.29%
  Glass breaking: 1.17%



 24%|██▍       | 102/422 [00:20<00:27, 11.51it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_192_c15fb1103080ff542a231e5666d0ebda.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 68.04%
Picking up objects: 10.46%
Theft or vandalism: 8.49%
         Smoking: 1.63%
   Taking photos: 1.62%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_195_1d058613c2d9963230f48a8d76b6dcff.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 25.36%
        Knitting: 17.75%
    Sitting down: 10.85%
      Meditating: 5.10%
   Taking photos: 4.10%



 25%|██▍       | 104/422 [00:21<00:27, 11.70it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_216_6bc7ce3de687324fae214bcc3d9deb97.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 55.31%
        Knitting: 19.67%
         Walking: 7.19%
         Singing: 2.37%
   Taking photos: 1.94%



 25%|██▌       | 106/422 [00:21<00:26, 11.80it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_217_9440e3c6f20260dc148529910981819f.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 98.20%
         Working: 0.49%
   Setting table: 0.35%
    Sitting down: 0.18%
   Doing laundry: 0.15%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_221_80319cb23891fbcf83123c2590bc1ef9.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 94.24%
        Drinking: 0.79%
         Walking: 0.77%
         Talking: 0.44%
   Taking photos: 0.30%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_222_3a84a85f8da2c498588baf1352cbd840.wav
Ground Truth: Glass breaking
Top predictions:
          Dining: 54.88%
     Eating food: 14.88%
No people is in the room: 9.90%
        Drinking: 5.41%
      Decorating: 3.37%



 26%|██▌       | 108/422 [00:21<00:26, 11.84it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_236_695a49eb49bd0db7534c7a9ae13aad9d.wav
Ground Truth: Glass breaking
Top predictions:
    Sitting down: 47.94%
No people is in the room: 26.96%
      Meditating: 15.95%
        Sleeping: 2.58%
         Working: 1.91%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_247_0e1843a284cab188e6b07a799bf336ba.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 96.28%
         Working: 1.13%
    Sitting down: 0.38%
   Doing laundry: 0.31%
         Walking: 0.27%



 26%|██▌       | 110/422 [00:21<00:26, 11.73it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_250_996579dfc2bdae6401586fe58628f67f.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 95.03%
   Setting table: 1.18%
         Talking: 0.94%
         Working: 0.56%
        Chatting: 0.35%



 27%|██▋       | 112/422 [00:21<00:26, 11.69it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_251_18742daaca944e44dff28088351addcf.wav
Ground Truth: Glass breaking
Top predictions:
          Dining: 50.84%
     Eating food: 14.39%
No people is in the room: 13.50%
         Potluck: 6.89%
      Decorating: 3.37%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_268_db99c63a6dd1ae94717ec2162d2d848b.wav
Ground Truth: Glass breaking
Top predictions:
      Decorating: 38.36%
No people is in the room: 27.24%
   Setting table: 6.31%
       Showering: 4.56%
   Doing laundry: 3.90%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_338_6c47116b98e3fbf4bebff8306d76c1e5.wav
Ground Truth: Glass breaking
Top predictions:


 27%|██▋       | 114/422 [00:21<00:26, 11.55it/s]

No people is in the room: 23.48%
    Sitting down: 21.80%
         Walking: 16.33%
        Knitting: 11.69%
         Working: 5.26%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_339_155373fd3ddf5a51e8cf6ca7118ba066.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 75.91%
      Meditating: 8.31%
    Sitting down: 2.14%
         Singing: 1.42%
         Working: 1.36%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_341_a46af6e9de2f6da4d6a1f39929fcbc66.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 62.91%
Picking up objects: 8.49%
  Glass breaking: 7.06%
   Setting table: 5.78%
Furniture Collapse: 4.90%



 28%|██▊       | 118/422 [00:22<00:26, 11.49it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_342_a693427693d1adc1863508df0020a2a6.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 52.34%
Unattended cooking: 24.78%
Smoke or Fire or Open flame: 4.07%
      Decorating: 3.72%
Theft or vandalism: 3.58%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_343_da7ea98de9655fa322090073b532de11.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 98.42%
    Sitting down: 0.63%
      Meditating: 0.30%
        Sleeping: 0.10%
         Working: 0.07%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_344_914db26a201e7a04c18e1b9a9486b0da.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 91.04%
Furniture Collapse: 3.95%
Theft or vandalism: 1.76%
No people is in the room: 1.51%
        Break in: 0.89%



 28%|██▊       | 120/422 [00:22<00:26, 11.48it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_350_0195370dba6b2db52644a26b6d0a5d0e.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 41.26%
No people is in the room: 36.89%
   Taking photos: 6.40%
     Eating food: 4.00%
         Talking: 2.04%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_351_3ead4ffc8296df2f1368913158ca0385.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 79.54%
    Making noise: 7.43%
      Meditating: 5.24%
         Smoking: 2.16%
      Exercising: 1.41%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_361_8a5af523d2523e4ceb78b3152e87a8b3.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 92.43%
         Talking: 2.95%
         Walking: 2.54%
        Chatting: 0.48%
     Eating food: 0.21%



 29%|██▉       | 124/422 [00:22<00:25, 11.47it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_365_a57ea42783b0004878c7cfd0a52580ec.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 80.17%
        Studying: 3.93%
   Doing laundry: 2.41%
         Talking: 1.62%
    Sitting down: 1.51%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_367_677bcdf30b880128cb72a58b4001c12b.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 83.10%
         Talking: 5.47%
   Doing laundry: 2.10%
         Working: 1.95%
        Chatting: 1.29%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_385_f1353c6e4a3cdc2c6c47db9b81f11353.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 96.29%
    Sitting down: 0.69%
         Working: 0.60%
        Sleeping: 0.36%
         Talking: 0.28%



 30%|██▉       | 126/422 [00:22<00:25, 11.65it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_424_986fb6b13176cebb4c81e2dd9edda3ef.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 91.35%
         Working: 3.05%
    Sitting down: 1.39%
         Talking: 0.81%
        Studying: 0.71%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_443_000b143bd21afde84ae6e8907628d244.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 76.18%
         Walking: 10.46%
         Talking: 2.15%
   Taking photos: 1.65%
   Doing laundry: 1.63%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_445_49fe4921b063e1a0fd09212d1a777902.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 90.97%
No people is in the room: 1.61%
         Working: 1.25%
   Doing laundry: 1.18%
         Running: 0.94%



 31%|███       | 130/422 [00:23<00:24, 11.92it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_456_52eb80b72dd86bdc747baf2a5c4a1982.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 79.92%
Unattended cooking: 9.18%
     Eating food: 1.97%
        Drinking: 1.66%
        Flooding: 1.36%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_462_aead4cd34c41104585f6a1e1097c0af0.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 66.60%
   Taking photos: 6.62%
     Eating food: 4.14%
   Mopping floor: 3.83%
         Walking: 3.26%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_472_3c02e8e5d8e6b1418d99db6d315538e9.wav
Ground Truth: Glass breaking
Top predictions:
         Walking: 42.27%
No people is in the room: 31.82%
         Working: 6.94%
   Doing laundry: 4.03%
     Eating food: 3.37%



 31%|███▏      | 132/422 [00:23<00:24, 12.01it/s]


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_489_1e9aaa54548513a7b7786cfc96081d7a.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 91.15%
No people is in the room: 5.05%
Furniture Collapse: 1.07%
          Dining: 0.56%
        Break in: 0.42%


File: datasets/train/Glass breaking/mixture_evaltest_glassbreak_492_67d149ed57d3bc4b08a0ff329d797db9.wav
Ground Truth: Glass breaking
Top predictions:
No people is in the room: 81.93%
         Walking: 4.85%
        Sleeping: 1.00%
        Tripping: 0.85%
         Talking: 0.77%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_018_54726d75a93470398be935f067e04f68.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 88.65%
        Chatting: 3.57%
         Talking: 2.02%
          Dining: 1.94%
     Eating food: 1.18%



 32%|███▏      | 136/422 [00:23<00:23, 12.12it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_042_262fde5f6abc5cef9abac6a23a6446c1.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 96.22%
         Talking: 1.57%
        Chatting: 0.39%
         Working: 0.27%
     Eating food: 0.23%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_089_f7f54b99ef88a63a4a4b446bdd81605d.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 89.72%
   Doing laundry: 4.96%
         Working: 2.93%
Unattended cooking: 0.59%
       Showering: 0.32%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_101_204292a65ead10e68c9356d14bfceca6.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 96.16%
    Sitting down: 1.03%
         Working: 0.61%
   Setting table: 0.60%
      Decorating: 0.31%



 33%|███▎      | 138/422 [00:23<00:23, 12.15it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_113_17cf9cd8beb5acaec026e2ec71adc2ac.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 38.33%
   Doing laundry: 22.28%
No people is in the room: 8.34%
     Eating food: 6.53%
         Working: 3.97%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_117_17f0210c7068fc06bf184361326c7f14.wav
Ground Truth: Gunshot
Top predictions:
   Doing laundry: 35.16%
No people is in the room: 34.04%
         Walking: 15.42%
         Working: 3.62%
        Tripping: 1.64%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_150_2102148ef7867f0164c65a8554d6487e.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 43.69%
Unattended cooking: 13.79%
No people is in the room: 8.69%
   Doing laundry: 7.00%
        Flooding: 3.60%



 34%|███▎      | 142/422 [00:24<00:23, 12.08it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_151_4b399ccee4631026958ef22b7fb3f8b6.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 33.03%
      Decorating: 18.81%
         Walking: 9.02%
          Dining: 4.97%
         Working: 3.54%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_194_2f8712332da25804318a568476a87054.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 92.13%
    Sitting down: 5.31%
         Working: 1.03%
   Doing laundry: 0.27%
   Mopping floor: 0.26%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_196_796df70596c8c2cc51778209fbdae35d.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 22.37%
         Smoking: 21.23%
        Drinking: 10.69%
         Walking: 10.01%
Unattended cooking: 5.62%



 34%|███▍      | 144/422 [00:24<00:23, 12.06it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_213_02e2c2cf3dd650e2cfcfa401157b88c7.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 74.57%
   Doing laundry: 21.74%
         Working: 1.27%
     Eating food: 0.44%
Unattended cooking: 0.35%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_217_3f9b00cf7172d45cf23f6142a90c21e1.wav
Ground Truth: Gunshot
Top predictions:
        Knitting: 39.72%
         Walking: 21.52%
   Taking photos: 5.57%
    Sitting down: 4.49%
        Drinking: 3.76%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_218_3782064d387e20d38d3c45d7af263ce2.wav
Ground Truth: Gunshot
Top predictions:
Unattended cooking: 37.49%
No people is in the room: 31.18%
         Smoking: 13.29%
         Cooking: 10.24%
        Drinking: 2.60%



 35%|███▌      | 148/422 [00:24<00:22, 12.10it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_233_2127e0653a25b8819ed7e418fa7f2d24.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 49.50%
    Sitting down: 23.51%
        Sleeping: 5.75%
         Walking: 3.38%
      Meditating: 2.52%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_263_5d6ca9ca4b3c54256ba6c247288fe19b.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 28.00%
        Knitting: 17.57%
    Sitting down: 5.47%
          Normal: 5.05%
        Sleeping: 4.90%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_265_d985368362bf832c10c6f529969d39f2.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 29.83%
        Knitting: 28.69%
    Sitting down: 8.96%
        Drinking: 3.21%
   Taking photos: 3.14%



 36%|███▌      | 150/422 [00:24<00:22, 12.09it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_280_7ccc0504e7a9a28ee4b363df257b0b28.wav
Ground Truth: Gunshot
Top predictions:
         Talking: 42.26%
No people is in the room: 11.98%
         Smoking: 6.31%
     Eating food: 5.30%
        Studying: 3.94%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_299_7dd9c08d2915185b36dd923684078848.wav
Ground Truth: Gunshot
Top predictions:
         Talking: 42.94%
No people is in the room: 10.52%
     Eating food: 7.80%
         Choking: 7.67%
Domestic violence: 6.26%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_327_bdcb8a004594adefe5ee6a1d812a6ada.wav
Ground Truth: Gunshot
Top predictions:
       Showering: 82.53%
No people is in the room: 10.99%
   Mopping floor: 3.04%
   Doing laundry: 1.25%
      Decorating: 0.75%



 36%|███▋      | 154/422 [00:25<00:22, 12.08it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_354_d0339f801fafac4621e351c70aac1b97.wav
Ground Truth: Gunshot
Top predictions:
    Sitting down: 37.10%
         Walking: 14.86%
No people is in the room: 11.31%
        Sleeping: 8.39%
      Meditating: 7.62%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_375_fa698e7fba20027bfc86d461eff5b7e5.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 75.18%
   Doing laundry: 3.32%
    Sitting down: 2.91%
         Walking: 2.37%
Playing with pets: 2.14%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_398_fdbc79a9c7616fce58b4e6e8b7691127.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 46.70%
Unattended cooking: 6.38%
         Smoking: 5.99%
         Talking: 5.95%
      Using drug: 5.56%



 37%|███▋      | 156/422 [00:25<00:21, 12.15it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_408_0cc1186adeffec5ecb8e2aedc0ac5860.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 50.42%
No people is in the room: 39.99%
         Working: 2.53%
         Talking: 2.00%
    Sitting down: 0.93%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_414_259bcc20aa36899e225998341d952201.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 45.03%
    Sitting down: 7.31%
        Sleeping: 5.10%
        Studying: 4.19%
         Smoking: 3.71%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_423_0ceabd329fea9a7e7135e6da32b31d83.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 97.51%
    Sitting down: 0.98%
         Working: 0.25%
   Setting table: 0.15%
         Walking: 0.15%



 38%|███▊      | 160/422 [00:25<00:21, 12.19it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_424_6bc7ce3de687324fae214bcc3d9deb97.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 51.36%
        Knitting: 24.86%
         Walking: 6.82%
   Taking photos: 2.31%
         Singing: 1.83%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_425_4d014fb6ffee4603103492ae40df2c07.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 65.93%
         Walking: 8.70%
   Doing laundry: 6.01%
         Working: 2.92%
   Taking photos: 2.44%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_428_a57f792a50ffb934d4979a060bbd8a53.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 99.53%
         Talking: 0.21%
         Working: 0.04%
        Chatting: 0.03%
      Meditating: 0.03%



 38%|███▊      | 162/422 [00:25<00:21, 12.22it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_430_fcc2a04fb4c225afa830a348f0f578aa.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 80.79%
         Talking: 4.93%
   Doing laundry: 2.56%
         Walking: 1.79%
        Chatting: 1.28%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_436_8f71134a16bca0778609f7ee304b8226.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 56.20%
   Setting table: 12.73%
   Doing laundry: 6.51%
      Decorating: 4.41%
         Working: 3.47%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_441_e0da956f691e274817238d8d92b5dfbd.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 82.31%
     Eating food: 8.63%
          Dining: 2.26%
Unattended cooking: 1.41%
   Doing laundry: 1.11%



 39%|███▉      | 166/422 [00:26<00:20, 12.24it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_442_ec4f6e844246a7ace7defb0f9dadf3cb.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 58.35%
     Eating food: 6.02%
         Smoking: 3.96%
         Talking: 3.79%
    Making noise: 3.48%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_448_89e80a662a6892e3c4ec9b40d1564204.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 90.23%
         Running: 7.98%
        Tripping: 0.61%
No people is in the room: 0.33%
        Knitting: 0.28%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_451_7a8524ea1ad916ec5a2b005af07e0211.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 45.58%
        Studying: 8.78%
         Walking: 7.48%
         Working: 5.42%
   Doing laundry: 5.21%



 40%|███▉      | 168/422 [00:26<00:20, 12.24it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_453_bc624b1e38cf9d8e6de76e728f1c7252.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 80.40%
         Walking: 2.98%
      Decorating: 2.30%
    Sitting down: 1.88%
         Working: 1.41%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_458_221b3a2322f4c8c04ee4ce0d58a0e26f.wav
Ground Truth: Gunshot
Top predictions:
      Decorating: 42.78%
   Setting table: 17.33%
No people is in the room: 13.27%
   Mopping floor: 6.00%
       Showering: 4.26%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_461_ba8e84a8770a9fbc5e4ac2995eccb668.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 96.15%
    Sitting down: 2.01%
   Doing laundry: 0.46%
         Working: 0.43%
   Mopping floor: 0.28%



 41%|████      | 172/422 [00:26<00:20, 12.22it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_462_e157f569db5a5099ab41098a727b0ca3.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 93.53%
   Mopping floor: 1.71%
   Doing laundry: 1.65%
      Decorating: 0.58%
   Setting table: 0.50%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_466_68ad378c08a63ff99140223e35dd0cab.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 88.11%
         Gunshot: 3.09%
         Working: 1.88%
   Doing laundry: 1.75%
Theft or vandalism: 1.41%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_471_9841bc2765e3b22bfae353e2dfb1e43c.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 76.01%
         Talking: 11.25%
         Walking: 4.25%
        Chatting: 2.14%
   Taking photos: 2.01%



 41%|████      | 174/422 [00:26<00:20, 12.17it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_473_d43023a9c8be3af808faaab32179e0e8.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 43.81%
        Knitting: 20.37%
        Flooding: 7.96%
   Taking photos: 4.25%
         Working: 3.54%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_475_729037c3cf525e5796cb57c4fa4bebfb.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 70.81%
   Doing laundry: 13.31%
   Mopping floor: 7.41%
      Decorating: 2.31%
   Setting table: 1.89%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_479_96cfc9822829fc30ad17ee05458cf80f.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 45.53%
        Knitting: 15.94%
        Tripping: 12.08%
      Exercising: 6.98%
    Sitting down: 6.30%



 42%|████▏     | 178/422 [00:27<00:20, 12.18it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_480_758d54f0493a25aab43fa6f974ada751.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 61.12%
   Doing laundry: 31.95%
         Working: 1.68%
   Mopping floor: 0.72%
         Walking: 0.68%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_486_53354a65d5caadc898096b57d095d6a0.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 40.60%
         Talking: 15.07%
         Walking: 11.71%
      Exercising: 3.94%
    Sitting down: 3.39%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_489_cafb1875fc7030501fba04161a868f05.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 75.74%
      Exercising: 2.17%
   Taking photos: 2.15%
         Smoking: 2.03%
         Singing: 1.72%



 43%|████▎     | 180/422 [00:27<00:19, 12.19it/s]


File: datasets/train/Gunshot/mixture_evaltest_gunshot_490_230f021c9f5fd6aabb17a7530e7313ad.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 77.04%
   Doing laundry: 5.02%
Unattended cooking: 4.34%
Theft or vandalism: 1.79%
        Drinking: 1.50%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_491_b585a236d29f381e16d625b455063b3e.wav
Ground Truth: Gunshot
Top predictions:
         Walking: 70.88%
         Talking: 12.40%
No people is in the room: 10.17%
   Taking photos: 1.58%
Unattended cooking: 1.20%


File: datasets/train/Gunshot/mixture_evaltest_gunshot_493_87b137fe3d06f960348a235c147666db.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 94.14%
    Sitting down: 1.15%
   Setting table: 0.74%
         Working: 0.68%
         Talking: 0.62%



                                                 


File: datasets/train/Gunshot/mixture_evaltest_gunshot_497_f5ca6ab4416694b7aa1e973364683d7b.wav
Ground Truth: Gunshot
Top predictions:
No people is in the room: 90.69%
         Talking: 1.88%
         Working: 1.49%
   Doing laundry: 1.12%
        Sleeping: 1.04%

datasets/audios/train/Normal


 43%|████▎     | 182/422 [00:27<00:19, 12.13it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/9gK-lchw9AU_000019_000029.wav


 43%|████▎     | 182/422 [00:27<00:19, 12.13it/s]

MoviePy - Done.

File: datasets/train/Normal/9gK-lchw9AU_000019_000029.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 42.14%
         Talking: 17.36%
     Suffocating: 6.66%
        Speaking: 5.12%
Making phone calls: 3.77%



 43%|████▎     | 182/422 [00:28<00:19, 12.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9iKBdRiyrT8_000018_000028.wav


 44%|████▎     | 184/422 [00:28<00:36,  6.55it/s]

MoviePy - Done.

File: datasets/train/Normal/9iKBdRiyrT8_000018_000028.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 87.07%
Playing with pets: 10.37%
        Drinking: 1.30%
     Suffocating: 0.32%
         Choking: 0.24%



 44%|████▎     | 184/422 [00:28<00:36,  6.55it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9j29_SoXvnk_000004_000014.wav


 44%|████▍     | 185/422 [00:28<00:42,  5.57it/s]

MoviePy - Done.

File: datasets/train/Normal/9j29_SoXvnk_000004_000014.mp4
Ground Truth: Normal
Top predictions:
  Brushing teeth: 42.71%
       Showering: 41.71%
        Cleaning: 5.58%
Playing with pets: 4.76%
      Using drug: 1.19%



 44%|████▍     | 185/422 [00:28<00:42,  5.57it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9khsuBlB7KM_000052_000062.wav


 44%|████▍     | 186/422 [00:28<00:49,  4.79it/s]

MoviePy - Done.

File: datasets/train/Normal/9khsuBlB7KM_000052_000062.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 59.03%
   Doing laundry: 7.73%
     Celebrating: 4.34%
     Suffocating: 3.30%
    Sitting down: 2.90%



 44%|████▍     | 186/422 [00:29<00:49,  4.79it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9obe2uzQuLw_000031_000041.wav


 44%|████▍     | 187/422 [00:29<00:54,  4.34it/s]

MoviePy - Done.

File: datasets/train/Normal/9obe2uzQuLw_000031_000041.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 59.99%
          Dining: 19.72%
        Chatting: 2.54%
   Setting table: 2.39%
        Drinking: 2.03%



 44%|████▍     | 187/422 [00:29<00:54,  4.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/a2PzfV-nLA0_000273_000283.wav


 45%|████▍     | 188/422 [00:29<00:58,  3.97it/s]

MoviePy - Done.

File: datasets/train/Normal/a2PzfV-nLA0_000273_000283.mp4
Ground Truth: Normal
Top predictions:
         Cooking: 75.02%
Unattended cooking: 16.24%
       Showering: 3.44%
        Cleaning: 2.52%
   Setting table: 0.88%



 45%|████▍     | 188/422 [00:29<00:58,  3.97it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/A65W8cR_unk_000110_000120.wav


 45%|████▍     | 189/422 [00:29<01:05,  3.57it/s]

MoviePy - Done.

File: datasets/train/Normal/A65W8cR_unk_000110_000120.mp4
Ground Truth: Normal
Top predictions:
         Reading: 61.30%
        Speaking: 15.81%
         Talking: 6.39%
No people is in the room: 3.98%
Playing board games: 3.94%



 45%|████▍     | 189/422 [00:30<01:05,  3.57it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/a6RPzcU3bzA_000007_000017.wav


 45%|████▌     | 190/422 [00:30<01:06,  3.50it/s]

MoviePy - Done.

File: datasets/train/Normal/a6RPzcU3bzA_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 28.95%
     Celebrating: 21.22%
Playing with pets: 8.17%
         Dancing: 7.93%
          Dining: 5.32%



 45%|████▌     | 190/422 [00:30<01:06,  3.50it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/A7Th1TrWh8U_000086_000096.wav


 45%|████▌     | 191/422 [00:30<01:07,  3.45it/s]

MoviePy - Done.

File: datasets/train/Normal/A7Th1TrWh8U_000086_000096.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 19.14%
        Knitting: 9.47%
  Using computer: 8.47%
No people is in the room: 8.39%
    Sitting down: 5.82%



 45%|████▌     | 191/422 [00:30<01:07,  3.45it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AA-ZLdnX1mA_000060_000070.wav


 45%|████▌     | 192/422 [00:30<01:08,  3.34it/s]

MoviePy - Done.

File: datasets/train/Normal/AA-ZLdnX1mA_000060_000070.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 58.78%
     Eating food: 5.82%
        Chatting: 3.78%
  Using computer: 3.55%
         Talking: 3.08%

This video datasets/train/Normal/adl-01-cam0-rgb.mp4 has no audio!


 46%|████▌     | 194/422 [00:30<00:45,  5.01it/s]

This video datasets/train/Normal/adl-02-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-03-cam0-rgb.mp4 has no audio!


 46%|████▋     | 196/422 [00:31<00:34,  6.60it/s]

This video datasets/train/Normal/adl-04-cam0-rgb.mp4 has no audio!


 47%|████▋     | 198/422 [00:31<00:27,  8.04it/s]

This video datasets/train/Normal/adl-05-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-06-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-07-cam0-rgb.mp4 has no audio!


 47%|████▋     | 200/422 [00:31<00:23,  9.26it/s]

This video datasets/train/Normal/adl-08-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-09-cam0-rgb.mp4 has no audio!


 48%|████▊     | 202/422 [00:31<00:21, 10.24it/s]

This video datasets/train/Normal/adl-10-cam0-rgb.mp4 has no audio!


 48%|████▊     | 204/422 [00:31<00:19, 11.01it/s]

This video datasets/train/Normal/adl-11-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-12-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-13-cam0-rgb.mp4 has no audio!


 49%|████▉     | 206/422 [00:31<00:18, 11.61it/s]

This video datasets/train/Normal/adl-14-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-15-cam0-rgb.mp4 has no audio!


 49%|████▉     | 208/422 [00:31<00:17, 12.04it/s]

This video datasets/train/Normal/adl-16-cam0-rgb.mp4 has no audio!


 50%|████▉     | 210/422 [00:32<00:17, 12.32it/s]

This video datasets/train/Normal/adl-17-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-18-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-19-cam0-rgb.mp4 has no audio!


 50%|█████     | 212/422 [00:32<00:16, 12.36it/s]

This video datasets/train/Normal/adl-20-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-21-cam0-rgb.mp4 has no audio!


 51%|█████     | 214/422 [00:32<00:16, 12.56it/s]

This video datasets/train/Normal/adl-22-cam0-rgb.mp4 has no audio!


 51%|█████     | 216/422 [00:32<00:16, 12.69it/s]

This video datasets/train/Normal/adl-23-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-24-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-25-cam0-rgb.mp4 has no audio!


 52%|█████▏    | 218/422 [00:32<00:15, 12.82it/s]

This video datasets/train/Normal/adl-26-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-27-cam0-rgb.mp4 has no audio!


 53%|█████▎    | 222/422 [00:33<00:16, 11.99it/s]

This video datasets/train/Normal/adl-28-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-29-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-30-cam0-rgb.mp4 has no audio!


 53%|█████▎    | 224/422 [00:33<00:16, 12.34it/s]

This video datasets/train/Normal/adl-31-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-32-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-33-cam0-rgb.mp4 has no audio!


 54%|█████▍    | 228/422 [00:33<00:15, 12.81it/s]

This video datasets/train/Normal/adl-34-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-35-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-36-cam0-rgb.mp4 has no audio!


 55%|█████▍    | 230/422 [00:33<00:14, 12.93it/s]

This video datasets/train/Normal/adl-37-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-38-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-39-cam0-rgb.mp4 has no audio!


 55%|█████▍    | 232/422 [00:34<00:14, 13.01it/s]

This video datasets/train/Normal/adl-40-cam0-rgb.mp4 has no audio!
datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/admrQMs57JQ_000002_000012.wav


 55%|█████▍    | 232/422 [00:34<00:14, 13.01it/s]

MoviePy - Done.


 55%|█████▍    | 232/422 [00:34<00:14, 13.01it/s]


File: datasets/train/Normal/admrQMs57JQ_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 86.85%
         Working: 2.32%
         Talking: 1.97%
    Sitting down: 1.93%
   Setting table: 1.23%

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AdwS9UEt1Jg_000125_000135.wav


 55%|█████▌    | 234/422 [00:34<00:34,  5.43it/s]

MoviePy - Done.

File: datasets/train/Normal/AdwS9UEt1Jg_000125_000135.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 30.24%
  Using computer: 19.23%
      Using drug: 12.25%
Organizing space: 5.72%
         Reading: 4.42%



 55%|█████▌    | 234/422 [00:34<00:34,  5.43it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ADzB-RTmLJU_000004_000014.wav


 56%|█████▌    | 235/422 [00:35<00:38,  4.81it/s]

MoviePy - Done.

File: datasets/train/Normal/ADzB-RTmLJU_000004_000014.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 85.36%
        Cleaning: 4.91%
         Working: 2.90%
        Studying: 1.49%
          Sewing: 1.06%



 56%|█████▌    | 235/422 [00:35<00:38,  4.81it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AG7rHLw-ckU_000301_000311.wav


 56%|█████▌    | 236/422 [00:35<00:42,  4.40it/s]

MoviePy - Done.

File: datasets/train/Normal/AG7rHLw-ckU_000301_000311.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 71.45%
   Setting table: 5.18%
         Working: 4.79%
          Dining: 3.01%
  Using computer: 2.28%



 56%|█████▌    | 236/422 [00:35<00:42,  4.40it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AGmRgVcOLYA_000061_000071.wav


 56%|█████▌    | 237/422 [00:35<00:46,  3.96it/s]

MoviePy - Done.

File: datasets/train/Normal/AGmRgVcOLYA_000061_000071.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 40.38%
     Celebrating: 8.85%
No people is in the room: 8.77%
          Dining: 5.93%
Playing board games: 4.52%



 56%|█████▌    | 237/422 [00:35<00:46,  3.96it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AIrIuRyN5VA_000097_000107.wav


 56%|█████▋    | 238/422 [00:36<00:49,  3.71it/s]

MoviePy - Done.

File: datasets/train/Normal/AIrIuRyN5VA_000097_000107.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 76.60%
       Vacumning: 5.50%
         Cooking: 3.06%
No people is in the room: 1.78%
        Cleaning: 1.69%



 56%|█████▋    | 238/422 [00:36<00:49,  3.71it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aKKNRI3PYYs_000130_000140.wav


 57%|█████▋    | 239/422 [00:36<00:54,  3.38it/s]

MoviePy - Done.

File: datasets/train/Normal/aKKNRI3PYYs_000130_000140.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 69.34%
Playing with pets: 6.79%
   Mopping floor: 5.82%
     Celebrating: 4.45%
       Showering: 2.22%



 57%|█████▋    | 239/422 [00:36<00:54,  3.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ALuLyCkzCt8_000060_000070.wav


 57%|█████▋    | 240/422 [00:36<00:55,  3.28it/s]

MoviePy - Done.

File: datasets/train/Normal/ALuLyCkzCt8_000060_000070.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 41.95%
   Mopping floor: 18.49%
       Showering: 16.57%
   Doing laundry: 5.15%
        Painting: 3.32%



 57%|█████▋    | 240/422 [00:36<00:55,  3.28it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/anDbHNhATAE_000039_000049.wav


 57%|█████▋    | 241/422 [00:37<00:56,  3.19it/s]

MoviePy - Done.

File: datasets/train/Normal/anDbHNhATAE_000039_000049.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 59.89%
     Eating food: 7.24%
No people is in the room: 6.78%
    Sitting down: 5.71%
   Mopping floor: 3.94%



 57%|█████▋    | 241/422 [00:37<00:56,  3.19it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/anzXsViG6wc_000370_000380.wav


 57%|█████▋    | 242/422 [00:37<00:57,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/anzXsViG6wc_000370_000380.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 38.59%
        Studying: 32.67%
        Painting: 7.71%
Organizing space: 4.58%
         Working: 4.25%



 57%|█████▋    | 242/422 [00:37<00:57,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AObYuoaz-7o_000245_000255.wav


 58%|█████▊    | 243/422 [00:37<00:57,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/AObYuoaz-7o_000245_000255.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 99.88%
        Drinking: 0.08%
         Smoking: 0.02%
         Choking: 0.01%
No people is in the room: 0.00%



 58%|█████▊    | 243/422 [00:37<00:57,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aprcI6-IHmY_000146_000156.wav


 58%|█████▊    | 244/422 [00:38<00:57,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/aprcI6-IHmY_000146_000156.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 50.63%
   Setting table: 25.95%
      Exercising: 9.36%
No people is in the room: 8.34%
   Mopping floor: 2.12%



 58%|█████▊    | 244/422 [00:38<00:57,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/auSl2RAONs4_000007_000017.wav


 58%|█████▊    | 245/422 [00:38<00:56,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/auSl2RAONs4_000007_000017.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 21.58%
   Setting table: 15.32%
No people is in the room: 15.08%
         Talking: 11.81%
        Chatting: 5.34%



 58%|█████▊    | 245/422 [00:38<00:56,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AV4t72IYADk_000048_000058.wav


 58%|█████▊    | 246/422 [00:38<00:58,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/AV4t72IYADk_000048_000058.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 31.77%
      Decorating: 9.05%
   Doing laundry: 8.76%
   Setting table: 6.81%
    Sitting down: 6.08%



 58%|█████▊    | 246/422 [00:38<00:58,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AvgqtvtOgF4_000443_000453.wav


 59%|█████▊    | 247/422 [00:39<00:58,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/AvgqtvtOgF4_000443_000453.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 69.50%
No people is in the room: 9.33%
        Studying: 3.78%
         Reading: 3.70%
  Using computer: 3.05%



 59%|█████▊    | 247/422 [00:39<00:58,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aWEk9Sfs67c_000194_000204.wav


 59%|█████▉    | 248/422 [00:39<00:56,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/aWEk9Sfs67c_000194_000204.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 50.71%
     Eating food: 25.76%
Playing with pets: 6.89%
    Sitting down: 3.45%
        Drinking: 2.29%



 59%|█████▉    | 248/422 [00:39<00:56,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Awm6MWRRAa4_000222_000232.wav


 59%|█████▉    | 249/422 [00:39<00:55,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/Awm6MWRRAa4_000222_000232.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 29.02%
         Cooking: 27.72%
No people is in the room: 17.37%
       Showering: 10.51%
   Setting table: 5.34%



 59%|█████▉    | 249/422 [00:39<00:55,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AY1CsIuu3jg_000229_000239.wav


 59%|█████▉    | 250/422 [00:40<00:56,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/AY1CsIuu3jg_000229_000239.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 45.10%
Unattended cooking: 13.38%
         Cooking: 11.51%
   Doing laundry: 9.52%
  Using computer: 7.21%



 59%|█████▉    | 250/422 [00:40<00:56,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AYdyPzM8oL8_000004_000014.wav


 59%|█████▉    | 251/422 [00:40<00:55,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/AYdyPzM8oL8_000004_000014.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 44.51%
         Talking: 18.84%
No people is in the room: 18.07%
Playing with pets: 5.45%
        Speaking: 2.79%



 59%|█████▉    | 251/422 [00:40<00:55,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/B-SQ8cYQSMk_000222_000232.wav


 60%|█████▉    | 252/422 [00:40<00:55,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/B-SQ8cYQSMk_000222_000232.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 30.86%
No people is in the room: 21.22%
   Setting table: 9.68%
        Cleaning: 5.88%
      Decorating: 5.66%



 60%|█████▉    | 252/422 [00:40<00:55,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/B9JqNr0e-Pw_000022_000032.wav


 60%|█████▉    | 253/422 [00:41<00:56,  3.00it/s]

MoviePy - Done.

File: datasets/train/Normal/B9JqNr0e-Pw_000022_000032.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 26.80%
   Doing laundry: 19.67%
No people is in the room: 13.96%
       Showering: 7.55%
   Setting table: 7.45%



 60%|█████▉    | 253/422 [00:41<00:56,  3.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/baf8O6sSMLI_000053_000063.wav


 60%|██████    | 254/422 [00:41<00:54,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/baf8O6sSMLI_000053_000063.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 53.74%
No people is in the room: 20.74%
      Exercising: 3.67%
    Sitting down: 2.60%
         Working: 2.19%



 60%|██████    | 254/422 [00:41<00:54,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BDbTkmhDM2A_000087_000097.wav


 60%|██████    | 255/422 [00:41<00:55,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/BDbTkmhDM2A_000087_000097.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 20.55%
No people is in the room: 16.24%
Playing with pets: 14.62%
         Reading: 7.78%
         Talking: 5.80%



 60%|██████    | 255/422 [00:41<00:55,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BiqJ_FkwCUQ_000006_000016.wav


 61%|██████    | 256/422 [00:41<00:54,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/BiqJ_FkwCUQ_000006_000016.mp4
Ground Truth: Normal
Top predictions:
        Studying: 37.79%
         Working: 22.33%
      Decorating: 10.12%
        Painting: 8.27%
   Setting table: 7.60%



 61%|██████    | 256/422 [00:42<00:54,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BjFtM8jhFoU_000008_000018.wav


 61%|██████    | 257/422 [00:42<00:52,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/BjFtM8jhFoU_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Dancing: 19.78%
      Using drug: 12.43%
   Playing music: 6.92%
      Decorating: 6.17%
Playing with pets: 5.52%



 61%|██████    | 257/422 [00:42<00:52,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bKGIbOPMSPQ_000002_000012.wav


 61%|██████    | 258/422 [00:42<00:54,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/bKGIbOPMSPQ_000002_000012.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 71.52%
       Showering: 16.49%
         Working: 9.44%
         Running: 0.75%
        Cleaning: 0.35%



 61%|██████    | 258/422 [00:42<00:54,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BL0sZO7hrIc_000427_000437.wav


 61%|██████▏   | 259/422 [00:42<00:53,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/BL0sZO7hrIc_000427_000437.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 37.46%
         Smoking: 17.44%
Theft or vandalism: 9.02%
      Exercising: 4.08%
        Painting: 4.01%



 61%|██████▏   | 259/422 [00:43<00:53,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bNRAN0wh0tc_000046_000056.wav


 62%|██████▏   | 260/422 [00:43<00:52,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/bNRAN0wh0tc_000046_000056.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 45.94%
No people is in the room: 22.05%
         Smoking: 11.97%
        Drinking: 6.41%
        Chatting: 5.27%



 62%|██████▏   | 260/422 [00:43<00:52,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BOc3MlonHb8_000075_000085.wav


 62%|██████▏   | 261/422 [00:43<00:54,  2.95it/s]

MoviePy - Done.

File: datasets/train/Normal/BOc3MlonHb8_000075_000085.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 18.66%
     Celebrating: 16.25%
      Decorating: 10.28%
  Using computer: 10.07%
      Meditating: 6.62%



 62%|██████▏   | 261/422 [00:43<00:54,  2.95it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bPfJAqym2vc_000019_000029.wav


 62%|██████▏   | 262/422 [00:44<00:54,  2.94it/s]

MoviePy - Done.

File: datasets/train/Normal/bPfJAqym2vc_000019_000029.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 58.75%
      Using drug: 13.51%
   Setting table: 4.07%
        Painting: 3.53%
        Studying: 3.26%



 62%|██████▏   | 262/422 [00:44<00:54,  2.94it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bPIP4BmjDAw_000070_000080.wav


 62%|██████▏   | 263/422 [00:44<00:53,  2.98it/s]

MoviePy - Done.

File: datasets/train/Normal/bPIP4BmjDAw_000070_000080.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 45.37%
  Using computer: 12.20%
Organizing space: 8.61%
      Decorating: 7.40%
      Meditating: 5.15%



 62%|██████▏   | 263/422 [00:44<00:53,  2.98it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Bq0R48bHZnY_000412_000422.wav


 63%|██████▎   | 264/422 [00:44<00:51,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/Bq0R48bHZnY_000412_000422.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 51.86%
   Doing laundry: 7.18%
         Working: 6.43%
        Cleaning: 3.79%
        Knitting: 3.64%



 63%|██████▎   | 264/422 [00:44<00:51,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bSG07EG2vu8_000007_000017.wav


 63%|██████▎   | 265/422 [00:44<00:50,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/bSG07EG2vu8_000007_000017.mp4
Ground Truth: Normal
Top predictions:
         Dancing: 54.77%
      Using drug: 6.48%
     Celebrating: 2.71%
  Using computer: 2.39%
   Playing music: 2.19%



 63%|██████▎   | 265/422 [00:45<00:50,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BSOYvQ1A7_s_000027_000037.wav


 63%|██████▎   | 266/422 [00:45<00:50,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/BSOYvQ1A7_s_000027_000037.mp4
Ground Truth: Normal
Top predictions:
          Dining: 20.68%
     Celebrating: 15.46%
No people is in the room: 14.89%
      Using drug: 9.16%
   Setting table: 5.37%



 63%|██████▎   | 266/422 [00:45<00:50,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bt3ELWHiEds_000526_000536.wav


 63%|██████▎   | 267/422 [00:45<00:50,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/bt3ELWHiEds_000526_000536.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 77.83%
No people is in the room: 7.05%
Playing with pets: 4.96%
    Sitting down: 2.09%
        Drinking: 2.02%



 63%|██████▎   | 267/422 [00:45<00:50,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/btIe4SwFUhg_000080_000090.wav


 64%|██████▎   | 268/422 [00:45<00:51,  3.00it/s]

MoviePy - Done.

File: datasets/train/Normal/btIe4SwFUhg_000080_000090.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 66.77%
Organizing space: 7.43%
    Sitting down: 3.22%
      Using drug: 2.70%
  Using computer: 2.38%



 64%|██████▎   | 268/422 [00:46<00:51,  3.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BvdknPz77Hs_000000_000010.wav


 64%|██████▎   | 269/422 [00:46<00:49,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/BvdknPz77Hs_000000_000010.mp4
Ground Truth: Normal
Top predictions:
         Reading: 32.63%
No people is in the room: 10.71%
        Speaking: 5.74%
        Studying: 4.57%
        Knitting: 4.27%



 64%|██████▎   | 269/422 [00:46<00:49,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BWqOCs4yVo4_000000_000010.wav


 64%|██████▍   | 270/422 [00:46<00:48,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/BWqOCs4yVo4_000000_000010.mp4
Ground Truth: Normal
Top predictions:
   Playing music: 66.57%
         Dancing: 22.57%
     Celebrating: 1.89%
      Using drug: 1.60%
      Exercising: 1.35%



 64%|██████▍   | 270/422 [00:46<00:48,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BzcfW7JTsGM_000045_000055.wav


 64%|██████▍   | 271/422 [00:46<00:47,  3.19it/s]

MoviePy - Done.

File: datasets/train/Normal/BzcfW7JTsGM_000045_000055.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 23.16%
          Crying: 18.04%
     Suffocating: 10.49%
No people is in the room: 10.40%
Playing with pets: 7.73%



 64%|██████▍   | 271/422 [00:47<00:47,  3.19it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BZOSTgHGUCE_000229_000239.wav


 64%|██████▍   | 272/422 [00:47<00:48,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/BZOSTgHGUCE_000229_000239.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 38.48%
   Setting table: 17.24%
No people is in the room: 8.86%
  Using computer: 6.42%
      Decorating: 3.87%



 64%|██████▍   | 272/422 [00:47<00:48,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/c1o7820XyTI_000310_000320.wav


 65%|██████▍   | 273/422 [00:47<00:47,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/c1o7820XyTI_000310_000320.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 52.15%
No people is in the room: 24.77%
        Sleeping: 4.93%
   Mopping floor: 3.31%
   Doing laundry: 3.13%



 65%|██████▍   | 273/422 [00:47<00:47,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/C3ayjYkczC4_000060_000070.wav


 65%|██████▍   | 274/422 [00:47<00:47,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/C3ayjYkczC4_000060_000070.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 85.19%
Playing with pets: 4.42%
         Talking: 3.13%
   Setting table: 1.81%
         Reading: 1.35%



 65%|██████▍   | 274/422 [00:48<00:47,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/C45UEQ5Oxno_000075_000085.wav


 65%|██████▌   | 275/422 [00:48<00:47,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/C45UEQ5Oxno_000075_000085.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 28.01%
No people is in the room: 19.27%
   Doing laundry: 18.50%
Playing with pets: 6.14%
         Working: 4.75%



 65%|██████▌   | 275/422 [00:48<00:47,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/c729EVAFb_s_000415_000425.wav


 65%|██████▌   | 276/422 [00:48<00:47,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/c729EVAFb_s_000415_000425.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 28.09%
No people is in the room: 19.07%
         Working: 7.36%
    Sitting down: 6.95%
        Studying: 5.34%



 65%|██████▌   | 276/422 [00:48<00:47,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/caz61OyDs1k_000045_000055.wav


 66%|██████▌   | 277/422 [00:48<00:47,  3.05it/s]

MoviePy - Done.

File: datasets/train/Normal/caz61OyDs1k_000045_000055.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 93.62%
          Dining: 0.97%
     Eating food: 0.78%
      Decorating: 0.67%
No people is in the room: 0.60%



 66%|██████▌   | 277/422 [00:49<00:47,  3.05it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cbXmtG99SU0_000000_000010.wav


 66%|██████▌   | 278/422 [00:49<00:47,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/cbXmtG99SU0_000000_000010.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 77.84%
        Laughing: 16.44%
     Eating food: 1.59%
     Celebrating: 1.04%
No people is in the room: 0.85%



 66%|██████▌   | 278/422 [00:49<00:47,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cc4YIsJz5sg_000002_000012.wav


 66%|██████▌   | 279/422 [00:49<00:46,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/cc4YIsJz5sg_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 15.45%
      Decorating: 11.13%
    Sitting down: 10.84%
      Using drug: 8.22%
  Using computer: 5.41%



 66%|██████▌   | 279/422 [00:49<00:46,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cCJdUn_Dv2M_000181_000191.wav


 66%|██████▋   | 280/422 [00:49<00:45,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/cCJdUn_Dv2M_000181_000191.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 72.45%
   Doing laundry: 5.60%
   Mopping floor: 2.70%
    Sitting down: 2.51%
         Talking: 2.16%



 66%|██████▋   | 280/422 [00:49<00:45,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CdtDTYW5YOs_000117_000127.wav


 67%|██████▋   | 281/422 [00:50<00:45,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/CdtDTYW5YOs_000117_000127.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 82.23%
         Reading: 4.44%
        Studying: 2.49%
      Decorating: 2.04%
  Using computer: 2.04%



 67%|██████▋   | 281/422 [00:50<00:45,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CeYIbLlRVVg_000357_000367.wav


 67%|██████▋   | 282/422 [00:50<00:46,  2.98it/s]

MoviePy - Done.

File: datasets/train/Normal/CeYIbLlRVVg_000357_000367.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 49.16%
  Using computer: 24.13%
No people is in the room: 7.01%
Unattended cooking: 4.40%
         Cooking: 4.34%



 67%|██████▋   | 282/422 [00:50<00:46,  2.98it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CfP4T-QKX4U_000332_000342.wav


 67%|██████▋   | 283/422 [00:50<00:45,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/CfP4T-QKX4U_000332_000342.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 37.83%
   Setting table: 20.01%
Playing board games: 13.87%
         Reading: 7.51%
    Sitting down: 5.56%



 67%|██████▋   | 283/422 [00:50<00:45,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cgB_HnWGY9Y_000001_000011.wav


 67%|██████▋   | 284/422 [00:51<00:44,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/cgB_HnWGY9Y_000001_000011.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 59.34%
         Dancing: 11.79%
         Singing: 4.83%
           Party: 3.48%
         Talking: 2.91%



 67%|██████▋   | 284/422 [00:51<00:44,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CGQfpj5Q2XA_000026_000036.wav


 68%|██████▊   | 285/422 [00:51<00:43,  3.16it/s]

MoviePy - Done.

File: datasets/train/Normal/CGQfpj5Q2XA_000026_000036.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 21.48%
   Setting table: 11.48%
  Using computer: 9.71%
      Using drug: 7.55%
      Decorating: 5.55%



 68%|██████▊   | 285/422 [00:51<00:43,  3.16it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cGuxUVfBZ20_000050_000060.wav


 68%|██████▊   | 286/422 [00:51<00:42,  3.20it/s]

MoviePy - Done.

File: datasets/train/Normal/cGuxUVfBZ20_000050_000060.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 41.82%
     Celebrating: 10.00%
         Talking: 7.73%
   Setting table: 7.24%
   Mopping floor: 6.55%



 68%|██████▊   | 286/422 [00:51<00:42,  3.20it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ChRYSf_9SoM_000035_000045.wav


 68%|██████▊   | 287/422 [00:52<00:42,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/ChRYSf_9SoM_000035_000045.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 14.49%
      Decorating: 12.37%
     Celebrating: 7.14%
        Cleaning: 6.98%
   Setting table: 6.19%



 68%|██████▊   | 287/422 [00:52<00:42,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CLGYN3I3nIY_000039_000049.wav


 68%|██████▊   | 288/422 [00:52<00:43,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/CLGYN3I3nIY_000039_000049.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 63.84%
     Eating food: 17.57%
         Smoking: 6.38%
No people is in the room: 3.16%
      Using drug: 2.19%



 68%|██████▊   | 288/422 [00:52<00:43,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CLj6957VkZg_000041_000051.wav


 68%|██████▊   | 289/422 [00:52<00:42,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/CLj6957VkZg_000041_000051.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 51.94%
       Showering: 30.08%
   Doing laundry: 6.23%
   Mopping floor: 4.05%
        Drinking: 1.17%



 68%|██████▊   | 289/422 [00:52<00:42,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CM3tD22RxB8_000004_000014.wav


 69%|██████▊   | 290/422 [00:53<00:41,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/CM3tD22RxB8_000004_000014.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 26.03%
   Doing laundry: 22.30%
Playing with pets: 17.73%
   Mopping floor: 11.36%
    Sitting down: 4.08%



 69%|██████▊   | 290/422 [00:53<00:41,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CqPXNz7YDYM_000007_000017.wav


 69%|██████▉   | 291/422 [00:53<00:41,  3.18it/s]

MoviePy - Done.

File: datasets/train/Normal/CqPXNz7YDYM_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 44.75%
         Dancing: 6.77%
     Eating food: 5.86%
        Drinking: 4.27%
         Talking: 4.05%



 69%|██████▉   | 291/422 [00:53<00:41,  3.18it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cRg6xZpmBrI_000404_000414.wav


 69%|██████▉   | 292/422 [00:53<00:40,  3.22it/s]

MoviePy - Done.

File: datasets/train/Normal/cRg6xZpmBrI_000404_000414.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 60.00%
         Talking: 9.06%
         Choking: 4.52%
        Chatting: 4.10%
        Speaking: 3.52%



 69%|██████▉   | 292/422 [00:53<00:40,  3.22it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/csQSjmHhsXA_000018_000028.wav


 69%|██████▉   | 293/422 [00:53<00:40,  3.18it/s]

MoviePy - Done.

File: datasets/train/Normal/csQSjmHhsXA_000018_000028.mp4
Ground Truth: Normal
Top predictions:
Unattended cooking: 55.24%
         Cooking: 24.23%
   Setting table: 13.57%
         Working: 1.44%
  Using computer: 1.25%



 69%|██████▉   | 293/422 [00:54<00:40,  3.18it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CtDdx-V3J7M_000074_000084.wav


 70%|██████▉   | 294/422 [00:54<00:39,  3.21it/s]

MoviePy - Done.

File: datasets/train/Normal/CtDdx-V3J7M_000074_000084.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 62.31%
        Knitting: 5.97%
Play with phone/tablet: 3.14%
Playing with pets: 3.13%
        Studying: 2.89%



 70%|██████▉   | 294/422 [00:54<00:39,  3.21it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cThgCWwzvU8_000015_000025.wav


 70%|██████▉   | 295/422 [00:54<00:40,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/cThgCWwzvU8_000015_000025.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 18.02%
   Doing laundry: 10.34%
           Party: 10.00%
         Dancing: 8.68%
       Showering: 6.08%



 70%|██████▉   | 295/422 [00:54<00:40,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CTZETRzQWOU_000031_000041.wav


 70%|███████   | 296/422 [00:54<00:39,  3.18it/s]

MoviePy - Done.

File: datasets/train/Normal/CTZETRzQWOU_000031_000041.mp4
Ground Truth: Normal
Top predictions:
         Reading: 19.52%
No people is in the room: 10.70%
  Using computer: 9.21%
         Working: 7.18%
   Setting table: 6.79%



 70%|███████   | 296/422 [00:55<00:39,  3.18it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CyqaeqsCKMU_000050_000060.wav


 70%|███████   | 297/422 [00:55<00:38,  3.21it/s]

MoviePy - Done.

File: datasets/train/Normal/CyqaeqsCKMU_000050_000060.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 23.40%
   Doing laundry: 12.11%
      Using drug: 7.26%
         Dancing: 6.43%
         Smoking: 5.52%



 70%|███████   | 297/422 [00:55<00:38,  3.21it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/D6M-1uemZTU_000152_000162.wav


 71%|███████   | 298/422 [00:55<00:40,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/D6M-1uemZTU_000152_000162.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 24.59%
      Exercising: 17.78%
No people is in the room: 17.56%
    Sitting down: 9.36%
     Celebrating: 6.35%



 71%|███████   | 298/422 [00:55<00:40,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/d7Ggedw-sOM_001169_001179.wav


 71%|███████   | 299/422 [00:55<00:40,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/d7Ggedw-sOM_001169_001179.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 17.63%
  Using computer: 16.43%
No people is in the room: 16.30%
      Exercising: 11.93%
         Reading: 6.87%



 71%|███████   | 299/422 [00:56<00:40,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/D7lm7hLSM0I_000034_000044.wav


 71%|███████   | 300/422 [00:56<00:38,  3.13it/s]

MoviePy - Done.

File: datasets/train/Normal/D7lm7hLSM0I_000034_000044.mp4
Ground Truth: Normal
Top predictions:
         Reading: 76.52%
        Speaking: 7.59%
         Talking: 4.55%
No people is in the room: 3.20%
   Setting table: 1.19%



 71%|███████   | 300/422 [00:56<00:38,  3.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dfULz-lLaDc_000002_000012.wav


 71%|███████▏  | 301/422 [00:56<00:38,  3.17it/s]

MoviePy - Done.

File: datasets/train/Normal/dfULz-lLaDc_000002_000012.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 59.49%
    Sitting down: 10.93%
   Doing laundry: 7.78%
         Dancing: 4.95%
No people is in the room: 4.11%



 71%|███████▏  | 301/422 [00:56<00:38,  3.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DFX28NEFY38_000157_000167.wav


 72%|███████▏  | 302/422 [00:56<00:38,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/DFX28NEFY38_000157_000167.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 28.51%
No people is in the room: 23.54%
         Talking: 8.65%
        Drinking: 6.99%
        Speaking: 6.36%



 72%|███████▏  | 302/422 [00:56<00:38,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DkZml-dK0pU_000043_000053.wav


 72%|███████▏  | 303/422 [00:57<00:38,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/DkZml-dK0pU_000043_000053.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 65.30%
       Showering: 7.53%
         Dancing: 6.96%
   Mopping floor: 6.77%
   Setting table: 2.47%



 72%|███████▏  | 303/422 [00:57<00:38,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DLhpyADKYKM_000184_000194.wav


 72%|███████▏  | 304/422 [00:57<00:37,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/DLhpyADKYKM_000184_000194.mp4
Ground Truth: Normal
Top predictions:
      Decorating: 25.19%
        Studying: 23.53%
        Cleaning: 18.87%
        Painting: 8.22%
    Sitting down: 6.83%



 72%|███████▏  | 304/422 [00:57<00:37,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dlK1QS-Aofs_000066_000076.wav


 72%|███████▏  | 305/422 [00:57<00:36,  3.19it/s]

MoviePy - Done.

File: datasets/train/Normal/dlK1QS-Aofs_000066_000076.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 8.48%
      Using drug: 7.85%
   Setting table: 7.05%
      Decorating: 6.58%
        Painting: 5.99%



 72%|███████▏  | 305/422 [00:57<00:36,  3.19it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dMBlXTR3qQU_000187_000197.wav


 73%|███████▎  | 306/422 [00:58<00:36,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/dMBlXTR3qQU_000187_000197.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 77.08%
   Doing laundry: 11.35%
   Mopping floor: 2.40%
        Sleeping: 1.36%
Physical altercations: 1.19%



 73%|███████▎  | 306/422 [00:58<00:36,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dndkqyk45Ks_000015_000025.wav


 73%|███████▎  | 307/422 [00:58<00:36,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/dndkqyk45Ks_000015_000025.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 58.23%
Playing with pets: 17.61%
   Setting table: 12.92%
   Doing laundry: 4.24%
   Mopping floor: 2.70%



 73%|███████▎  | 307/422 [00:58<00:36,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DNWCjFUE75s_000289_000299.wav


 73%|███████▎  | 308/422 [00:58<00:37,  3.00it/s]

MoviePy - Done.

File: datasets/train/Normal/DNWCjFUE75s_000289_000299.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 15.02%
    Sitting down: 11.23%
No people is in the room: 10.92%
   Setting table: 9.14%
  Using computer: 7.47%



 73%|███████▎  | 308/422 [00:58<00:37,  3.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dsnkk1tytpE_000066_000076.wav


 73%|███████▎  | 309/422 [00:59<00:36,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/dsnkk1tytpE_000066_000076.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 41.96%
Playing with pets: 14.56%
   Setting table: 12.22%
          Dining: 8.18%
No people is in the room: 4.77%



 73%|███████▎  | 309/422 [00:59<00:36,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dxBH1r8To3Y_000010_000020.wav


 73%|███████▎  | 310/422 [00:59<00:36,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/dxBH1r8To3Y_000010_000020.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 84.53%
Playing with pets: 4.86%
        Drinking: 4.57%
No people is in the room: 4.33%
    Sitting down: 0.39%



 73%|███████▎  | 310/422 [00:59<00:36,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dXOxzXI-psA_000017_000027.wav


 74%|███████▎  | 311/422 [00:59<00:35,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/dXOxzXI-psA_000017_000027.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 59.92%
  Using computer: 11.06%
Play with phone/tablet: 4.83%
         Talking: 4.82%
Making phone calls: 3.85%



 74%|███████▎  | 311/422 [00:59<00:35,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/e-xCuXyO1w0_000122_000132.wav


 74%|███████▍  | 312/422 [01:00<00:35,  3.13it/s]

MoviePy - Done.

File: datasets/train/Normal/e-xCuXyO1w0_000122_000132.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 73.83%
   Doing laundry: 15.96%
   Mopping floor: 2.37%
         Working: 1.89%
No people is in the room: 1.35%



 74%|███████▍  | 312/422 [01:00<00:35,  3.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/E4sSE4n7AIY_000617_000627.wav


 74%|███████▍  | 313/422 [01:00<00:34,  3.13it/s]

MoviePy - Done.

File: datasets/train/Normal/E4sSE4n7AIY_000617_000627.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 36.09%
        Painting: 23.81%
Play with phone/tablet: 12.11%
        Studying: 6.20%
         Working: 2.72%



 74%|███████▍  | 313/422 [01:00<00:34,  3.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/e6knPRich_k_000053_000063.wav


 74%|███████▍  | 314/422 [01:00<00:35,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/e6knPRich_k_000053_000063.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 52.78%
Playing with pets: 8.99%
         Talking: 6.38%
          Dining: 6.35%
No people is in the room: 6.20%



 74%|███████▍  | 314/422 [01:00<00:35,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/e7MT8cHYxls_000016_000026.wav


 75%|███████▍  | 315/422 [01:01<00:36,  2.95it/s]

MoviePy - Done.

File: datasets/train/Normal/e7MT8cHYxls_000016_000026.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 93.35%
   Doing laundry: 1.29%
         Talking: 1.05%
Playing with pets: 0.62%
         Reading: 0.52%



 75%|███████▍  | 315/422 [01:01<00:36,  2.95it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/eDegjR0YmXE_000102_000112.wav


 75%|███████▍  | 316/422 [01:01<00:36,  2.94it/s]

MoviePy - Done.

File: datasets/train/Normal/eDegjR0YmXE_000102_000112.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 38.70%
        Studying: 16.65%
No people is in the room: 13.59%
Organizing space: 12.80%
         Working: 4.38%



 75%|███████▍  | 316/422 [01:01<00:36,  2.94it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/eJfnOEeJpM0_000044_000054.wav


 75%|███████▌  | 317/422 [01:01<00:34,  3.00it/s]

MoviePy - Done.

File: datasets/train/Normal/eJfnOEeJpM0_000044_000054.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 36.45%
   Setting table: 20.62%
   Doing laundry: 4.49%
      Decorating: 4.26%
     Eating food: 3.17%



 75%|███████▌  | 317/422 [01:01<00:34,  3.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ENKtS5yCT2o_000053_000063.wav


 75%|███████▌  | 318/422 [01:02<00:33,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/ENKtS5yCT2o_000053_000063.mp4
Ground Truth: Normal
Top predictions:
Organizing space: 28.70%
No people is in the room: 13.50%
          Dining: 12.17%
  Using computer: 6.07%
      Using drug: 4.22%



 75%|███████▌  | 318/422 [01:02<00:33,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/EQViTe9m1Hw_000112_000122.wav


 76%|███████▌  | 319/422 [01:02<00:33,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/EQViTe9m1Hw_000112_000122.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 50.75%
        Studying: 32.67%
        Painting: 5.39%
      Decorating: 3.34%
         Working: 2.65%



 76%|███████▌  | 319/422 [01:02<00:33,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ew7MSTTFMeg_000272_000282.wav


 76%|███████▌  | 320/422 [01:02<00:32,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/ew7MSTTFMeg_000272_000282.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 26.29%
         Reading: 22.64%
Playing with pets: 13.36%
  Using computer: 5.42%
         Writing: 4.25%



 76%|███████▌  | 320/422 [01:02<00:32,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/F1KA2nf-C9M_000133_000143.wav


 76%|███████▌  | 321/422 [01:02<00:31,  3.17it/s]

MoviePy - Done.

File: datasets/train/Normal/F1KA2nf-C9M_000133_000143.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 28.67%
  Using computer: 18.95%
         Reading: 9.54%
      Decorating: 6.34%
        Studying: 4.30%



 76%|███████▌  | 321/422 [01:03<00:31,  3.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/f3DMBH9EJNI_000007_000017.wav


 76%|███████▋  | 322/422 [01:03<00:32,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/f3DMBH9EJNI_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 30.15%
    Sitting down: 23.64%
        Laughing: 10.97%
         Dancing: 8.37%
      Exercising: 3.07%



 76%|███████▋  | 322/422 [01:03<00:32,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/F621wrqXp34_000036_000046.wav


 77%|███████▋  | 323/422 [01:03<00:32,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/F621wrqXp34_000036_000046.mp4
Ground Truth: Normal
Top predictions:
         Reading: 26.31%
  Using computer: 14.48%
        Studying: 11.75%
No people is in the room: 11.47%
Organizing space: 6.83%



 77%|███████▋  | 323/422 [01:03<00:32,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/FJ8M6LK6evw_000022_000032.wav


 77%|███████▋  | 324/422 [01:03<00:33,  2.97it/s]

MoviePy - Done.

File: datasets/train/Normal/FJ8M6LK6evw_000022_000032.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 17.99%
        Chatting: 12.34%
     Celebrating: 9.25%
      Using drug: 8.77%
Organizing space: 8.03%



 77%|███████▋  | 324/422 [01:04<00:33,  2.97it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/FJljds1AA2o_000016_000026.wav


 77%|███████▋  | 325/422 [01:04<00:32,  2.99it/s]

MoviePy - Done.

File: datasets/train/Normal/FJljds1AA2o_000016_000026.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 36.42%
         Talking: 22.68%
         Reading: 13.29%
     Eating food: 9.95%
        Speaking: 6.48%



 77%|███████▋  | 325/422 [01:04<00:32,  2.99it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/gem1VWkzae8_000875_000885.wav


 77%|███████▋  | 326/422 [01:04<00:32,  2.99it/s]

MoviePy - Done.

File: datasets/train/Normal/gem1VWkzae8_000875_000885.mp4
Ground Truth: Normal
Top predictions:
        Studying: 48.79%
Organizing space: 13.13%
  Using computer: 12.73%
   Setting table: 8.12%
         Working: 3.56%



 77%|███████▋  | 326/422 [01:04<00:32,  2.99it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Gf5htdt_Djg_000107_000117.wav


 77%|███████▋  | 327/422 [01:04<00:31,  3.00it/s]

MoviePy - Done.

File: datasets/train/Normal/Gf5htdt_Djg_000107_000117.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 42.90%
Playing board games: 33.80%
   Setting table: 6.86%
     Eating food: 4.77%
          Dining: 3.14%



 77%|███████▋  | 327/422 [01:05<00:31,  3.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/gmE90XbBgOk_000000_000010.wav


 78%|███████▊  | 328/422 [01:05<00:30,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/gmE90XbBgOk_000000_000010.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 27.25%
     Eating food: 18.59%
         Smoking: 16.54%
        Drinking: 12.78%
No people is in the room: 4.37%



 78%|███████▊  | 328/422 [01:05<00:30,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/GnXROFauJVY_000026_000036.wav


 78%|███████▊  | 329/422 [01:05<00:30,  3.07it/s]

MoviePy - Done.

File: datasets/train/Normal/GnXROFauJVY_000026_000036.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 77.08%
No people is in the room: 4.03%
           Party: 3.92%
          Dining: 2.64%
         Potluck: 2.09%



 78%|███████▊  | 329/422 [01:05<00:30,  3.07it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/H89ENzVtnlg_000095_000105.wav


 78%|███████▊  | 330/422 [01:05<00:31,  2.95it/s]

MoviePy - Done.

File: datasets/train/Normal/H89ENzVtnlg_000095_000105.mp4
Ground Truth: Normal
Top predictions:
       Showering: 43.07%
   Mopping floor: 10.41%
   Doing laundry: 8.55%
         Cooking: 8.23%
No people is in the room: 6.63%



 78%|███████▊  | 330/422 [01:06<00:31,  2.95it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/HJqSjhNOMjY_000143_000153.wav


 78%|███████▊  | 331/422 [01:06<00:31,  2.92it/s]

MoviePy - Done.

File: datasets/train/Normal/HJqSjhNOMjY_000143_000153.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 27.34%
     Celebrating: 21.32%
No people is in the room: 18.65%
         Talking: 4.22%
        Speaking: 3.54%



 78%|███████▊  | 331/422 [01:06<00:31,  2.92it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hM8CfzDDyIg_000158_000168.wav


 79%|███████▊  | 332/422 [01:06<00:30,  2.94it/s]

MoviePy - Done.

File: datasets/train/Normal/hM8CfzDDyIg_000158_000168.mp4
Ground Truth: Normal
Top predictions:
       Showering: 34.44%
Unattended cooking: 31.79%
         Cooking: 19.49%
        Cleaning: 3.67%
   Doing laundry: 3.57%



 79%|███████▊  | 332/422 [01:06<00:30,  2.94it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hQ4OpC2RuLY_000120_000130.wav


 79%|███████▉  | 333/422 [01:06<00:29,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/hQ4OpC2RuLY_000120_000130.mp4
Ground Truth: Normal
Top predictions:
              TV: 31.33%
  Using computer: 11.56%
      Using drug: 10.52%
     Celebrating: 6.09%
No people is in the room: 5.41%



 79%|███████▉  | 333/422 [01:07<00:29,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hxyttQsXwOc_000079_000089.wav


 79%|███████▉  | 334/422 [01:07<00:28,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/hxyttQsXwOc_000079_000089.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 43.28%
         Talking: 18.85%
No people is in the room: 6.30%
        Chatting: 4.00%
        Speaking: 3.96%



 79%|███████▉  | 334/422 [01:07<00:28,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/i70NodYlh7E_000336_000346.wav


 79%|███████▉  | 335/422 [01:07<00:29,  2.98it/s]

MoviePy - Done.

File: datasets/train/Normal/i70NodYlh7E_000336_000346.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 26.36%
          Dining: 23.94%
   Setting table: 11.23%
        Drinking: 6.83%
  Using computer: 6.45%



 79%|███████▉  | 335/422 [01:07<00:29,  2.98it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/IGHpI5LRzuk_000054_000064.wav


 80%|███████▉  | 336/422 [01:07<00:28,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/IGHpI5LRzuk_000054_000064.mp4
Ground Truth: Normal
Top predictions:
  Brushing teeth: 97.65%
        Cleaning: 1.37%
       Showering: 0.44%
      Using drug: 0.11%
        Painting: 0.10%



 80%|███████▉  | 336/422 [01:08<00:28,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/IKG6W6LmSlw_000000_000010.wav


 80%|███████▉  | 337/422 [01:08<00:27,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/IKG6W6LmSlw_000000_000010.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 31.27%
     Eating food: 9.44%
No people is in the room: 8.04%
  Brushing teeth: 7.97%
         Writing: 6.61%



 80%|███████▉  | 337/422 [01:08<00:27,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/JpgvBLEoGPY_000008_000018.wav


 80%|████████  | 338/422 [01:08<00:27,  3.05it/s]

MoviePy - Done.

File: datasets/train/Normal/JpgvBLEoGPY_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Potluck: 43.18%
         Singing: 10.67%
     Celebrating: 9.64%
      Using drug: 7.20%
      Meditating: 5.16%



 80%|████████  | 338/422 [01:08<00:27,  3.05it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Jvx3wT69CVI_000460_000470.wav


 80%|████████  | 339/422 [01:08<00:26,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/Jvx3wT69CVI_000460_000470.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 57.45%
   Setting table: 25.04%
   Doing laundry: 4.39%
        Drinking: 1.83%
         Working: 1.54%



 80%|████████  | 339/422 [01:09<00:26,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/k088TLn7oIU_000090_000100.wav


 81%|████████  | 340/422 [01:09<00:26,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/k088TLn7oIU_000090_000100.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 46.39%
No people is in the room: 6.88%
         Working: 6.35%
      Decorating: 5.61%
        Cleaning: 3.68%



 81%|████████  | 340/422 [01:09<00:26,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/KCk7JLKqICc_000068_000078.wav


 81%|████████  | 341/422 [01:09<00:27,  2.98it/s]

MoviePy - Done.

File: datasets/train/Normal/KCk7JLKqICc_000068_000078.mp4
Ground Truth: Normal
Top predictions:
        Studying: 30.53%
         Working: 15.89%
      Exercising: 12.74%
   Doing laundry: 8.48%
No people is in the room: 7.48%



 81%|████████  | 341/422 [01:09<00:27,  2.98it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/kCyv6z2yar4_000002_000012.wav


 81%|████████  | 342/422 [01:09<00:26,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/kCyv6z2yar4_000002_000012.mp4
Ground Truth: Normal
Top predictions:
Organizing space: 23.04%
         Dancing: 13.04%
   Playing music: 11.88%
Playing with pets: 8.56%
         Working: 7.07%



 81%|████████  | 342/422 [01:10<00:26,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/kW5BmCKH248_000447_000457.wav


 81%|████████▏ | 343/422 [01:10<00:26,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/kW5BmCKH248_000447_000457.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 32.88%
  Using computer: 26.45%
      Using drug: 9.10%
     Celebrating: 5.77%
   Setting table: 3.57%



 81%|████████▏ | 343/422 [01:10<00:26,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/KY3dD8VcGnM_000022_000032.wav


 82%|████████▏ | 344/422 [01:10<00:25,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/KY3dD8VcGnM_000022_000032.mp4
Ground Truth: Normal
Top predictions:
          Dining: 18.80%
      Decorating: 14.41%
  Using computer: 10.77%
   Setting table: 9.01%
         Potluck: 8.96%



 82%|████████▏ | 344/422 [01:10<00:25,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/L0gZURkoVtM_000056_000066.wav


 82%|████████▏ | 345/422 [01:10<00:25,  2.99it/s]

MoviePy - Done.

File: datasets/train/Normal/L0gZURkoVtM_000056_000066.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 24.99%
        Drinking: 16.00%
     Eating food: 12.52%
         Smoking: 9.84%
      Exercising: 4.44%



 82%|████████▏ | 345/422 [01:11<00:25,  2.99it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/lCZLHNyUiMo_000003_000013.wav


 82%|████████▏ | 346/422 [01:11<00:25,  2.98it/s]

MoviePy - Done.

File: datasets/train/Normal/lCZLHNyUiMo_000003_000013.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 45.95%
     Suffocating: 6.78%
Furniture Collapse: 5.87%
    Falling down: 4.99%
Domestic violence: 4.78%



 82%|████████▏ | 346/422 [01:11<00:25,  2.98it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Ll6b1B9YDgM_000032_000042.wav


 82%|████████▏ | 347/422 [01:11<00:25,  2.96it/s]

MoviePy - Done.

File: datasets/train/Normal/Ll6b1B9YDgM_000032_000042.mp4
Ground Truth: Normal
Top predictions:
          Dining: 55.69%
        Drinking: 11.06%
     Eating food: 9.87%
        Studying: 7.13%
   Setting table: 5.53%



 82%|████████▏ | 347/422 [01:11<00:25,  2.96it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/N5n6j6vBGu0_000008_000018.wav


 82%|████████▏ | 348/422 [01:11<00:25,  2.91it/s]

MoviePy - Done.

File: datasets/train/Normal/N5n6j6vBGu0_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Reading: 28.27%
No people is in the room: 19.71%
        Speaking: 9.23%
   Doing laundry: 8.07%
         Talking: 6.53%



 82%|████████▏ | 348/422 [01:12<00:25,  2.91it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oeguonb5OA0_000004_000014.wav


 83%|████████▎ | 349/422 [01:12<00:24,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/oeguonb5OA0_000004_000014.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 85.10%
Playing with pets: 3.21%
         Talking: 2.62%
   Mopping floor: 1.69%
Playing board games: 1.60%



 83%|████████▎ | 349/422 [01:12<00:24,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oLPzKb6j80U_000009_000019.wav


 83%|████████▎ | 350/422 [01:12<00:23,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/oLPzKb6j80U_000009_000019.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 17.57%
      Meditating: 16.48%
     Celebrating: 12.69%
  Using computer: 7.32%
      Decorating: 6.35%



 83%|████████▎ | 350/422 [01:12<00:23,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/OmBlRJEoY9s_000088_000098.wav


 83%|████████▎ | 351/422 [01:12<00:23,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/OmBlRJEoY9s_000088_000098.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 52.01%
     Eating food: 14.83%
          Dining: 7.48%
No people is in the room: 4.87%
Playing with pets: 2.83%



 83%|████████▎ | 351/422 [01:13<00:23,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oUc6ZrEBL80_000519_000529.wav


 83%|████████▎ | 352/422 [01:13<00:23,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/oUc6ZrEBL80_000519_000529.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 31.02%
      Meditating: 14.60%
       Vacumning: 10.83%
No people is in the room: 5.07%
         Working: 4.61%



 83%|████████▎ | 352/422 [01:13<00:23,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/P6AChrs1Txo_000187_000197.wav


 84%|████████▎ | 353/422 [01:13<00:22,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/P6AChrs1Txo_000187_000197.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 52.70%
Playing board games: 11.60%
Playing with pets: 8.41%
         Reading: 4.98%
         Talking: 3.84%



 84%|████████▎ | 353/422 [01:13<00:22,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/P8amgAResD4_000279_000289.wav


 84%|████████▎ | 353/422 [01:13<00:22,  3.01it/s]

MoviePy - Done.


 84%|████████▍ | 354/422 [01:13<00:23,  2.87it/s]


File: datasets/train/Normal/P8amgAResD4_000279_000289.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 28.74%
    Sitting down: 16.68%
         Talking: 14.18%
      Meditating: 5.76%
         Dancing: 4.83%



 84%|████████▍ | 354/422 [01:14<00:23,  2.87it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/pz28ycbJiQc_000002_000012.wav


 84%|████████▍ | 355/422 [01:14<00:22,  2.94it/s]

MoviePy - Done.

File: datasets/train/Normal/pz28ycbJiQc_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 61.89%
         Reading: 8.32%
      Meditating: 6.58%
  Using computer: 3.25%
     Celebrating: 3.09%



 84%|████████▍ | 355/422 [01:14<00:22,  2.94it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/PzhivDBg9F0_000016_000026.wav


 84%|████████▍ | 356/422 [01:14<00:21,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/PzhivDBg9F0_000016_000026.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 19.70%
         Reading: 11.52%
Organizing space: 10.59%
      Using drug: 8.96%
        Studying: 8.68%



 84%|████████▍ | 356/422 [01:14<00:21,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/QAKwGG98wvc_000020_000030.wav


 85%|████████▍ | 357/422 [01:14<00:21,  3.08it/s]

MoviePy - Done.

File: datasets/train/Normal/QAKwGG98wvc_000020_000030.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 21.31%
  Using computer: 16.95%
      Using drug: 12.66%
        Chatting: 6.46%
        Speaking: 6.17%



 85%|████████▍ | 357/422 [01:15<00:21,  3.08it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/rLQ5fmVSL94_000026_000036.wav


 85%|████████▍ | 358/422 [01:15<00:20,  3.05it/s]

MoviePy - Done.

File: datasets/train/Normal/rLQ5fmVSL94_000026_000036.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 15.45%
      Decorating: 11.13%
    Sitting down: 10.84%
      Using drug: 8.22%
  Using computer: 5.41%



 85%|████████▍ | 358/422 [01:15<00:20,  3.05it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/X46EhfXTqxg_000727_000737.wav


 85%|████████▌ | 359/422 [01:15<00:20,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/X46EhfXTqxg_000727_000737.mp4
Ground Truth: Normal
Top predictions:
        Studying: 15.13%
No people is in the room: 12.92%
  Using computer: 11.42%
         Working: 11.11%
      Decorating: 11.02%



 85%|████████▌ | 359/422 [01:15<00:20,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_0xvqLRQvOA_000074_000084.wav


 85%|████████▌ | 360/422 [01:15<00:20,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/_0xvqLRQvOA_000074_000084.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 58.45%
No people is in the room: 19.40%
   Doing laundry: 6.58%
   Mopping floor: 4.34%
   Setting table: 1.74%



 85%|████████▌ | 360/422 [01:16<00:20,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_3vZKUxeQtg_000298_000308.wav


 86%|████████▌ | 361/422 [01:16<00:19,  3.05it/s]

MoviePy - Done.

File: datasets/train/Normal/_3vZKUxeQtg_000298_000308.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 91.27%
        Drinking: 8.01%
Playing with pets: 0.37%
         Choking: 0.11%
         Smoking: 0.06%



 86%|████████▌ | 361/422 [01:16<00:19,  3.05it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_4EOMgSAs2o_000094_000104.wav


 86%|████████▌ | 362/422 [01:16<00:19,  3.02it/s]

MoviePy - Done.

File: datasets/train/Normal/_4EOMgSAs2o_000094_000104.mp4
Ground Truth: Normal
Top predictions:
         Reading: 27.93%
         Talking: 20.83%
        Speaking: 12.98%
No people is in the room: 11.67%
      Meditating: 5.48%



 86%|████████▌ | 362/422 [01:16<00:19,  3.02it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_5ItAUGPgqI_000742_000752.wav


 86%|████████▌ | 363/422 [01:16<00:19,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/_5ItAUGPgqI_000742_000752.mp4
Ground Truth: Normal
Top predictions:
         Cooking: 51.64%
Unattended cooking: 45.29%
   Setting table: 1.36%
No people is in the room: 0.70%
   Doing laundry: 0.43%



 86%|████████▌ | 363/422 [01:17<00:19,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_82X-ic2kME_000001_000011.wav


 86%|████████▋ | 364/422 [01:17<00:18,  3.15it/s]

MoviePy - Done.

File: datasets/train/Normal/_82X-ic2kME_000001_000011.mp4
Ground Truth: Normal
Top predictions:
  Brushing teeth: 43.18%
No people is in the room: 16.04%
         Singing: 3.82%
        Speaking: 3.57%
         Talking: 3.26%



 86%|████████▋ | 364/422 [01:17<00:18,  3.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_8w7SkEkehQ_000037_000047.wav


 86%|████████▋ | 365/422 [01:17<00:18,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/_8w7SkEkehQ_000037_000047.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 34.47%
       Showering: 16.99%
         Cooking: 16.66%
Unattended cooking: 11.72%
No people is in the room: 8.76%



 86%|████████▋ | 365/422 [01:17<00:18,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_99gyAUUgy4_000051_000061.wav


 87%|████████▋ | 366/422 [01:17<00:18,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/_99gyAUUgy4_000051_000061.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 32.47%
  Using computer: 18.92%
      Using drug: 8.62%
    Sitting down: 5.94%
   Setting table: 3.66%



 87%|████████▋ | 366/422 [01:18<00:18,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_A0_T6M6ULQ_000004_000014.wav


 87%|████████▋ | 367/422 [01:18<00:18,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/_A0_T6M6ULQ_000004_000014.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 34.66%
   Doing laundry: 19.98%
         Reading: 10.78%
         Working: 10.00%
         Talking: 5.05%



 87%|████████▋ | 367/422 [01:18<00:18,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_cMswRMB3WU_000124_000134.wav


 87%|████████▋ | 368/422 [01:18<00:17,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/_cMswRMB3WU_000124_000134.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 28.31%
     Celebrating: 15.51%
      Decorating: 7.29%
Playing with pets: 6.60%
  Using computer: 4.88%



 87%|████████▋ | 368/422 [01:18<00:17,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_cOAB4Neu5Q_000237_000247.wav


 87%|████████▋ | 369/422 [01:18<00:17,  3.10it/s]

MoviePy - Done.

File: datasets/train/Normal/_cOAB4Neu5Q_000237_000247.mp4
Ground Truth: Normal
Top predictions:
         Reading: 28.23%
No people is in the room: 22.07%
Playing board games: 15.58%
         Talking: 6.97%
        Studying: 6.18%



 87%|████████▋ | 369/422 [01:19<00:17,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_fDxRDV_D28_000077_000087.wav


 88%|████████▊ | 370/422 [01:19<00:17,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/_fDxRDV_D28_000077_000087.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 71.73%
No people is in the room: 9.34%
         Dancing: 3.92%
          Dining: 3.14%
    Sitting down: 2.74%



 88%|████████▊ | 370/422 [01:19<00:17,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_hnn0Q5Ee0g_000037_000047.wav


 88%|████████▊ | 371/422 [01:19<00:16,  3.04it/s]

MoviePy - Done.

File: datasets/train/Normal/_hnn0Q5Ee0g_000037_000047.mp4
Ground Truth: Normal
Top predictions:
        Knitting: 18.96%
  Brushing teeth: 14.39%
        Cleaning: 14.20%
        Painting: 13.39%
Playing with pets: 9.70%



 88%|████████▊ | 371/422 [01:19<00:16,  3.04it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_jSACF07scs_000041_000051.wav


 88%|████████▊ | 372/422 [01:19<00:16,  3.12it/s]

MoviePy - Done.

File: datasets/train/Normal/_jSACF07scs_000041_000051.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 73.47%
    Sitting down: 10.42%
   Mopping floor: 5.59%
   Setting table: 3.15%
         Talking: 2.16%



 88%|████████▊ | 372/422 [01:19<00:16,  3.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_JuSKnGWkp0_000230_000240.wav


 88%|████████▊ | 373/422 [01:20<00:16,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/_JuSKnGWkp0_000230_000240.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 35.40%
   Setting table: 23.85%
No people is in the room: 10.22%
     Celebrating: 8.62%
         Reading: 6.63%



 88%|████████▊ | 373/422 [01:20<00:16,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_Kcgee5kKS0_000057_000067.wav


 89%|████████▊ | 374/422 [01:20<00:15,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/_Kcgee5kKS0_000057_000067.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 40.01%
   Doing laundry: 28.92%
   Setting table: 12.26%
   Mopping floor: 4.77%
    Sitting down: 3.38%



 89%|████████▊ | 374/422 [01:20<00:15,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_L2i7A8dMTg_000020_000030.wav


 89%|████████▉ | 375/422 [01:20<00:15,  3.03it/s]

MoviePy - Done.

File: datasets/train/Normal/_L2i7A8dMTg_000020_000030.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 82.46%
Playing with pets: 10.95%
   Setting table: 1.72%
   Mopping floor: 1.50%
Playing board games: 0.74%



 89%|████████▉ | 375/422 [01:20<00:15,  3.03it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_ONRAEcQsqw_000101_000111.wav


 89%|████████▉ | 376/422 [01:21<00:14,  3.09it/s]

MoviePy - Done.

File: datasets/train/Normal/_ONRAEcQsqw_000101_000111.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 50.42%
     Celebrating: 6.08%
      Exercising: 5.64%
  Using computer: 3.57%
         Working: 3.41%



 89%|████████▉ | 376/422 [01:21<00:14,  3.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_P7OzblP4KU_000002_000012.wav


 89%|████████▉ | 377/422 [01:21<00:14,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/_P7OzblP4KU_000002_000012.mp4
Ground Truth: Normal
Top predictions:
       Showering: 80.04%
No people is in the room: 9.44%
   Doing laundry: 4.84%
Playing with pets: 2.45%
   Mopping floor: 1.21%



 89%|████████▉ | 377/422 [01:21<00:14,  3.14it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_Q5f75CvNfw_000103_000113.wav


 90%|████████▉ | 378/422 [01:21<00:14,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/_Q5f75CvNfw_000103_000113.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 16.74%
        Painting: 12.56%
      Decorating: 11.55%
      Using drug: 8.95%
        Studying: 4.81%



 90%|████████▉ | 378/422 [01:21<00:14,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_SlPWSA-1bU_000144_000154.wav


 90%|████████▉ | 379/422 [01:22<00:14,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/_SlPWSA-1bU_000144_000154.mp4
Ground Truth: Normal
Top predictions:
         Potluck: 38.45%
     Celebrating: 26.33%
No people is in the room: 14.08%
      Meditating: 4.27%
Playing board games: 3.10%



 90%|████████▉ | 379/422 [01:22<00:14,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_SsAvyBagZw_000011_000021.wav


 90%|█████████ | 380/422 [01:22<00:13,  3.11it/s]

MoviePy - Done.

File: datasets/train/Normal/_SsAvyBagZw_000011_000021.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 37.16%
              TV: 25.45%
         Working: 24.31%
  Using computer: 3.57%
     Laying down: 1.62%



 90%|█████████ | 380/422 [01:22<00:13,  3.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_TqghEKOFAQ_000045_000055.wav


 90%|█████████ | 381/422 [01:22<00:12,  3.16it/s]

MoviePy - Done.

File: datasets/train/Normal/_TqghEKOFAQ_000045_000055.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 62.65%
      Using drug: 16.94%
   Setting table: 3.30%
No people is in the room: 2.57%
         Reading: 1.96%



 90%|█████████ | 381/422 [01:22<00:12,  3.16it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_WijOegXa_Q_000391_000401.wav


 91%|█████████ | 382/422 [01:23<00:12,  3.14it/s]

MoviePy - Done.

File: datasets/train/Normal/_WijOegXa_Q_000391_000401.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 79.32%
     Eating food: 9.75%
          Dining: 3.05%
No people is in the room: 2.19%
   Setting table: 1.43%

This video datasets/train/Smoke or Fire or Open flame/posVideo1.868.mp4 has no audio!


 91%|█████████ | 384/422 [01:23<00:07,  4.88it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo10.869.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo11.870.mp4 has no audio!


 91%|█████████▏| 386/422 [01:23<00:05,  6.51it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo2.871.mp4 has no audio!


 92%|█████████▏| 388/422 [01:23<00:04,  8.01it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo3.872.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo4.873.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo5.874.mp4 has no audio!


 92%|█████████▏| 390/422 [01:23<00:03,  9.33it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo6.875.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo7.876.mp4 has no audio!


 93%|█████████▎| 392/422 [01:23<00:02, 10.42it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo8.877.mp4 has no audio!


 93%|█████████▎| 394/422 [01:23<00:02, 11.11it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo9.878.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video102.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video109.mp4 has no audio!


 94%|█████████▍| 396/422 [01:24<00:02, 11.61it/s]

This video datasets/train/Smoke or Fire or Open flame/Video11.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video115.mp4 has no audio!


 94%|█████████▍| 398/422 [01:24<00:02, 11.98it/s]

This video datasets/train/Smoke or Fire or Open flame/Video134_1.mp4 has no audio!


 95%|█████████▍| 400/422 [01:24<00:01, 11.84it/s]

This video datasets/train/Smoke or Fire or Open flame/Video136.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video14.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video149.mp4 has no audio!


 95%|█████████▌| 402/422 [01:24<00:01, 12.15it/s]

This video datasets/train/Smoke or Fire or Open flame/Video150.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video157.mp4 has no audio!


 96%|█████████▌| 404/422 [01:24<00:01, 12.35it/s]

This video datasets/train/Smoke or Fire or Open flame/Video170.mp4 has no audio!


 96%|█████████▌| 406/422 [01:24<00:01, 12.48it/s]

This video datasets/train/Smoke or Fire or Open flame/Video202.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video209.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video21.mp4 has no audio!


 97%|█████████▋| 408/422 [01:25<00:01, 12.74it/s]

This video datasets/train/Smoke or Fire or Open flame/Video234.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video239.mp4 has no audio!


 97%|█████████▋| 410/422 [01:25<00:00, 12.80it/s]

This video datasets/train/Smoke or Fire or Open flame/Video257.mp4 has no audio!


 98%|█████████▊| 412/422 [01:25<00:00, 12.65it/s]

This video datasets/train/Smoke or Fire or Open flame/Video37.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video51.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video67.mp4 has no audio!


 98%|█████████▊| 414/422 [01:25<00:00, 11.21it/s]

This video datasets/train/Smoke or Fire or Open flame/Video73.mp4 has no audio!


 99%|█████████▊| 416/422 [01:25<00:00, 11.53it/s]

This video datasets/train/Smoke or Fire or Open flame/Video74.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video75_1.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video75_2.mp4 has no audio!


 99%|█████████▉| 418/422 [01:25<00:00, 11.76it/s]

This video datasets/train/Smoke or Fire or Open flame/Video78.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video82.mp4 has no audio!


100%|█████████▉| 420/422 [01:26<00:00, 11.85it/s]

This video datasets/train/Smoke or Fire or Open flame/Video85.mp4 has no audio!


100%|██████████| 422/422 [01:26<00:00,  4.89it/s]

This video datasets/train/Smoke or Fire or Open flame/Video91.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video99.mp4 has no audio!





In [24]:
# Per-label tally: print one "<label>: <correct> / <total>" line for each key
# in `total`, in the mapping's own iteration order.
# NOTE(review): assumes `total` and `correct` are dict-like counters populated
# by the inference cell above — confirm.
for label, n_total in total.items():
    n_correct = correct[label]
    print(f"{label}: {n_correct} / {n_total}")

Crying: 36 / 50
Falling down: 0 / 30
Glass breaking: 10 / 52
Gunshot: 0 / 50
Normal: 0 / 200
Smoke or Fire or Open flame: 0 / 40
