### Set the corresponding values in the cell below. Afterwards, just run the following cells

In [1]:
import pathlib
# Root folder that holds the dataset
dataset_root_path = pathlib.Path("datasets")
# CSV file listing the class labels (one label per row, first column)
labels_csv_path = "datasets/home_labels.csv"
# CSV file the prediction results are written to
output_csv_path = "outputs/clap_results.csv"

### Import libraries 

In [2]:
from msclap import CLAP
import torch.nn.functional as F
import numpy as np
import torch
import os
from moviepy.editor import VideoFileClip

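### Get Audio from video

Audio is extracted from each `.mp4` file inside the inference loop further below and saved as a `.wav` file under `datasets/audios/`; `.wav` inputs are used as they are.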

### Read class Labels

In [3]:
import csv

# Build the label <-> id lookup tables from the class-label CSV.
# Only the first column of each row is read. Ids are assigned in file order
# and always equal the label's position in `class_labels`, so an id and an
# index into `class_labels` can be used interchangeably.
label2id = {}
id2label = {}
# newline='' is the mode the csv module recommends for files it parses
with open(labels_csv_path, mode='r', newline='') as file:
    csv_reader = csv.reader(file)

    for row in csv_reader:
        # Blank lines come back as empty rows; skip them instead of failing on row[0]
        if not row:
            continue
        class_name = row[0].strip()
        if not class_name:
            continue
        # A repeated label keeps its first id; numbering by row would leave gaps
        # and make ids disagree with positions in `class_labels`
        if class_name in label2id:
            continue
        class_id = len(label2id)
        label2id[class_name] = class_id
        id2label[class_id] = class_name

class_labels = list(label2id.keys())

print(f"{len(class_labels)} Unique classes: {class_labels}.")

70 Unique classes: ['No people is in the room', 'Studying', 'Typing', 'Using computer', 'Making phone calls', 'Play with phone/tablet', 'Playing with pets', 'Sitting down', 'Reading', 'Writing', 'Setting table', 'Eating food', 'Dining', 'Cooking', 'Sleeping', 'Laying down', 'Picking up objects', 'Brushing teeth', 'Showering', 'Playing music', 'TV', 'Cleaning', 'Doing laundry', 'Mopping floor', 'Vacumning', 'Organizing space', 'Sewing', 'Knitting', 'Decorating', 'Party', 'Chatting', 'Talking', 'Singing', 'Laughing', 'Speaking', 'Dancing', 'Drinking', 'Meditating', 'Drawing', 'Painting', 'Playing board games', 'Playing video games', 'Taking photos', 'Potluck', 'Working', 'Exercising', 'Walking', 'Running', 'Celebrating', 'Physical altercations', 'Verbal confrontations', 'Using drug', 'Theft or vandalism', 'Fighting', 'Domestic violence', 'Break in', 'Glass breaking', 'Unattended cooking', 'Smoke or Fire or Open flame', 'Smoking', 'Gunshot', 'Making noise', 'Falling down', 'Tripping', 'Cr

### CLAP inference with given audio

In [4]:
from tqdm import tqdm
from collections import defaultdict
import csv

# Run CLAP zero-shot classification over every file in <dataset root>/train/<class name>/
# and write one CSV row per file: file path, ground-truth class id, then the
# top-5 predicted class ids, each followed by its softmax confidence.
# Videos without an audio track get the placeholder pair (-1, 0) five times.
# `correct` and `total` hold per-class counts for later accuracy reporting.
train_paths = list(dataset_root_path.glob("train/*/*"))

# Load model (Choose between versions '2022' or '2023')
# The model weight will be downloaded automatically if `model_fp` is not specified
clap_model = CLAP(version = '2023', use_cuda=True)

# The text prompts are identical for every file, so embed them once up front
# instead of once per loop iteration
with torch.no_grad():
    text_embeddings = clap_model.get_text_embeddings([f"This is a sound of {c}" for c in class_labels])

# Audio tracks extracted from videos are mirrored under <dataset root>/audios/...
audio_root = dataset_root_path / "audios"

# open() does not create missing folders, so make sure the output folder exists
output_dir = os.path.dirname(output_csv_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

correct = defaultdict(int)
total = defaultdict(int)
with open(output_csv_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["file path", "ground truth", "top1 pred", "confidence", "top2 pred", "confidence", "top3 pred", "confidence", "top4 pred", "confidence", "top5 pred", "confidence"])

    for path in tqdm(train_paths):
        # sanity format check: skip anything that is neither a video nor an audio file
        suffix = path.suffix.lower()
        if suffix not in (".mp4", ".wav"):
            print(f"ERROR: {path} is not a supported file type")
            continue

        # The class name is the parent folder name (separator-independent)
        label = path.parent.name
        out_row = [path, label2id[label]]
        # Files without audio are still counted in the per-class total
        total[label] += 1

        file_path = path.as_posix()
        if suffix == ".mp4":
            video = VideoFileClip(file_path)
            try:
                if video.audio is None:
                    print(f"This video {file_path} has no audio!")
                    out_row.extend([-1, 0] * 5) # use -1 if no audio
                    writer.writerow(out_row)
                    continue

                # Same relative location as the video, but below the audios folder
                audio_file = (audio_root / path.relative_to(dataset_root_path)).with_suffix(".wav")
                audio_file.parent.mkdir(parents=True, exist_ok=True)
                audio_path = audio_file.as_posix()
                video.audio.write_audiofile(audio_path, verbose=False)
            finally:
                # Release the reader held by the clip, also on the no-audio path
                video.close()
        else:
            audio_path = file_path

        # make CLAP predictions
        with torch.no_grad():

            # Extract audio embeddings
            audio_embeddings = clap_model.get_audio_embeddings([audio_path])

            # Compute similarity between audio and text embeddings
            similarities = clap_model.compute_similarity(audio_embeddings, text_embeddings)

        similarity = F.softmax(similarities, dim=1)
        values, indices = similarity[0].topk(5)
        pred_index = indices[0].item()

        # Output the results

        print("\nFile:", file_path)
        print("Ground Truth: {}".format(label))
        print("Top predictions:")
        for value, index in zip(values, indices):
            print(f"{class_labels[index]:>16s}: {100 * value.item():.2f}%")
            out_row.append(index.item())
            out_row.append(round(value.item(), 6))
        print("")
        writer.writerow(out_row)

        if label == class_labels[pred_index]:
            correct[label] += 1
        


  0%|          | 0/424 [00:00<?, ?it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/9q8mf6GUiiE_000009_000019.wav


  0%|          | 0/424 [00:00<?, ?it/s]

MoviePy - Done.


  0%|          | 1/424 [00:03<23:04,  3.27s/it]


File: datasets/train/Crying/9q8mf6GUiiE_000009_000019.mp4
Ground Truth: Crying
Top predictions:
     Suffocating: 27.43%
          Crying: 19.03%
      Using drug: 9.28%
No people is in the room: 5.53%
     Eating food: 3.88%

datasets/audios/train/Crying


  0%|          | 1/424 [00:03<23:04,  3.27s/it]

MoviePy - Writing audio in datasets/audios/train/Crying/9s4bfiyc9Yc_000195_000205.wav


  0%|          | 2/424 [00:03<10:57,  1.56s/it]

MoviePy - Done.

File: datasets/train/Crying/9s4bfiyc9Yc_000195_000205.mp4
Ground Truth: Crying
Top predictions:
          Crying: 85.34%
     Suffocating: 11.34%
Playing with pets: 0.98%
Domestic violence: 0.41%
Making phone calls: 0.31%



  0%|          | 2/424 [00:03<10:57,  1.56s/it]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a4xHx59uFgk_000003_000013.wav


  1%|          | 3/424 [00:04<07:09,  1.02s/it]

MoviePy - Done.

File: datasets/train/Crying/a4xHx59uFgk_000003_000013.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 32.01%
Playing with pets: 12.06%
          Crying: 11.23%
   Doing laundry: 7.60%
   Mopping floor: 4.96%



  1%|          | 3/424 [00:04<07:09,  1.02s/it]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a50yimv7Lqg_000026_000036.wav


  1%|          | 3/424 [00:04<07:09,  1.02s/it]

MoviePy - Done.


  1%|          | 4/424 [00:04<05:33,  1.26it/s]


File: datasets/train/Crying/a50yimv7Lqg_000026_000036.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 34.94%
Playing with pets: 25.10%
     Celebrating: 8.47%
         Singing: 6.79%
     Eating food: 5.10%



  1%|          | 4/424 [00:04<05:33,  1.26it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Afhkn2h-wLE_000000_000010.wav


  1%|          | 5/424 [00:04<04:34,  1.53it/s]

MoviePy - Done.

File: datasets/train/Crying/Afhkn2h-wLE_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 48.32%
     Suffocating: 24.56%
Playing with pets: 8.80%
        Sleeping: 2.26%
     Celebrating: 1.73%



  1%|          | 5/424 [00:05<04:34,  1.53it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/aMAJ-DRt6bU_000116_000126.wav


  1%|▏         | 6/424 [00:05<04:01,  1.73it/s]

MoviePy - Done.

File: datasets/train/Crying/aMAJ-DRt6bU_000116_000126.mp4
Ground Truth: Crying
Top predictions:
          Crying: 50.21%
        Laughing: 14.85%
         Choking: 11.92%
        Sleeping: 9.63%
     Suffocating: 5.11%



  1%|▏         | 6/424 [00:05<04:01,  1.73it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/a_yBdlVHrK8_000000_000010.wav


  2%|▏         | 7/424 [00:05<03:35,  1.93it/s]

MoviePy - Done.

File: datasets/train/Crying/a_yBdlVHrK8_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 85.98%
     Suffocating: 10.52%
Domestic violence: 0.58%
Playing with pets: 0.45%
      Meditating: 0.42%



  2%|▏         | 7/424 [00:05<03:35,  1.93it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/fKrcKrs-o_w_000000_000010.wav


  2%|▏         | 7/424 [00:05<03:35,  1.93it/s]

MoviePy - Done.


  2%|▏         | 8/424 [00:06<03:32,  1.95it/s]


File: datasets/train/Crying/fKrcKrs-o_w_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 81.14%
     Suffocating: 8.59%
Playing with pets: 1.19%
Making phone calls: 1.02%
         Choking: 0.91%

datasets/audios/train/Crying


  2%|▏         | 8/424 [00:06<03:32,  1.95it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/fLzGu07zA-I_000008_000018.wav


  2%|▏         | 9/424 [00:06<03:12,  2.16it/s]

MoviePy - Done.

File: datasets/train/Crying/fLzGu07zA-I_000008_000018.mp4
Ground Truth: Crying
Top predictions:
          Crying: 89.18%
     Suffocating: 6.93%
Making phone calls: 1.33%
      Meditating: 0.40%
  Brushing teeth: 0.35%



  2%|▏         | 9/424 [00:06<03:12,  2.16it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/FM06rkKxFms_000010_000020.wav


  2%|▏         | 9/424 [00:06<03:12,  2.16it/s]

MoviePy - Done.


  2%|▏         | 10/424 [00:07<03:23,  2.03it/s]


File: datasets/train/Crying/FM06rkKxFms_000010_000020.mp4
Ground Truth: Crying
Top predictions:
          Crying: 60.26%
     Suffocating: 19.67%
Playing with pets: 8.63%
     Celebrating: 2.93%
        Laughing: 1.47%

datasets/audios/train/Crying


  2%|▏         | 10/424 [00:07<03:23,  2.03it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/G6QENvmNkRE_000000_000010.wav


  3%|▎         | 11/424 [00:07<03:08,  2.19it/s]

MoviePy - Done.

File: datasets/train/Crying/G6QENvmNkRE_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 41.36%
     Celebrating: 13.90%
        Sleeping: 11.10%
Playing with pets: 9.03%
         Singing: 2.90%



  3%|▎         | 11/424 [00:07<03:08,  2.19it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Ga9lFu4gurw_000015_000025.wav


  3%|▎         | 11/424 [00:07<03:08,  2.19it/s]

MoviePy - Done.


  3%|▎         | 12/424 [00:07<03:12,  2.14it/s]


File: datasets/train/Crying/Ga9lFu4gurw_000015_000025.mp4
Ground Truth: Crying
Top predictions:
          Crying: 79.00%
     Suffocating: 13.71%
Domestic violence: 3.25%
Playing with pets: 1.71%
Making phone calls: 0.36%

datasets/audios/train/Crying


  3%|▎         | 12/424 [00:08<03:12,  2.14it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/GnIgOUPiMqE_000023_000033.wav


  3%|▎         | 13/424 [00:08<03:00,  2.28it/s]

MoviePy - Done.

File: datasets/train/Crying/GnIgOUPiMqE_000023_000033.mp4
Ground Truth: Crying
Top predictions:
          Crying: 58.10%
     Suffocating: 17.91%
      Meditating: 9.41%
         Singing: 2.81%
Domestic violence: 2.22%



  3%|▎         | 13/424 [00:08<03:00,  2.28it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/gpaANcmFUOI_000003_000013.wav


  3%|▎         | 13/424 [00:08<03:00,  2.28it/s]

MoviePy - Done.

File: datasets/train/Crying/gpaANcmFUOI_000003_000013.mp4
Ground Truth: Crying
Top predictions:


  3%|▎         | 14/424 [00:08<03:05,  2.21it/s]

          Crying: 78.51%
     Suffocating: 13.95%
Playing with pets: 2.22%
No people is in the room: 1.07%
Domestic violence: 0.82%

datasets/audios/train/Crying


  3%|▎         | 14/424 [00:09<03:05,  2.21it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/G_-Hx6u40nE_000032_000042.wav


  4%|▎         | 15/424 [00:09<02:54,  2.35it/s]

MoviePy - Done.

File: datasets/train/Crying/G_-Hx6u40nE_000032_000042.mp4
Ground Truth: Crying
Top predictions:
          Crying: 70.15%
     Suffocating: 14.35%
      Meditating: 2.72%
Domestic violence: 2.17%
Playing with pets: 2.02%



  4%|▎         | 15/424 [00:09<02:54,  2.35it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/H85OllZBK70_000013_000023.wav


  4%|▎         | 15/424 [00:09<02:54,  2.35it/s]

MoviePy - Done.

File: datasets/train/Crying/H85OllZBK70_000013_000023.mp4
Ground Truth: Crying
Top predictions:


  4%|▍         | 16/424 [00:09<02:58,  2.29it/s]

          Crying: 62.78%
     Suffocating: 22.80%
      Meditating: 3.62%
No people is in the room: 2.27%
     Celebrating: 1.60%

datasets/audios/train/Crying


  4%|▍         | 16/424 [00:09<02:58,  2.29it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/Ha3vwLcjMAs_000157_000167.wav


  4%|▍         | 17/424 [00:10<02:49,  2.40it/s]

MoviePy - Done.

File: datasets/train/Crying/Ha3vwLcjMAs_000157_000167.mp4
Ground Truth: Crying
Top predictions:
   Doing laundry: 32.95%
  Brushing teeth: 19.10%
     Suffocating: 12.14%
          Crying: 7.47%
Playing with pets: 7.32%



  4%|▍         | 17/424 [00:10<02:49,  2.40it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/l5UWczmUVeY_000019_000029.wav


  4%|▍         | 17/424 [00:10<02:49,  2.40it/s]

MoviePy - Done.


  4%|▍         | 18/424 [00:10<03:00,  2.25it/s]


File: datasets/train/Crying/l5UWczmUVeY_000019_000029.mp4
Ground Truth: Crying
Top predictions:
          Crying: 42.06%
     Suffocating: 40.77%
Playing with pets: 3.80%
      Meditating: 2.20%
        Laughing: 2.02%

datasets/audios/train/Crying


  4%|▍         | 18/424 [00:10<03:00,  2.25it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/oH_BDY4bWp4_000000_000010.wav


  4%|▍         | 18/424 [00:10<03:00,  2.25it/s]

MoviePy - Done.


  4%|▍         | 19/424 [00:10<02:54,  2.32it/s]


File: datasets/train/Crying/oH_BDY4bWp4_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 84.91%
     Suffocating: 7.72%
Domestic violence: 1.36%
      Using drug: 1.22%
Making phone calls: 1.06%



  4%|▍         | 19/424 [00:11<02:54,  2.32it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Oi7DvmU-tF0_000000_000010.wav


  4%|▍         | 19/424 [00:11<02:54,  2.32it/s]

MoviePy - Done.


  5%|▍         | 20/424 [00:11<03:02,  2.22it/s]


File: datasets/train/Crying/Oi7DvmU-tF0_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 91.47%
     Suffocating: 5.36%
Domestic violence: 0.96%
Making phone calls: 0.28%
Playing with pets: 0.28%

datasets/audios/train/Crying


  5%|▍         | 20/424 [00:11<03:02,  2.22it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/oNiWBhu1-JQ_000000_000010.wav


  5%|▍         | 21/424 [00:11<02:51,  2.35it/s]

MoviePy - Done.

File: datasets/train/Crying/oNiWBhu1-JQ_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 71.23%
     Suffocating: 21.80%
      Meditating: 1.46%
        Sleeping: 1.14%
Playing with pets: 0.92%



  5%|▍         | 21/424 [00:11<02:51,  2.35it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/Pa4kyZAUTXQ_000017_000027.wav


  5%|▍         | 21/424 [00:12<02:51,  2.35it/s]

MoviePy - Done.


                                                


File: datasets/train/Crying/Pa4kyZAUTXQ_000017_000027.mp4
Ground Truth: Crying
Top predictions:
          Crying: 61.20%
     Suffocating: 22.14%
Playing with pets: 3.98%
        Sleeping: 2.40%
      Meditating: 1.86%

datasets/audios/train/Crying


  5%|▌         | 22/424 [00:12<02:59,  2.24it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/pg4HbzuxCIE_000022_000032.wav


  5%|▌         | 23/424 [00:12<02:49,  2.36it/s]

MoviePy - Done.

File: datasets/train/Crying/pg4HbzuxCIE_000022_000032.mp4
Ground Truth: Crying
Top predictions:
          Crying: 86.26%
     Suffocating: 9.90%
Playing with pets: 0.84%
Making phone calls: 0.56%
      Meditating: 0.46%



  5%|▌         | 23/424 [00:12<02:49,  2.36it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/pHgqoFKTBDc_000004_000014.wav


  5%|▌         | 23/424 [00:12<02:49,  2.36it/s]

MoviePy - Done.


  6%|▌         | 24/424 [00:13<02:54,  2.29it/s]


File: datasets/train/Crying/pHgqoFKTBDc_000004_000014.mp4
Ground Truth: Crying
Top predictions:
          Crying: 85.14%
     Suffocating: 6.41%
      Meditating: 2.57%
      Using drug: 1.52%
        Sleeping: 1.04%

datasets/audios/train/Crying


  6%|▌         | 24/424 [00:13<02:54,  2.29it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/pZxdg1Stvb8_000000_000010.wav


  6%|▌         | 25/424 [00:13<02:45,  2.41it/s]

MoviePy - Done.

File: datasets/train/Crying/pZxdg1Stvb8_000000_000010.mp4
Ground Truth: Crying
Top predictions:
          Crying: 63.94%
     Suffocating: 16.75%
      Using drug: 3.17%
     Celebrating: 3.16%
Playing with pets: 1.78%



  6%|▌         | 25/424 [00:13<02:45,  2.41it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/QApH290EHcU_000035_000045.wav


  6%|▌         | 25/424 [00:13<02:45,  2.41it/s]

MoviePy - Done.

File: datasets/train/Crying/QApH290EHcU_000035_000045.mp4
Ground Truth: Crying
Top predictions:


  6%|▌         | 26/424 [00:13<02:51,  2.31it/s]

          Crying: 46.15%
Playing with pets: 18.49%
     Suffocating: 13.17%
        Laughing: 5.35%
      Meditating: 2.95%

datasets/audios/train/Crying


  6%|▌         | 26/424 [00:14<02:51,  2.31it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/qFOg87dNSh4_000001_000011.wav


  6%|▋         | 27/424 [00:14<02:46,  2.39it/s]

MoviePy - Done.

File: datasets/train/Crying/qFOg87dNSh4_000001_000011.mp4
Ground Truth: Crying
Top predictions:
          Crying: 56.36%
     Suffocating: 18.35%
        Laughing: 15.92%
Playing with pets: 3.96%
  Brushing teeth: 1.28%



  6%|▋         | 27/424 [00:14<02:46,  2.39it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/quf-iuFOT4s_000000_000010.wav


  6%|▋         | 27/424 [00:14<02:46,  2.39it/s]

MoviePy - Done.


  7%|▋         | 28/424 [00:14<02:56,  2.24it/s]


File: datasets/train/Crying/quf-iuFOT4s_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 60.45%
Playing with pets: 12.69%
          Crying: 7.15%
     Suffocating: 3.39%
     Celebrating: 3.04%



  7%|▋         | 28/424 [00:15<02:56,  2.24it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/rCwNyAjkGPQ_000031_000041.wav


  7%|▋         | 29/424 [00:15<02:49,  2.32it/s]

MoviePy - Done.

File: datasets/train/Crying/rCwNyAjkGPQ_000031_000041.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 48.88%
          Crying: 8.69%
     Suffocating: 7.59%
      Meditating: 6.62%
     Celebrating: 5.45%



  7%|▋         | 29/424 [00:15<02:49,  2.32it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/sjZsedv-jG8_000008_000018.wav


  7%|▋         | 29/424 [00:15<02:49,  2.32it/s]

MoviePy - Done.


  7%|▋         | 30/424 [00:15<02:54,  2.25it/s]


File: datasets/train/Crying/sjZsedv-jG8_000008_000018.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 45.73%
          Crying: 30.92%
Playing with pets: 9.08%
     Suffocating: 3.71%
         Talking: 2.12%



  7%|▋         | 30/424 [00:15<02:54,  2.25it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/tIm0yA_u5Qc_000000_000010.wav


  7%|▋         | 31/424 [00:16<02:50,  2.31it/s]

MoviePy - Done.

File: datasets/train/Crying/tIm0yA_u5Qc_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 40.80%
Playing with pets: 34.74%
     Celebrating: 10.10%
     Suffocating: 4.36%
No people is in the room: 2.86%



  7%|▋         | 31/424 [00:16<02:50,  2.31it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/tlY2Cy-VH_g_000005_000015.wav


  7%|▋         | 31/424 [00:16<02:50,  2.31it/s]

MoviePy - Done.


  8%|▊         | 32/424 [00:16<03:04,  2.13it/s]


File: datasets/train/Crying/tlY2Cy-VH_g_000005_000015.mp4
Ground Truth: Crying
Top predictions:
          Crying: 45.34%
     Suffocating: 40.24%
No people is in the room: 3.43%
      Meditating: 2.19%
Domestic violence: 1.66%

datasets/audios/train/Crying


  8%|▊         | 32/424 [00:16<03:04,  2.13it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/TNl9NjfWZ1o_000084_000094.wav


  8%|▊         | 33/424 [00:17<02:53,  2.25it/s]

MoviePy - Done.

File: datasets/train/Crying/TNl9NjfWZ1o_000084_000094.mp4
Ground Truth: Crying
Top predictions:
     Suffocating: 37.59%
          Crying: 37.42%
Playing with pets: 16.15%
        Laughing: 1.26%
Domestic violence: 0.97%



  8%|▊         | 33/424 [00:17<02:53,  2.25it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/u-oc-Eln_yc_000003_000013.wav


  8%|▊         | 33/424 [00:17<02:53,  2.25it/s]

MoviePy - Done.


  8%|▊         | 34/424 [00:17<02:53,  2.25it/s]


File: datasets/train/Crying/u-oc-Eln_yc_000003_000013.mp4
Ground Truth: Crying
Top predictions:
          Crying: 48.61%
     Suffocating: 22.84%
Playing with pets: 9.09%
No people is in the room: 6.96%
Domestic violence: 3.57%

datasets/audios/train/Crying


  8%|▊         | 34/424 [00:17<02:53,  2.25it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/Ux2iImyAiL4_000001_000011.wav


  8%|▊         | 34/424 [00:17<02:53,  2.25it/s]

MoviePy - Done.


  8%|▊         | 35/424 [00:17<02:49,  2.30it/s]


File: datasets/train/Crying/Ux2iImyAiL4_000001_000011.mp4
Ground Truth: Crying
Top predictions:
          Crying: 44.48%
Playing with pets: 18.15%
     Suffocating: 14.10%
        Sleeping: 7.98%
     Eating food: 2.89%



  8%|▊         | 35/424 [00:18<02:49,  2.30it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/VcHysqkJPSE_000002_000012.wav


  8%|▊         | 35/424 [00:18<02:49,  2.30it/s]

MoviePy - Done.


  8%|▊         | 36/424 [00:18<02:48,  2.30it/s]


File: datasets/train/Crying/VcHysqkJPSE_000002_000012.mp4
Ground Truth: Crying
Top predictions:
     Celebrating: 17.81%
No people is in the room: 16.71%
      Meditating: 9.20%
         Talking: 6.87%
      Using drug: 4.60%

datasets/audios/train/Crying


  8%|▊         | 36/424 [00:18<02:48,  2.30it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/X3G7b1JNAfs_000000_000010.wav


  9%|▊         | 37/424 [00:18<02:41,  2.40it/s]

MoviePy - Done.

File: datasets/train/Crying/X3G7b1JNAfs_000000_000010.mp4
Ground Truth: Crying
Top predictions:
        Laughing: 45.90%
     Celebrating: 13.85%
Playing with pets: 8.58%
     Suffocating: 7.81%
          Crying: 4.63%



  9%|▊         | 37/424 [00:18<02:41,  2.40it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/XBNgVxsrqo4_000010_000020.wav


  9%|▊         | 37/424 [00:19<02:41,  2.40it/s]

MoviePy - Done.

File: datasets/train/Crying/XBNgVxsrqo4_000010_000020.mp4
Ground Truth: Crying
Top predictions:


  9%|▉         | 38/424 [00:19<02:46,  2.32it/s]

          Crying: 46.85%
     Suffocating: 16.44%
Domestic violence: 7.06%
No people is in the room: 5.72%
Playing with pets: 4.06%

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/xDLo1xt8P6U_000014_000024.wav


  9%|▉         | 39/424 [00:19<02:35,  2.47it/s]

MoviePy - Done.

File: datasets/train/Crying/xDLo1xt8P6U_000014_000024.mp4
Ground Truth: Crying
Top predictions:
          Crying: 78.78%
     Suffocating: 10.48%
Domestic violence: 3.26%
No people is in the room: 3.19%
      Meditating: 1.20%



  9%|▉         | 39/424 [00:19<02:35,  2.47it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/yAgm_XnqBec_000010_000020.wav


  9%|▉         | 39/424 [00:19<02:35,  2.47it/s]

MoviePy - Done.

File: datasets/train/Crying/yAgm_XnqBec_000010_000020.mp4
Ground Truth: Crying
Top predictions:


  9%|▉         | 40/424 [00:20<02:48,  2.28it/s]

          Crying: 62.37%
     Suffocating: 21.02%
Domestic violence: 6.38%
No people is in the room: 3.90%
Playing with pets: 3.40%

datasets/audios/train/Crying


  9%|▉         | 40/424 [00:20<02:48,  2.28it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/yDD0alN95O8_000015_000025.wav


 10%|▉         | 41/424 [00:20<02:40,  2.38it/s]

MoviePy - Done.

File: datasets/train/Crying/yDD0alN95O8_000015_000025.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 41.04%
     Suffocating: 19.09%
          Crying: 10.20%
Domestic violence: 7.49%
Playing with pets: 4.11%



 10%|▉         | 41/424 [00:20<02:40,  2.38it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/yDKz5An0qd0_000024_000034.wav


 10%|▉         | 41/424 [00:20<02:40,  2.38it/s]

MoviePy - Done.

File: datasets/train/Crying/yDKz5An0qd0_000024_000034.mp4
Ground Truth: Crying
Top predictions:


 10%|▉         | 42/424 [00:20<02:46,  2.29it/s]

          Crying: 43.29%
        Laughing: 10.39%
        Sleeping: 8.05%
     Suffocating: 6.14%
         Talking: 5.92%



 10%|▉         | 42/424 [00:21<02:46,  2.29it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/zQWdo_Er87M_000008_000018.wav


 10%|█         | 43/424 [00:21<02:42,  2.34it/s]

MoviePy - Done.

File: datasets/train/Crying/zQWdo_Er87M_000008_000018.mp4
Ground Truth: Crying
Top predictions:
          Crying: 76.88%
     Suffocating: 15.63%
Domestic violence: 1.58%
      Meditating: 1.53%
No people is in the room: 0.97%



 10%|█         | 43/424 [00:21<02:42,  2.34it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/ZrFW1iPMnig_000060_000070.wav


 10%|█         | 43/424 [00:21<02:42,  2.34it/s]

MoviePy - Done.


 10%|█         | 44/424 [00:21<02:42,  2.34it/s]


File: datasets/train/Crying/ZrFW1iPMnig_000060_000070.mp4
Ground Truth: Crying
Top predictions:
          Crying: 68.31%
     Suffocating: 13.06%
        Laughing: 8.01%
Playing with pets: 3.93%
  Brushing teeth: 0.88%

datasets/audios/train/Crying


 10%|█         | 44/424 [00:21<02:42,  2.34it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/zwOBqeFTgiU_000012_000022.wav


 11%|█         | 45/424 [00:22<02:34,  2.45it/s]

MoviePy - Done.

File: datasets/train/Crying/zwOBqeFTgiU_000012_000022.mp4
Ground Truth: Crying
Top predictions:
          Crying: 65.21%
     Suffocating: 21.30%
Domestic violence: 4.50%
Playing with pets: 2.93%
No people is in the room: 1.18%



 11%|█         | 45/424 [00:22<02:34,  2.45it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_ceBK5pQTrs_000033_000043.wav


 11%|█         | 45/424 [00:22<02:34,  2.45it/s]

MoviePy - Done.


 11%|█         | 46/424 [00:22<02:48,  2.24it/s]


File: datasets/train/Crying/_ceBK5pQTrs_000033_000043.mp4
Ground Truth: Crying
Top predictions:
          Crying: 68.13%
     Suffocating: 17.26%
Playing with pets: 2.69%
Domestic violence: 2.46%
No people is in the room: 2.26%



 11%|█         | 46/424 [00:22<02:48,  2.24it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_fVz9qyplBc_000078_000088.wav


 11%|█         | 47/424 [00:23<02:43,  2.30it/s]

MoviePy - Done.

File: datasets/train/Crying/_fVz9qyplBc_000078_000088.mp4
Ground Truth: Crying
Top predictions:
          Crying: 73.88%
     Suffocating: 17.38%
Playing with pets: 4.22%
Domestic violence: 1.11%
      Meditating: 0.56%



 11%|█         | 47/424 [00:23<02:43,  2.30it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_k0fnTiKEMc_000077_000087.wav


 11%|█         | 47/424 [00:23<02:43,  2.30it/s]

MoviePy - Done.


 11%|█▏        | 48/424 [00:23<02:45,  2.27it/s]


File: datasets/train/Crying/_k0fnTiKEMc_000077_000087.mp4
Ground Truth: Crying
Top predictions:
          Crying: 75.52%
     Suffocating: 13.14%
No people is in the room: 4.42%
        Laughing: 1.67%
Playing with pets: 1.40%

datasets/audios/train/Crying


 11%|█▏        | 48/424 [00:23<02:45,  2.27it/s]

MoviePy - Writing audio in datasets/audios/train/Crying/_uPh9i-xaaE_000094_000104.wav


 12%|█▏        | 49/424 [00:23<02:39,  2.36it/s]

MoviePy - Done.

File: datasets/train/Crying/_uPh9i-xaaE_000094_000104.mp4
Ground Truth: Crying
Top predictions:
          Crying: 91.30%
     Suffocating: 6.20%
Making phone calls: 0.50%
Playing with pets: 0.46%
      Meditating: 0.31%



 12%|█▏        | 49/424 [00:24<02:39,  2.36it/s]

datasets/audios/train/Crying
MoviePy - Writing audio in datasets/audios/train/Crying/_wRQiJdk2Rw_000004_000014.wav


 12%|█▏        | 49/424 [00:24<02:39,  2.36it/s]

MoviePy - Done.


 12%|█▏        | 50/424 [00:24<02:44,  2.28it/s]


File: datasets/train/Crying/_wRQiJdk2Rw_000004_000014.mp4
Ground Truth: Crying
Top predictions:
No people is in the room: 40.78%
     Eating food: 15.98%
          Crying: 13.01%
        Laughing: 7.48%
   Doing laundry: 3.35%



 12%|█▏        | 52/424 [00:24<01:44,  3.55it/s]

This video datasets/train/Falling down/fall-01-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-02-cam0-rgb.mp4 has no audio!


 13%|█▎        | 55/424 [00:24<01:04,  5.68it/s]

This video datasets/train/Falling down/fall-03-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-04-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-05-cam0-rgb.mp4 has no audio!


 14%|█▎        | 58/424 [00:25<00:45,  8.01it/s]

This video datasets/train/Falling down/fall-06-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-07-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-08-cam0-rgb.mp4 has no audio!


 14%|█▍        | 61/424 [00:25<00:40,  9.01it/s]

This video datasets/train/Falling down/fall-09-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-10-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-11-cam0-rgb.mp4 has no audio!


 15%|█▍        | 63/424 [00:25<00:38,  9.42it/s]

This video datasets/train/Falling down/fall-12-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-13-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-14-cam0-rgb.mp4 has no audio!


 16%|█▌        | 67/424 [00:26<00:34, 10.22it/s]

This video datasets/train/Falling down/fall-15-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-16-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-17-cam0-rgb.mp4 has no audio!


 16%|█▋        | 69/424 [00:26<00:33, 10.54it/s]

This video datasets/train/Falling down/fall-18-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-19-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-20-cam0-rgb.mp4 has no audio!


 17%|█▋        | 73/424 [00:26<00:34, 10.24it/s]

This video datasets/train/Falling down/fall-21-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-22-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-23-cam0-rgb.mp4 has no audio!


 18%|█▊        | 75/424 [00:26<00:36,  9.48it/s]

This video datasets/train/Falling down/fall-24-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-25-cam0-rgb.mp4 has no audio!


 18%|█▊        | 77/424 [00:27<00:34, 10.13it/s]

This video datasets/train/Falling down/fall-26-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-27-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-28-cam0-rgb.mp4 has no audio!


 19%|█▉        | 81/424 [00:27<00:33, 10.24it/s]

This video datasets/train/Falling down/fall-29-cam0-rgb.mp4 has no audio!
This video datasets/train/Falling down/fall-30-cam0-rgb.mp4 has no audio!

File: datasets/train/Glass breaking/1000_1380.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 87.66%
        Break in: 10.62%
Theft or vandalism: 0.81%
Picking up objects: 0.61%
Furniture Collapse: 0.13%



 20%|█▉        | 83/424 [00:27<00:33, 10.30it/s]


File: datasets/train/Glass breaking/1000_1480.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 95.94%
        Break in: 3.29%
Picking up objects: 0.45%
Theft or vandalism: 0.08%
         Smoking: 0.08%


File: datasets/train/Glass breaking/1000_1560.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 34.79%
Picking up objects: 28.76%
Theft or vandalism: 22.16%
        Break in: 12.95%
Furniture Collapse: 0.46%



 20%|██        | 85/424 [00:27<00:40,  8.39it/s]


File: datasets/train/Glass breaking/1000_1680.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 80.65%
  Glass breaking: 18.10%
Theft or vandalism: 0.37%
Picking up objects: 0.32%
        Tripping: 0.18%


File: datasets/train/Glass breaking/1000_1720.wav
Ground Truth: Glass breaking
Top predictions:
Picking up objects: 56.56%
  Glass breaking: 19.08%
        Break in: 18.38%
Theft or vandalism: 4.07%
Furniture Collapse: 0.77%



 21%|██        | 87/424 [00:28<00:37,  8.89it/s]


File: datasets/train/Glass breaking/1000_1780.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 47.57%
        Break in: 37.60%
Theft or vandalism: 13.06%
Furniture Collapse: 0.66%
Picking up objects: 0.60%


File: datasets/train/Glass breaking/1000_1820.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 79.04%
        Break in: 15.90%
Theft or vandalism: 4.03%
Furniture Collapse: 0.39%
Picking up objects: 0.16%



 21%|██        | 88/424 [00:28<00:39,  8.42it/s]


File: datasets/train/Glass breaking/1000_2040.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 57.17%
Theft or vandalism: 22.03%
Furniture Collapse: 7.58%
  Glass breaking: 4.25%
     Laying down: 2.38%



 21%|██        | 90/424 [00:28<00:44,  7.47it/s]


File: datasets/train/Glass breaking/1000_2460.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 53.22%
        Break in: 27.71%
Theft or vandalism: 14.09%
Furniture Collapse: 4.28%
Physical altercations: 0.27%


File: datasets/train/Glass breaking/1000_2900.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 93.35%
  Glass breaking: 2.08%
Physical altercations: 1.58%
        Break in: 1.26%
Furniture Collapse: 0.53%



 22%|██▏       | 92/424 [00:28<00:38,  8.60it/s]


File: datasets/train/Glass breaking/1000_3340.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 42.95%
        Break in: 23.15%
Theft or vandalism: 16.86%
Furniture Collapse: 15.82%
Physical altercations: 0.41%


File: datasets/train/Glass breaking/2000_3000.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 79.00%
        Break in: 19.02%
Theft or vandalism: 0.89%
        Tripping: 0.34%
Physical altercations: 0.23%


File: datasets/train/Glass breaking/2000_3020.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 92.79%
        Break in: 6.98%
Theft or vandalism: 0.17%
Furniture Collapse: 0.05%
Picking up objects: 0.00%



 23%|██▎       | 96/424 [00:29<00:33,  9.78it/s]


File: datasets/train/Glass breaking/2000_3040.wav
Ground Truth: Glass breaking
Top predictions:
        Tripping: 46.19%
        Break in: 16.19%
Theft or vandalism: 11.44%
         Walking: 9.41%
         Running: 8.23%


File: datasets/train/Glass breaking/2000_3100.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 64.19%
        Break in: 34.81%
Furniture Collapse: 0.59%
Theft or vandalism: 0.32%
    Falling down: 0.05%


File: datasets/train/Glass breaking/2000_3600.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 53.85%
Picking up objects: 28.55%
  Glass breaking: 6.67%
        Break in: 3.95%
Physical altercations: 3.15%



 23%|██▎       | 99/424 [00:29<00:32,  9.91it/s]


File: datasets/train/Glass breaking/2560_2940.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 88.62%
        Break in: 10.07%
Theft or vandalism: 0.68%
Picking up objects: 0.40%
Furniture Collapse: 0.12%


File: datasets/train/Glass breaking/2680_4100.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 51.30%
        Break in: 31.62%
Furniture Collapse: 10.59%
Theft or vandalism: 6.34%
    Falling down: 0.09%


File: datasets/train/Glass breaking/2800_4200.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 45.43%
  Glass breaking: 35.72%
        Break in: 11.41%
Furniture Collapse: 6.35%
Physical altercations: 0.46%



 24%|██▍       | 101/424 [00:29<00:31, 10.10it/s]


File: datasets/train/Glass breaking/3000_3560.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 86.09%
        Break in: 8.88%
Theft or vandalism: 2.20%
Picking up objects: 1.90%
   Setting table: 0.22%


File: datasets/train/Glass breaking/3000_3580.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 70.51%
        Break in: 27.31%
Theft or vandalism: 1.26%
Picking up objects: 0.33%
Furniture Collapse: 0.31%


File: datasets/train/Glass breaking/3000_3700.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 96.83%
        Break in: 3.14%
Theft or vandalism: 0.01%
Furniture Collapse: 0.01%
         Gunshot: 0.00%



 24%|██▍       | 103/424 [00:29<00:31, 10.26it/s]


File: datasets/train/Glass breaking/3000_3780.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 44.45%
        Break in: 39.54%
Theft or vandalism: 13.76%
Picking up objects: 0.90%
Furniture Collapse: 0.66%


File: datasets/train/Glass breaking/3000_3800.wav
Ground Truth: Glass breaking
Top predictions:


 25%|██▍       | 105/424 [00:30<00:38,  8.23it/s]

  Glass breaking: 55.10%
        Break in: 42.39%
Picking up objects: 0.94%
    Falling down: 0.39%
Furniture Collapse: 0.36%


File: datasets/train/Glass breaking/3000_3820.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 61.48%
  Glass breaking: 16.78%
Picking up objects: 2.96%
   Setting table: 2.49%
          Dining: 2.33%



 25%|██▌       | 107/424 [00:30<00:35,  8.82it/s]


File: datasets/train/Glass breaking/3000_4100.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 84.76%
        Break in: 14.44%
Theft or vandalism: 0.39%
Furniture Collapse: 0.32%
         Gunshot: 0.04%


File: datasets/train/Glass breaking/3000_4200.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 52.86%
  Glass breaking: 41.44%
Theft or vandalism: 3.94%
Furniture Collapse: 1.60%
    Falling down: 0.11%


File: datasets/train/Glass breaking/3000_4320.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 52.91%
        Break in: 46.79%
Furniture Collapse: 0.16%
Theft or vandalism: 0.11%
    Falling down: 0.01%



 26%|██▌       | 110/424 [00:30<00:34,  9.14it/s]


File: datasets/train/Glass breaking/3000_4600.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 53.97%
Picking up objects: 28.10%
  Glass breaking: 6.57%
        Break in: 4.20%
Physical altercations: 3.25%


File: datasets/train/Glass breaking/4000_5100.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 65.76%
  Glass breaking: 29.71%
Theft or vandalism: 2.13%
Furniture Collapse: 1.56%
    Falling down: 0.24%



 26%|██▋       | 112/424 [00:31<00:40,  7.63it/s]


File: datasets/train/Glass breaking/4000_6240.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 44.93%
Furniture Collapse: 34.69%
        Break in: 12.15%
  Glass breaking: 7.08%
Picking up objects: 0.40%


File: datasets/train/Glass breaking/4540_5140.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 69.83%
  Glass breaking: 18.64%
Theft or vandalism: 10.08%
Physical altercations: 0.55%
Picking up objects: 0.20%



 27%|██▋       | 115/424 [00:31<00:34,  8.93it/s]


File: datasets/train/Glass breaking/5000_6400.wav
Ground Truth: Glass breaking
Top predictions:
Playing with pets: 22.18%
Theft or vandalism: 16.84%
  Glass breaking: 10.85%
Physical altercations: 9.75%
Domestic violence: 5.95%


File: datasets/train/Glass breaking/5000_6460.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 53.03%
        Break in: 28.06%
Theft or vandalism: 13.99%
Furniture Collapse: 4.23%
Physical altercations: 0.27%


File: datasets/train/Glass breaking/5000_6480.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 65.28%
        Break in: 21.37%
Furniture Collapse: 7.88%
Theft or vandalism: 5.32%
    Falling down: 0.08%



 28%|██▊       | 117/424 [00:31<00:32,  9.37it/s]


File: datasets/train/Glass breaking/5000_7380.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 67.21%
Theft or vandalism: 12.52%
        Break in: 11.80%
Furniture Collapse: 3.24%
Domestic violence: 2.00%


File: datasets/train/Glass breaking/5020_5600.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 70.44%
        Break in: 26.73%
Theft or vandalism: 1.56%
Picking up objects: 0.57%
Furniture Collapse: 0.31%



 28%|██▊       | 118/424 [00:31<00:43,  7.08it/s]


File: datasets/train/Glass breaking/5300_7040.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 63.55%
        Break in: 29.38%
Furniture Collapse: 4.46%
Theft or vandalism: 2.52%
    Falling down: 0.03%


File: datasets/train/Glass breaking/5380_7720.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 53.25%
Furniture Collapse: 16.71%
        Break in: 14.74%
Theft or vandalism: 14.54%
    Falling down: 0.27%



 29%|██▉       | 122/424 [00:32<00:34,  8.65it/s]


File: datasets/train/Glass breaking/5480_6940.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 58.65%
        Break in: 24.03%
  Glass breaking: 10.11%
         Gunshot: 2.03%
Furniture Collapse: 1.67%


File: datasets/train/Glass breaking/5720_8660.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 37.57%
        Break in: 26.85%
Theft or vandalism: 17.46%
Furniture Collapse: 17.32%
    Falling down: 0.49%


File: datasets/train/Glass breaking/5800_6180.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 86.33%
        Break in: 11.34%
Theft or vandalism: 1.00%
Picking up objects: 0.94%
Furniture Collapse: 0.15%



 29%|██▉       | 123/424 [00:32<00:34,  8.75it/s]


File: datasets/train/Glass breaking/5860_6580.wav
Ground Truth: Glass breaking
Top predictions:
        Break in: 88.13%
  Glass breaking: 11.32%
Furniture Collapse: 0.18%
Picking up objects: 0.12%
        Tripping: 0.08%



 29%|██▉       | 125/424 [00:32<00:41,  7.29it/s]


File: datasets/train/Glass breaking/6300_7880.wav
Ground Truth: Glass breaking
Top predictions:
Theft or vandalism: 45.36%
  Glass breaking: 29.65%
        Break in: 16.93%
Furniture Collapse: 7.50%
    Falling down: 0.18%


File: datasets/train/Glass breaking/6340_7020.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 92.52%
        Break in: 7.42%
Theft or vandalism: 0.03%
         Gunshot: 0.01%
Furniture Collapse: 0.01%


File: datasets/train/Glass breaking/6360_6920.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 89.51%
        Break in: 9.72%
Theft or vandalism: 0.44%
Picking up objects: 0.13%
Smoke or Fire or Open flame: 0.05%



 29%|██▉       | 125/424 [00:33<00:41,  7.29it/s]

datasets/audios/train/Glass breaking
MoviePy - Writing audio in datasets/audios/train/Glass breaking/breaking_glass_1.wav


 29%|██▉       | 125/424 [00:33<00:41,  7.29it/s]

MoviePy - Done.

File: datasets/train/Glass breaking/breaking_glass_1.mp4
Ground Truth: Glass breaking
Top predictions:


 30%|██▉       | 127/424 [00:33<01:01,  4.79it/s]

  Glass breaking: 92.38%
Furniture Collapse: 4.69%
Theft or vandalism: 2.61%
        Break in: 0.23%
    Falling down: 0.04%



 30%|██▉       | 127/424 [00:33<01:01,  4.79it/s]

datasets/audios/train/Glass breaking
MoviePy - Writing audio in datasets/audios/train/Glass breaking/breaking_glass_2.wav


 30%|██▉       | 127/424 [00:33<01:01,  4.79it/s]

MoviePy - Done.


 31%|███       | 130/424 [00:34<01:05,  4.48it/s]


File: datasets/train/Glass breaking/breaking_glass_2.mp4
Ground Truth: Glass breaking
Top predictions:
Furniture Collapse: 61.06%
  Glass breaking: 33.48%
        Break in: 2.03%
    Falling down: 1.97%
Theft or vandalism: 0.59%


File: datasets/train/Glass breaking/glass_break_1.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 86.63%
        Break in: 8.08%
Theft or vandalism: 3.44%
Furniture Collapse: 0.77%
Picking up objects: 0.67%


File: datasets/train/Glass breaking/glass_break_2.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 92.30%
        Break in: 7.41%
Furniture Collapse: 0.17%
Theft or vandalism: 0.06%
    Falling down: 0.02%



 31%|███       | 132/424 [00:34<00:50,  5.74it/s]


File: datasets/train/Glass breaking/glass_break_3.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 98.19%
        Break in: 1.75%
Furniture Collapse: 0.02%
Theft or vandalism: 0.01%
     Celebrating: 0.00%


File: datasets/train/Glass breaking/glass_break_4.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 91.25%
        Break in: 6.78%
Theft or vandalism: 1.56%
Furniture Collapse: 0.21%
      Decorating: 0.04%


File: datasets/train/Glass breaking/glass_break_5.wav
Ground Truth: Glass breaking
Top predictions:
  Glass breaking: 97.23%
        Break in: 2.48%
Theft or vandalism: 0.13%
Furniture Collapse: 0.13%
    Falling down: 0.01%



 32%|███▏      | 134/424 [00:34<00:43,  6.65it/s]


File: datasets/train/Gunshot/1000_1480.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 88.98%
  Glass breaking: 8.25%
        Tripping: 1.55%
Picking up objects: 0.34%
Theft or vandalism: 0.30%



 32%|███▏      | 137/424 [00:34<00:40,  7.16it/s]


File: datasets/train/Gunshot/1000_1560.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 99.43%
        Break in: 0.36%
Theft or vandalism: 0.05%
   Taking photos: 0.04%
        Tripping: 0.02%


File: datasets/train/Gunshot/1000_1680.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 92.65%
        Break in: 6.21%
      Exercising: 0.26%
        Tripping: 0.22%
        Fighting: 0.09%


File: datasets/train/Gunshot/1000_1720.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 64.92%
         Gunshot: 20.11%
      Exercising: 3.54%
        Tripping: 3.12%
Furniture Collapse: 2.40%



 33%|███▎      | 140/424 [00:35<00:34,  8.14it/s]


File: datasets/train/Gunshot/1000_1820.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 96.60%
        Break in: 2.32%
      Exercising: 0.48%
     Laying down: 0.14%
Theft or vandalism: 0.11%


File: datasets/train/Gunshot/1000_2400.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 93.37%
        Break in: 5.92%
     Laying down: 0.33%
        Tripping: 0.07%
Theft or vandalism: 0.06%


File: datasets/train/Gunshot/1000_3340.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 99.35%
Verbal confrontations: 0.12%
      Exercising: 0.10%
         Running: 0.07%
No people is in the room: 0.06%



 34%|███▎      | 143/424 [00:35<00:37,  7.54it/s]


File: datasets/train/Gunshot/1000_3680.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 98.99%
         Potluck: 0.19%
Theft or vandalism: 0.16%
        Fighting: 0.11%
Verbal confrontations: 0.10%


File: datasets/train/Gunshot/2000_2640.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 87.78%
   Taking photos: 3.78%
        Tripping: 2.92%
        Break in: 2.63%
Theft or vandalism: 0.71%


File: datasets/train/Gunshot/2000_2740.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 83.82%
   Taking photos: 5.53%
        Tripping: 4.86%
        Break in: 2.25%
         Running: 1.22%



 34%|███▍      | 145/424 [00:35<00:32,  8.48it/s]


File: datasets/train/Gunshot/2000_2760.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 95.43%
   Taking photos: 1.40%
        Break in: 1.21%
        Tripping: 0.78%
Theft or vandalism: 0.29%


File: datasets/train/Gunshot/2000_2800.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 95.60%
        Break in: 2.79%
      Exercising: 0.55%
Theft or vandalism: 0.28%
   Taking photos: 0.12%


File: datasets/train/Gunshot/2000_3000.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 88.96%
        Break in: 5.50%
Theft or vandalism: 1.97%
        Tripping: 1.07%
     Laying down: 0.49%



 35%|███▍      | 147/424 [00:36<00:30,  9.05it/s]


File: datasets/train/Gunshot/2000_3920.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 99.06%
Verbal confrontations: 0.32%
Smoke or Fire or Open flame: 0.14%
        Fighting: 0.12%
     Laying down: 0.07%



 35%|███▌      | 150/424 [00:36<00:33,  8.16it/s]


File: datasets/train/Gunshot/2000_4280.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 24.10%
    Falling down: 22.47%
  Glass breaking: 11.96%
Theft or vandalism: 11.66%
        Fighting: 4.62%


File: datasets/train/Gunshot/2000_4320.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 94.20%
Theft or vandalism: 2.79%
        Break in: 0.54%
        Fighting: 0.50%
Physical altercations: 0.36%


File: datasets/train/Gunshot/2400_2980.wav
Ground Truth: Gunshot
Top predictions:
Theft or vandalism: 38.39%
         Gunshot: 26.76%
        Tripping: 15.54%
        Break in: 5.97%
         Running: 3.86%



 36%|███▌      | 153/424 [00:36<00:30,  8.78it/s]


File: datasets/train/Gunshot/2480_3240.wav
Ground Truth: Gunshot
Top predictions:
Theft or vandalism: 74.54%
        Break in: 19.90%
  Glass breaking: 1.92%
Furniture Collapse: 0.86%
        Fighting: 0.64%


File: datasets/train/Gunshot/3000_3460.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 86.33%
  Glass breaking: 5.68%
        Tripping: 3.05%
Theft or vandalism: 1.46%
         Running: 1.01%


File: datasets/train/Gunshot/3000_3640.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 87.96%
   Taking photos: 3.38%
        Tripping: 2.95%
        Break in: 2.93%
Theft or vandalism: 0.65%


File: datasets/train/Gunshot/3000_3920.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 95.59%
        Break in: 2.54%
      Exercising: 0.53%
Theft or vandalism: 0.46%
     Laying down: 0.24%



 37%|███▋      | 157/424 [00:37<00:33,  8.08it/s]


File: datasets/train/Gunshot/3000_4320.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 96.47%
Smoke or Fire or Open flame: 1.19%
Theft or vandalism: 0.57%
Furniture Collapse: 0.27%
    Falling down: 0.22%


File: datasets/train/Gunshot/3720_4720.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 90.64%
        Break in: 4.57%
Theft or vandalism: 1.60%
        Tripping: 0.95%
        Fighting: 0.44%


File: datasets/train/Gunshot/4000_4380.wav
Ground Truth: Gunshot
Top predictions:
        Tripping: 51.87%
        Break in: 17.13%
         Gunshot: 10.20%
Playing video games: 7.70%
     Laying down: 5.74%



 38%|███▊      | 160/424 [00:37<00:29,  9.00it/s]


File: datasets/train/Gunshot/4000_4400.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 36.29%
        Break in: 24.28%
     Laying down: 13.21%
        Tripping: 7.88%
        Fighting: 4.14%


File: datasets/train/Gunshot/4000_4440.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 38.88%
         Gunshot: 26.36%
         Choking: 11.48%
      Exercising: 8.82%
        Knitting: 3.26%


File: datasets/train/Gunshot/4000_4460.wav
Ground Truth: Gunshot
Top predictions:
         Singing: 28.35%
         Gunshot: 26.12%
        Fighting: 17.19%
        Break in: 4.06%
Playing video games: 3.71%



 38%|███▊      | 161/424 [00:37<00:29,  9.01it/s]


File: datasets/train/Gunshot/4000_4640.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 66.96%
        Break in: 29.86%
      Exercising: 1.06%
     Laying down: 0.52%
        Tripping: 0.46%



 39%|███▊      | 164/424 [00:38<00:31,  8.16it/s]


File: datasets/train/Gunshot/4000_4660.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 90.88%
        Break in: 7.87%
  Glass breaking: 1.02%
Theft or vandalism: 0.09%
Furniture Collapse: 0.02%


File: datasets/train/Gunshot/4000_4780.wav
Ground Truth: Gunshot
Top predictions:
        Tripping: 26.40%
         Gunshot: 23.43%
        Break in: 12.18%
     Laying down: 7.18%
Play with phone/tablet: 5.59%


File: datasets/train/Gunshot/4000_5000.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 45.42%
Theft or vandalism: 18.84%
  Glass breaking: 16.76%
         Gunshot: 9.11%
Furniture Collapse: 8.45%



 39%|███▉      | 166/424 [00:38<00:28,  8.90it/s]


File: datasets/train/Gunshot/4000_5320.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 90.90%
        Break in: 2.44%
        Fighting: 1.51%
Theft or vandalism: 1.28%
Physical altercations: 0.76%


File: datasets/train/Gunshot/4000_5400.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 97.88%
Smoke or Fire or Open flame: 0.86%
        Break in: 0.46%
Theft or vandalism: 0.18%
   Taking photos: 0.14%


File: datasets/train/Gunshot/4000_6120.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 87.59%
        Fighting: 3.26%
     Celebrating: 1.90%
      Exercising: 1.80%
Domestic violence: 0.87%



 40%|████      | 170/424 [00:38<00:30,  8.25it/s]


File: datasets/train/Gunshot/4000_6560.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 86.84%
Theft or vandalism: 5.87%
      Exercising: 1.34%
   Taking photos: 1.15%
     Laying down: 0.94%


File: datasets/train/Gunshot/4260_4880.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 99.59%
        Break in: 0.20%
Theft or vandalism: 0.08%
         Running: 0.02%
   Taking photos: 0.02%


File: datasets/train/Gunshot/4840_5620.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 67.97%
Theft or vandalism: 6.94%
        Tripping: 4.02%
         Running: 3.14%
      Exercising: 2.92%



 41%|████      | 173/424 [00:39<00:27,  9.19it/s]


File: datasets/train/Gunshot/4860_5660.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 81.37%
   Taking photos: 5.10%
        Tripping: 4.30%
        Break in: 3.77%
Theft or vandalism: 1.97%


File: datasets/train/Gunshot/4920_5680.wav
Ground Truth: Gunshot
Top predictions:
          Crying: 61.58%
         Choking: 17.39%
        Break in: 2.66%
   Taking photos: 2.45%
     Laying down: 2.38%


File: datasets/train/Gunshot/5000_5400.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 96.62%
        Break in: 2.65%
   Taking photos: 0.15%
        Tripping: 0.13%
         Potluck: 0.08%



 41%|████      | 174/424 [00:39<00:28,  8.91it/s]


File: datasets/train/Gunshot/5000_5440.wav
Ground Truth: Gunshot
Top predictions:
         Choking: 49.41%
        Break in: 15.78%
        Fighting: 14.89%
         Singing: 4.34%
      Exercising: 2.88%



 42%|████▏     | 177/424 [00:39<00:30,  8.11it/s]


File: datasets/train/Gunshot/5000_5800.wav
Ground Truth: Gunshot
Top predictions:
          Crying: 42.51%
         Choking: 22.24%
         Gunshot: 4.68%
   Taking photos: 3.49%
Domestic violence: 3.35%


File: datasets/train/Gunshot/5000_6000.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 54.89%
Domestic violence: 21.92%
        Fighting: 9.26%
         Choking: 4.71%
        Break in: 3.63%


File: datasets/train/Gunshot/5000_6320.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 98.30%
        Break in: 1.53%
     Laying down: 0.04%
        Fighting: 0.02%
Playing video games: 0.02%



 42%|████▏     | 180/424 [00:40<00:26,  9.09it/s]


File: datasets/train/Gunshot/5000_7640.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 99.84%
Verbal confrontations: 0.03%
        Fighting: 0.02%
   Taking photos: 0.02%
Playing video games: 0.02%


File: datasets/train/Gunshot/5240_5900.wav
Ground Truth: Gunshot
Top predictions:
        Fighting: 27.17%
        Break in: 21.71%
         Choking: 14.89%
         Gunshot: 9.16%
      Exercising: 8.49%


File: datasets/train/Gunshot/5280_5700.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 46.43%
         Gunshot: 37.26%
    Making noise: 4.68%
        Fighting: 4.36%
        Tripping: 2.26%



 43%|████▎     | 181/424 [00:40<00:26,  9.14it/s]


File: datasets/train/Gunshot/5460_5820.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 43.18%
         Gunshot: 25.45%
Picking up objects: 11.43%
  Glass breaking: 6.86%
Playing video games: 2.45%



 43%|████▎     | 184/424 [00:40<00:29,  8.13it/s]


File: datasets/train/Gunshot/5520_6200.wav
Ground Truth: Gunshot
Top predictions:
         Choking: 53.91%
         Gunshot: 19.57%
        Break in: 4.66%
   Taking photos: 3.65%
          Crying: 3.64%


File: datasets/train/Gunshot/5560_5920.wav
Ground Truth: Gunshot
Top predictions:
        Break in: 45.68%
     Laying down: 31.81%
        Tripping: 8.69%
         Gunshot: 7.47%
    Making noise: 1.96%


File: datasets/train/Gunshot/5680_6140.wav
Ground Truth: Gunshot
Top predictions:
         Gunshot: 96.74%
        Break in: 2.64%
        Tripping: 0.12%
Smoke or Fire or Open flame: 0.12%
   Taking photos: 0.11%



 43%|████▎     | 184/424 [00:40<00:29,  8.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9gK-lchw9AU_000019_000029.wav


 43%|████▎     | 184/424 [00:40<00:29,  8.13it/s]

MoviePy - Done.


 44%|████▎     | 185/424 [00:41<00:54,  4.40it/s]


File: datasets/train/Normal/9gK-lchw9AU_000019_000029.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 33.78%
Playing with pets: 20.56%
      Using drug: 6.28%
     Suffocating: 6.19%
         Talking: 5.30%



 44%|████▎     | 185/424 [00:41<00:54,  4.40it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9iKBdRiyrT8_000018_000028.wav


 44%|████▍     | 186/424 [00:41<01:03,  3.73it/s]

MoviePy - Done.

File: datasets/train/Normal/9iKBdRiyrT8_000018_000028.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 91.34%
Playing with pets: 5.45%
        Drinking: 2.17%
         Choking: 0.38%
     Suffocating: 0.13%



 44%|████▍     | 186/424 [00:41<01:03,  3.73it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9j29_SoXvnk_000004_000014.wav


 44%|████▍     | 187/424 [00:41<01:09,  3.43it/s]

MoviePy - Done.

File: datasets/train/Normal/9j29_SoXvnk_000004_000014.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 39.05%
  Brushing teeth: 37.04%
       Showering: 7.90%
      Using drug: 2.19%
         Dancing: 1.72%



 44%|████▍     | 187/424 [00:42<01:09,  3.43it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9khsuBlB7KM_000052_000062.wav


 44%|████▍     | 187/424 [00:42<01:09,  3.43it/s]

MoviePy - Done.

File: datasets/train/Normal/9khsuBlB7KM_000052_000062.mp4
Ground Truth: Normal
Top predictions:


 44%|████▍     | 188/424 [00:42<01:16,  3.10it/s]

No people is in the room: 59.25%
Playing with pets: 6.86%
   Doing laundry: 6.53%
     Celebrating: 3.10%
    Sitting down: 2.81%



 44%|████▍     | 188/424 [00:42<01:16,  3.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/9obe2uzQuLw_000031_000041.wav


 45%|████▍     | 189/424 [00:42<01:16,  3.06it/s]

MoviePy - Done.

File: datasets/train/Normal/9obe2uzQuLw_000031_000041.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 62.43%
          Dining: 12.72%
         Talking: 2.52%
   Setting table: 2.36%
        Drinking: 2.34%



 45%|████▍     | 189/424 [00:42<01:16,  3.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/a2PzfV-nLA0_000273_000283.wav


 45%|████▍     | 189/424 [00:43<01:16,  3.06it/s]

MoviePy - Done.


 45%|████▍     | 190/424 [00:43<01:23,  2.79it/s]


File: datasets/train/Normal/a2PzfV-nLA0_000273_000283.mp4
Ground Truth: Normal
Top predictions:
         Cooking: 75.84%
Unattended cooking: 21.70%
        Cleaning: 0.68%
       Showering: 0.59%
        Flooding: 0.22%



 45%|████▍     | 190/424 [00:43<01:23,  2.79it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/A65W8cR_unk_000110_000120.wav


 45%|████▌     | 191/424 [00:43<01:24,  2.75it/s]

MoviePy - Done.

File: datasets/train/Normal/A65W8cR_unk_000110_000120.mp4
Ground Truth: Normal
Top predictions:
         Reading: 70.10%
        Speaking: 10.42%
         Talking: 4.45%
No people is in the room: 4.25%
Playing board games: 3.96%



 45%|████▌     | 191/424 [00:43<01:24,  2.75it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/a6RPzcU3bzA_000007_000017.wav


 45%|████▌     | 191/424 [00:43<01:24,  2.75it/s]

MoviePy - Done.


 45%|████▌     | 192/424 [00:44<01:37,  2.38it/s]


File: datasets/train/Normal/a6RPzcU3bzA_000007_000017.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 28.34%
         Dancing: 16.58%
No people is in the room: 12.05%
Playing with pets: 10.04%
      Meditating: 4.02%

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/A7Th1TrWh8U_000086_000096.wav


 46%|████▌     | 193/424 [00:44<01:30,  2.54it/s]

MoviePy - Done.

File: datasets/train/Normal/A7Th1TrWh8U_000086_000096.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 19.39%
        Knitting: 9.01%
    Sitting down: 8.24%
No people is in the room: 7.80%
         Smoking: 6.47%



 46%|████▌     | 193/424 [00:44<01:30,  2.54it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AA-ZLdnX1mA_000060_000070.wav


 46%|████▌     | 193/424 [00:44<01:30,  2.54it/s]

MoviePy - Done.


 46%|████▌     | 195/424 [00:45<01:18,  2.92it/s]


File: datasets/train/Normal/AA-ZLdnX1mA_000060_000070.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 41.81%
     Eating food: 28.49%
         Talking: 5.06%
        Chatting: 4.55%
        Drinking: 4.09%

This video datasets/train/Normal/adl-01-cam0-rgb.mp4 has no audio!


 46%|████▋     | 197/424 [00:45<00:50,  4.50it/s]

This video datasets/train/Normal/adl-02-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-03-cam0-rgb.mp4 has no audio!


 47%|████▋     | 200/424 [00:45<00:32,  6.99it/s]

This video datasets/train/Normal/adl-04-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-05-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-06-cam0-rgb.mp4 has no audio!


 48%|████▊     | 203/424 [00:45<00:26,  8.23it/s]

This video datasets/train/Normal/adl-07-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-08-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-09-cam0-rgb.mp4 has no audio!


 49%|████▊     | 206/424 [00:46<00:23,  9.21it/s]

This video datasets/train/Normal/adl-10-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-11-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-12-cam0-rgb.mp4 has no audio!


 49%|████▉     | 208/424 [00:46<00:22,  9.57it/s]

This video datasets/train/Normal/adl-13-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-14-cam0-rgb.mp4 has no audio!


 50%|████▉     | 211/424 [00:46<00:22,  9.66it/s]

This video datasets/train/Normal/adl-15-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-16-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-17-cam0-rgb.mp4 has no audio!


 50%|█████     | 213/424 [00:46<00:20, 10.20it/s]

This video datasets/train/Normal/adl-18-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-19-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-20-cam0-rgb.mp4 has no audio!


 51%|█████     | 215/424 [00:47<00:20, 10.21it/s]

This video datasets/train/Normal/adl-21-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-22-cam0-rgb.mp4 has no audio!


 51%|█████▏    | 218/424 [00:47<00:21,  9.47it/s]

This video datasets/train/Normal/adl-23-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-24-cam0-rgb.mp4 has no audio!


 52%|█████▏    | 220/424 [00:47<00:20, 10.04it/s]

This video datasets/train/Normal/adl-25-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-26-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-27-cam0-rgb.mp4 has no audio!


 53%|█████▎    | 224/424 [00:47<00:19, 10.21it/s]

This video datasets/train/Normal/adl-28-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-29-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-30-cam0-rgb.mp4 has no audio!


 53%|█████▎    | 226/424 [00:48<00:21,  9.26it/s]

This video datasets/train/Normal/adl-31-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-32-cam0-rgb.mp4 has no audio!


 54%|█████▍    | 229/424 [00:48<00:20,  9.58it/s]

This video datasets/train/Normal/adl-33-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-34-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-35-cam0-rgb.mp4 has no audio!


 54%|█████▍    | 231/424 [00:48<00:20,  9.54it/s]

This video datasets/train/Normal/adl-36-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-37-cam0-rgb.mp4 has no audio!


 55%|█████▌    | 234/424 [00:49<00:19,  9.92it/s]

This video datasets/train/Normal/adl-38-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-39-cam0-rgb.mp4 has no audio!
This video datasets/train/Normal/adl-40-cam0-rgb.mp4 has no audio!


 55%|█████▌    | 234/424 [00:49<00:19,  9.92it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/admrQMs57JQ_000002_000012.wav


 55%|█████▌    | 235/424 [00:49<00:31,  6.00it/s]

MoviePy - Done.

File: datasets/train/Normal/admrQMs57JQ_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 72.73%
   Doing laundry: 4.37%
         Working: 3.37%
         Talking: 3.31%
   Setting table: 3.24%



 55%|█████▌    | 235/424 [00:49<00:31,  6.00it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AdwS9UEt1Jg_000125_000135.wav


 56%|█████▌    | 236/424 [00:49<00:40,  4.70it/s]

MoviePy - Done.

File: datasets/train/Normal/AdwS9UEt1Jg_000125_000135.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 24.94%
      Using drug: 17.55%
  Using computer: 11.16%
Organizing space: 10.53%
      Decorating: 4.17%



 56%|█████▌    | 236/424 [00:49<00:40,  4.70it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ADzB-RTmLJU_000004_000014.wav


 56%|█████▌    | 236/424 [00:50<00:40,  4.70it/s]

MoviePy - Done.


 56%|█████▌    | 237/424 [00:50<00:59,  3.12it/s]


File: datasets/train/Normal/ADzB-RTmLJU_000004_000014.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 83.43%
        Cleaning: 3.75%
       Vacumning: 2.75%
Furniture Collapse: 1.79%
   Mopping floor: 1.36%

datasets/audios/train/Normal


 56%|█████▌    | 237/424 [00:50<00:59,  3.12it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/AG7rHLw-ckU_000301_000311.wav


 56%|█████▌    | 238/424 [00:50<01:01,  3.01it/s]

MoviePy - Done.

File: datasets/train/Normal/AG7rHLw-ckU_000301_000311.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 68.12%
   Setting table: 10.39%
         Working: 3.57%
  Using computer: 3.15%
   Doing laundry: 2.58%



 56%|█████▌    | 238/424 [00:50<01:01,  3.01it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AGmRgVcOLYA_000061_000071.wav


 56%|█████▋    | 239/424 [00:51<01:05,  2.85it/s]

MoviePy - Done.

File: datasets/train/Normal/AGmRgVcOLYA_000061_000071.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 49.31%
         Working: 7.24%
     Celebrating: 5.89%
        Studying: 5.65%
Playing board games: 5.49%



 56%|█████▋    | 239/424 [00:51<01:05,  2.85it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AIrIuRyN5VA_000097_000107.wav


 57%|█████▋    | 240/424 [00:51<01:06,  2.75it/s]

MoviePy - Done.

File: datasets/train/Normal/AIrIuRyN5VA_000097_000107.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 73.27%
       Vacumning: 4.95%
         Cooking: 4.75%
      Using drug: 2.65%
        Cleaning: 2.26%



 57%|█████▋    | 240/424 [00:51<01:06,  2.75it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aKKNRI3PYYs_000130_000140.wav


 57%|█████▋    | 241/424 [00:51<01:07,  2.71it/s]

MoviePy - Done.

File: datasets/train/Normal/aKKNRI3PYYs_000130_000140.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 55.26%
Playing with pets: 13.18%
     Celebrating: 9.59%
   Mopping floor: 6.91%
         Working: 2.38%



 57%|█████▋    | 241/424 [00:52<01:07,  2.71it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ALuLyCkzCt8_000060_000070.wav


 57%|█████▋    | 241/424 [00:52<01:07,  2.71it/s]

MoviePy - Done.


 57%|█████▋    | 242/424 [00:52<01:20,  2.27it/s]


File: datasets/train/Normal/ALuLyCkzCt8_000060_000070.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 34.49%
       Showering: 15.17%
   Mopping floor: 12.32%
      Using drug: 5.41%
   Doing laundry: 5.00%



 57%|█████▋    | 242/424 [00:52<01:20,  2.27it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/anDbHNhATAE_000039_000049.wav


 57%|█████▋    | 243/424 [00:52<01:18,  2.32it/s]

MoviePy - Done.

File: datasets/train/Normal/anDbHNhATAE_000039_000049.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 35.85%
No people is in the room: 20.71%
Playing with pets: 12.42%
    Sitting down: 10.42%
      Using drug: 3.49%



 57%|█████▋    | 243/424 [00:53<01:18,  2.32it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/anzXsViG6wc_000370_000380.wav


 58%|█████▊    | 244/424 [00:53<01:15,  2.39it/s]

MoviePy - Done.

File: datasets/train/Normal/anzXsViG6wc_000370_000380.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 43.25%
        Studying: 27.70%
        Painting: 12.50%
      Decorating: 5.51%
Organizing space: 2.85%



 58%|█████▊    | 244/424 [00:53<01:15,  2.39it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AObYuoaz-7o_000245_000255.wav


 58%|█████▊    | 245/424 [00:53<01:15,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/AObYuoaz-7o_000245_000255.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 99.86%
        Drinking: 0.10%
         Smoking: 0.02%
         Choking: 0.01%
No people is in the room: 0.00%



 58%|█████▊    | 245/424 [00:53<01:15,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aprcI6-IHmY_000146_000156.wav


 58%|█████▊    | 246/424 [00:54<01:11,  2.48it/s]

MoviePy - Done.

File: datasets/train/Normal/aprcI6-IHmY_000146_000156.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 36.73%
   Setting table: 27.60%
No people is in the room: 14.98%
      Exercising: 10.05%
   Mopping floor: 3.56%



 58%|█████▊    | 246/424 [00:54<01:11,  2.48it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/auSl2RAONs4_000007_000017.wav


 58%|█████▊    | 246/424 [00:54<01:11,  2.48it/s]

MoviePy - Done.


 58%|█████▊    | 247/424 [00:54<01:18,  2.26it/s]


File: datasets/train/Normal/auSl2RAONs4_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 44.54%
Playing with pets: 17.67%
         Talking: 9.94%
        Chatting: 4.37%
   Setting table: 2.74%



 58%|█████▊    | 247/424 [00:54<01:18,  2.26it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AV4t72IYADk_000048_000058.wav


 58%|█████▊    | 248/424 [00:55<01:16,  2.31it/s]

MoviePy - Done.

File: datasets/train/Normal/AV4t72IYADk_000048_000058.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 35.78%
    Sitting down: 16.92%
      Decorating: 8.58%
   Doing laundry: 8.37%
        Painting: 6.82%



 58%|█████▊    | 248/424 [00:55<01:16,  2.31it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AvgqtvtOgF4_000443_000453.wav


 59%|█████▊    | 249/424 [00:55<01:13,  2.39it/s]

MoviePy - Done.

File: datasets/train/Normal/AvgqtvtOgF4_000443_000453.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 31.03%
  Using computer: 14.64%
        Studying: 13.67%
   Setting table: 8.49%
No people is in the room: 7.61%



 59%|█████▊    | 249/424 [00:55<01:13,  2.39it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/aWEk9Sfs67c_000194_000204.wav


 59%|█████▊    | 249/424 [00:55<01:13,  2.39it/s]

MoviePy - Done.


 59%|█████▉    | 250/424 [00:56<01:20,  2.15it/s]


File: datasets/train/Normal/aWEk9Sfs67c_000194_000204.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 53.40%
     Eating food: 10.05%
    Sitting down: 6.84%
        Drinking: 3.34%
Playing with pets: 3.27%



 59%|█████▉    | 250/424 [00:56<01:20,  2.15it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Awm6MWRRAa4_000222_000232.wav


 59%|█████▉    | 251/424 [00:56<01:16,  2.26it/s]

MoviePy - Done.

File: datasets/train/Normal/Awm6MWRRAa4_000222_000232.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 45.42%
   Doing laundry: 20.20%
         Cooking: 16.79%
Unattended cooking: 5.43%
       Showering: 3.71%



 59%|█████▉    | 251/424 [00:56<01:16,  2.26it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AY1CsIuu3jg_000229_000239.wav


 59%|█████▉    | 252/424 [00:56<01:13,  2.34it/s]

MoviePy - Done.

File: datasets/train/Normal/AY1CsIuu3jg_000229_000239.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 31.39%
   Setting table: 18.50%
         Cooking: 11.59%
Unattended cooking: 9.84%
   Doing laundry: 8.87%



 59%|█████▉    | 252/424 [00:57<01:13,  2.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/AYdyPzM8oL8_000004_000014.wav


 59%|█████▉    | 252/424 [00:57<01:13,  2.34it/s]

MoviePy - Done.


 60%|█████▉    | 253/424 [00:57<01:22,  2.06it/s]


File: datasets/train/Normal/AYdyPzM8oL8_000004_000014.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 32.20%
     Eating food: 25.61%
Playing with pets: 14.23%
         Talking: 11.23%
         Reading: 3.85%



 60%|█████▉    | 253/424 [00:57<01:22,  2.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/B-SQ8cYQSMk_000222_000232.wav


 60%|█████▉    | 253/424 [00:57<01:22,  2.06it/s]

MoviePy - Done.


 60%|█████▉    | 254/424 [00:57<01:21,  2.10it/s]


File: datasets/train/Normal/B-SQ8cYQSMk_000222_000232.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 52.74%
   Doing laundry: 10.14%
        Studying: 5.53%
   Setting table: 3.68%
    Sitting down: 3.32%



 60%|█████▉    | 254/424 [00:58<01:21,  2.10it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/B9JqNr0e-Pw_000022_000032.wav


 60%|██████    | 255/424 [00:58<01:16,  2.22it/s]

MoviePy - Done.

File: datasets/train/Normal/B9JqNr0e-Pw_000022_000032.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 39.71%
   Doing laundry: 10.87%
No people is in the room: 7.49%
        Painting: 5.76%
      Decorating: 5.69%



 60%|██████    | 255/424 [00:58<01:16,  2.22it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/baf8O6sSMLI_000053_000063.wav


 60%|██████    | 255/424 [00:58<01:16,  2.22it/s]

MoviePy - Done.


 60%|██████    | 256/424 [00:58<01:19,  2.11it/s]


File: datasets/train/Normal/baf8O6sSMLI_000053_000063.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 57.58%
No people is in the room: 7.77%
      Exercising: 5.25%
    Sitting down: 3.87%
     Celebrating: 3.20%



 60%|██████    | 256/424 [00:59<01:19,  2.11it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BDbTkmhDM2A_000087_000097.wav


 61%|██████    | 257/424 [00:59<01:16,  2.17it/s]

MoviePy - Done.

File: datasets/train/Normal/BDbTkmhDM2A_000087_000097.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 23.62%
Playing board games: 21.14%
Playing with pets: 15.12%
         Reading: 6.65%
         Talking: 6.53%



 61%|██████    | 257/424 [00:59<01:16,  2.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BiqJ_FkwCUQ_000006_000016.wav


 61%|██████    | 258/424 [00:59<01:11,  2.33it/s]

MoviePy - Done.

File: datasets/train/Normal/BiqJ_FkwCUQ_000006_000016.mp4
Ground Truth: Normal
Top predictions:
         Working: 27.02%
   Setting table: 12.15%
        Studying: 9.74%
      Decorating: 9.60%
Unattended cooking: 6.94%



 61%|██████    | 258/424 [00:59<01:11,  2.33it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BjFtM8jhFoU_000008_000018.wav


 61%|██████    | 259/424 [00:59<01:07,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/BjFtM8jhFoU_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Dancing: 20.85%
      Using drug: 9.95%
  Using computer: 6.75%
   Doing laundry: 5.40%
        Cleaning: 5.09%



 61%|██████    | 259/424 [01:00<01:07,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bKGIbOPMSPQ_000002_000012.wav


 61%|██████    | 259/424 [01:00<01:07,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/bKGIbOPMSPQ_000002_000012.mp4
Ground Truth: Normal
Top predictions:


 61%|██████▏   | 260/424 [01:00<01:11,  2.28it/s]

   Doing laundry: 47.82%
         Working: 39.93%
  Using computer: 3.61%
       Showering: 2.93%
          Sewing: 0.90%

datasets/audios/train/Normal


 61%|██████▏   | 260/424 [01:00<01:11,  2.28it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/BL0sZO7hrIc_000427_000437.wav


 62%|██████▏   | 261/424 [01:00<01:08,  2.37it/s]

MoviePy - Done.

File: datasets/train/Normal/BL0sZO7hrIc_000427_000437.mp4
Ground Truth: Normal
Top predictions:
         Smoking: 51.54%
      Using drug: 20.31%
        Studying: 6.45%
  Using computer: 5.75%
     Eating food: 3.75%



 62%|██████▏   | 261/424 [01:01<01:08,  2.37it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bNRAN0wh0tc_000046_000056.wav


 62%|██████▏   | 262/424 [01:01<01:06,  2.45it/s]

MoviePy - Done.

File: datasets/train/Normal/bNRAN0wh0tc_000046_000056.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 46.18%
No people is in the room: 30.93%
        Drinking: 6.19%
         Smoking: 5.03%
        Chatting: 3.81%



 62%|██████▏   | 262/424 [01:01<01:06,  2.45it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BOc3MlonHb8_000075_000085.wav


 62%|██████▏   | 262/424 [01:01<01:06,  2.45it/s]

MoviePy - Done.


 62%|██████▏   | 263/424 [01:01<01:16,  2.09it/s]


File: datasets/train/Normal/BOc3MlonHb8_000075_000085.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 13.82%
      Meditating: 13.07%
No people is in the room: 12.88%
      Decorating: 12.68%
  Using computer: 8.31%



 62%|██████▏   | 263/424 [01:02<01:16,  2.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bPfJAqym2vc_000019_000029.wav


 62%|██████▏   | 263/424 [01:02<01:16,  2.09it/s]

MoviePy - Done.

File: datasets/train/Normal/bPfJAqym2vc_000019_000029.mp4
Ground Truth: Normal
Top predictions:


 62%|██████▏   | 264/424 [01:02<01:13,  2.17it/s]

  Using computer: 51.09%
      Using drug: 20.90%
        Painting: 4.36%
Organizing space: 3.50%
        Studying: 3.28%

datasets/audios/train/Normal


 62%|██████▏   | 264/424 [01:02<01:13,  2.17it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/bPIP4BmjDAw_000070_000080.wav


 62%|██████▎   | 265/424 [01:02<01:10,  2.27it/s]

MoviePy - Done.

File: datasets/train/Normal/bPIP4BmjDAw_000070_000080.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 50.50%
  Using computer: 18.97%
Organizing space: 11.98%
      Decorating: 2.38%
   Playing music: 2.28%



 62%|██████▎   | 265/424 [01:02<01:10,  2.27it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Bq0R48bHZnY_000412_000422.wav


 62%|██████▎   | 265/424 [01:02<01:10,  2.27it/s]

MoviePy - Done.


 63%|██████▎   | 266/424 [01:03<01:14,  2.12it/s]


File: datasets/train/Normal/Bq0R48bHZnY_000412_000422.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 64.64%
   Doing laundry: 18.95%
   Mopping floor: 5.45%
         Working: 2.17%
   Setting table: 1.48%

datasets/audios/train/Normal


 63%|██████▎   | 266/424 [01:03<01:14,  2.12it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/bSG07EG2vu8_000007_000017.wav


 63%|██████▎   | 267/424 [01:03<01:10,  2.24it/s]

MoviePy - Done.

File: datasets/train/Normal/bSG07EG2vu8_000007_000017.mp4
Ground Truth: Normal
Top predictions:
         Dancing: 58.26%
      Using drug: 4.69%
     Celebrating: 4.30%
Furniture Collapse: 2.30%
      Exercising: 2.22%



 63%|██████▎   | 267/424 [01:03<01:10,  2.24it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BSOYvQ1A7_s_000027_000037.wav


 63%|██████▎   | 267/424 [01:03<01:10,  2.24it/s]

MoviePy - Done.


 63%|██████▎   | 268/424 [01:04<01:08,  2.28it/s]


File: datasets/train/Normal/BSOYvQ1A7_s_000027_000037.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 24.64%
          Dining: 21.87%
         Potluck: 6.62%
   Setting table: 5.76%
        Drinking: 5.58%



 63%|██████▎   | 268/424 [01:04<01:08,  2.28it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/bt3ELWHiEds_000526_000536.wav


 63%|██████▎   | 268/424 [01:04<01:08,  2.28it/s]

MoviePy - Done.


 63%|██████▎   | 269/424 [01:04<01:14,  2.07it/s]


File: datasets/train/Normal/bt3ELWHiEds_000526_000536.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 38.96%
     Eating food: 34.24%
Playing with pets: 10.87%
        Drinking: 2.66%
         Talking: 2.09%

datasets/audios/train/Normal


 63%|██████▎   | 269/424 [01:04<01:14,  2.07it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/btIe4SwFUhg_000080_000090.wav


 63%|██████▎   | 269/424 [01:04<01:14,  2.07it/s]

MoviePy - Done.


 64%|██████▎   | 270/424 [01:05<01:11,  2.17it/s]


File: datasets/train/Normal/btIe4SwFUhg_000080_000090.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 40.69%
    Sitting down: 31.99%
        Studying: 4.05%
   Mopping floor: 2.69%
      Using drug: 2.00%

datasets/audios/train/Normal


 64%|██████▎   | 270/424 [01:05<01:11,  2.17it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/BvdknPz77Hs_000000_000010.wav


 64%|██████▍   | 271/424 [01:05<01:05,  2.33it/s]

MoviePy - Done.

File: datasets/train/Normal/BvdknPz77Hs_000000_000010.mp4
Ground Truth: Normal
Top predictions:
         Reading: 41.09%
No people is in the room: 7.47%
Playing board games: 5.88%
  Using computer: 3.95%
        Studying: 3.94%



 64%|██████▍   | 271/424 [01:05<01:05,  2.33it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BWqOCs4yVo4_000000_000010.wav


 64%|██████▍   | 272/424 [01:05<01:03,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/BWqOCs4yVo4_000000_000010.mp4
Ground Truth: Normal
Top predictions:
   Playing music: 76.07%
         Dancing: 18.94%
      Exercising: 0.82%
      Using drug: 0.57%
     Celebrating: 0.47%



 64%|██████▍   | 272/424 [01:05<01:03,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BzcfW7JTsGM_000045_000055.wav


 64%|██████▍   | 273/424 [01:06<00:59,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/BzcfW7JTsGM_000045_000055.mp4
Ground Truth: Normal
Top predictions:
        Speaking: 21.85%
         Talking: 20.82%
No people is in the room: 13.44%
         Reading: 10.44%
  Brushing teeth: 6.72%



 64%|██████▍   | 273/424 [01:06<00:59,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/BZOSTgHGUCE_000229_000239.wav


 64%|██████▍   | 273/424 [01:06<00:59,  2.52it/s]

MoviePy - Done.


 65%|██████▍   | 274/424 [01:06<01:08,  2.19it/s]


File: datasets/train/Normal/BZOSTgHGUCE_000229_000239.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 16.53%
     Celebrating: 14.23%
No people is in the room: 13.69%
   Doing laundry: 8.09%
  Using computer: 7.95%

datasets/audios/train/Normal


 65%|██████▍   | 274/424 [01:06<01:08,  2.19it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/c1o7820XyTI_000310_000320.wav


 65%|██████▍   | 274/424 [01:07<01:08,  2.19it/s]

MoviePy - Done.

File: datasets/train/Normal/c1o7820XyTI_000310_000320.mp4
Ground Truth: Normal
Top predictions:


 65%|██████▍   | 275/424 [01:07<01:06,  2.25it/s]

No people is in the room: 36.53%
    Sitting down: 32.81%
   Mopping floor: 12.03%
   Doing laundry: 4.30%
   Setting table: 4.07%

datasets/audios/train/Normal


 65%|██████▍   | 275/424 [01:07<01:06,  2.25it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/C3ayjYkczC4_000060_000070.wav


 65%|██████▍   | 275/424 [01:07<01:06,  2.25it/s]

MoviePy - Done.


 65%|██████▌   | 276/424 [01:07<01:03,  2.33it/s]


File: datasets/train/Normal/C3ayjYkczC4_000060_000070.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 49.32%
         Talking: 12.86%
         Reading: 11.25%
        Speaking: 5.08%
Playing with pets: 4.62%



 65%|██████▌   | 276/424 [01:07<01:03,  2.33it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/C45UEQ5Oxno_000075_000085.wav


 65%|██████▌   | 276/424 [01:07<01:03,  2.33it/s]

MoviePy - Done.


 65%|██████▌   | 277/424 [01:08<01:08,  2.13it/s]


File: datasets/train/Normal/C45UEQ5Oxno_000075_000085.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 33.26%
Playing with pets: 23.22%
         Talking: 8.86%
   Doing laundry: 6.48%
         Working: 4.92%



 65%|██████▌   | 277/424 [01:08<01:08,  2.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/c729EVAFb_s_000415_000425.wav


 66%|██████▌   | 278/424 [01:08<01:06,  2.21it/s]

MoviePy - Done.

File: datasets/train/Normal/c729EVAFb_s_000415_000425.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 19.60%
No people is in the room: 12.62%
        Studying: 10.80%
     Eating food: 6.49%
         Reading: 5.87%



 66%|██████▌   | 278/424 [01:08<01:06,  2.21it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/caz61OyDs1k_000045_000055.wav


 66%|██████▌   | 279/424 [01:08<01:02,  2.32it/s]

MoviePy - Done.

File: datasets/train/Normal/caz61OyDs1k_000045_000055.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 44.12%
          Dining: 16.11%
No people is in the room: 8.61%
   Setting table: 7.85%
     Eating food: 6.41%



 66%|██████▌   | 279/424 [01:09<01:02,  2.32it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cbXmtG99SU0_000000_000010.wav


 66%|██████▌   | 279/424 [01:09<01:02,  2.32it/s]

MoviePy - Done.


 66%|██████▌   | 280/424 [01:09<01:08,  2.09it/s]


File: datasets/train/Normal/cbXmtG99SU0_000000_000010.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 84.45%
        Laughing: 5.46%
         Talking: 2.03%
     Eating food: 1.94%
No people is in the room: 1.86%



 66%|██████▌   | 280/424 [01:09<01:08,  2.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cc4YIsJz5sg_000002_000012.wav


 66%|██████▋   | 281/424 [01:09<01:04,  2.23it/s]

MoviePy - Done.

File: datasets/train/Normal/cc4YIsJz5sg_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 15.45%
      Decorating: 11.13%
    Sitting down: 10.84%
      Using drug: 8.22%
  Using computer: 5.41%



 66%|██████▋   | 281/424 [01:10<01:04,  2.23it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cCJdUn_Dv2M_000181_000191.wav


 67%|██████▋   | 282/424 [01:10<00:59,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/cCJdUn_Dv2M_000181_000191.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 75.90%
   Doing laundry: 3.28%
     Eating food: 2.93%
   Mopping floor: 2.08%
    Sitting down: 2.03%



 67%|██████▋   | 282/424 [01:10<00:59,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CdtDTYW5YOs_000117_000127.wav


 67%|██████▋   | 283/424 [01:10<00:58,  2.42it/s]

MoviePy - Done.

File: datasets/train/Normal/CdtDTYW5YOs_000117_000127.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 92.15%
         Reading: 2.18%
     Celebrating: 1.20%
         Working: 0.76%
        Studying: 0.75%



 67%|██████▋   | 283/424 [01:10<00:58,  2.42it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CeYIbLlRVVg_000357_000367.wav


 67%|██████▋   | 284/424 [01:11<00:56,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/CeYIbLlRVVg_000357_000367.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 55.07%
  Using computer: 23.35%
No people is in the room: 6.32%
         Working: 3.37%
         Reading: 1.36%



 67%|██████▋   | 284/424 [01:11<00:56,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CfP4T-QKX4U_000332_000342.wav


 67%|██████▋   | 284/424 [01:11<00:56,  2.46it/s]

MoviePy - Done.


 67%|██████▋   | 285/424 [01:11<01:04,  2.14it/s]


File: datasets/train/Normal/CfP4T-QKX4U_000332_000342.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 37.77%
         Reading: 11.42%
         Talking: 10.28%
   Setting table: 8.31%
        Speaking: 6.59%

datasets/audios/train/Normal


 67%|██████▋   | 285/424 [01:11<01:04,  2.14it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/cgB_HnWGY9Y_000001_000011.wav


 67%|██████▋   | 286/424 [01:12<01:01,  2.24it/s]

MoviePy - Done.

File: datasets/train/Normal/cgB_HnWGY9Y_000001_000011.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 76.07%
         Dancing: 7.73%
   Doing laundry: 2.49%
           Party: 2.38%
Playing with pets: 1.86%



 67%|██████▋   | 286/424 [01:12<01:01,  2.24it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CGQfpj5Q2XA_000026_000036.wav


 68%|██████▊   | 287/424 [01:12<00:57,  2.39it/s]

MoviePy - Done.

File: datasets/train/Normal/CGQfpj5Q2XA_000026_000036.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 37.50%
      Using drug: 7.23%
   Setting table: 5.86%
No people is in the room: 5.47%
    Sitting down: 5.21%



 68%|██████▊   | 287/424 [01:12<00:57,  2.39it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cGuxUVfBZ20_000050_000060.wav


 68%|██████▊   | 287/424 [01:12<00:57,  2.39it/s]

MoviePy - Done.


 68%|██████▊   | 288/424 [01:12<00:58,  2.34it/s]


File: datasets/train/Normal/cGuxUVfBZ20_000050_000060.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 27.46%
No people is in the room: 24.11%
        Speaking: 13.42%
         Talking: 12.01%
        Chatting: 3.56%



 68%|██████▊   | 288/424 [01:13<00:58,  2.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ChRYSf_9SoM_000035_000045.wav


 68%|██████▊   | 289/424 [01:13<00:55,  2.44it/s]

MoviePy - Done.

File: datasets/train/Normal/ChRYSf_9SoM_000035_000045.mp4
Ground Truth: Normal
Top predictions:
      Using drug: 18.13%
      Decorating: 13.63%
     Celebrating: 10.86%
        Cleaning: 6.91%
        Painting: 6.63%



 68%|██████▊   | 289/424 [01:13<00:55,  2.44it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CLGYN3I3nIY_000039_000049.wav


 68%|██████▊   | 289/424 [01:13<00:55,  2.44it/s]

MoviePy - Done.


 68%|██████▊   | 290/424 [01:13<01:00,  2.21it/s]


File: datasets/train/Normal/CLGYN3I3nIY_000039_000049.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 63.65%
         Smoking: 9.08%
     Eating food: 7.17%
No people is in the room: 4.79%
      Using drug: 3.80%

datasets/audios/train/Normal


 68%|██████▊   | 290/424 [01:13<01:00,  2.21it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/CLj6957VkZg_000041_000051.wav


 69%|██████▊   | 291/424 [01:14<00:57,  2.31it/s]

MoviePy - Done.

File: datasets/train/Normal/CLj6957VkZg_000041_000051.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 45.40%
       Showering: 27.14%
Playing with pets: 8.69%
   Doing laundry: 7.10%
   Mopping floor: 3.80%



 69%|██████▊   | 291/424 [01:14<00:57,  2.31it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CM3tD22RxB8_000004_000014.wav


 69%|██████▊   | 291/424 [01:14<00:57,  2.31it/s]

MoviePy - Done.


 69%|██████▉   | 292/424 [01:14<00:55,  2.38it/s]


File: datasets/train/Normal/CM3tD22RxB8_000004_000014.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 34.75%
   Doing laundry: 18.86%
   Setting table: 12.66%
   Mopping floor: 9.47%
    Sitting down: 4.98%

datasets/audios/train/Normal


 69%|██████▉   | 292/424 [01:14<00:55,  2.38it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/CqPXNz7YDYM_000007_000017.wav


 69%|██████▉   | 293/424 [01:14<00:52,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/CqPXNz7YDYM_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 47.54%
         Dancing: 8.55%
   Mopping floor: 5.99%
    Sitting down: 5.63%
   Doing laundry: 5.30%



 69%|██████▉   | 293/424 [01:15<00:52,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cRg6xZpmBrI_000404_000414.wav


 69%|██████▉   | 293/424 [01:15<00:52,  2.52it/s]

MoviePy - Done.


 69%|██████▉   | 294/424 [01:15<00:59,  2.17it/s]


File: datasets/train/Normal/cRg6xZpmBrI_000404_000414.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 22.18%
         Talking: 21.21%
        Speaking: 16.94%
No people is in the room: 14.12%
     Eating food: 7.32%

datasets/audios/train/Normal


 69%|██████▉   | 294/424 [01:15<00:59,  2.17it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/csQSjmHhsXA_000018_000028.wav


 70%|██████▉   | 295/424 [01:15<00:57,  2.23it/s]

MoviePy - Done.

File: datasets/train/Normal/csQSjmHhsXA_000018_000028.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 45.43%
Unattended cooking: 20.93%
         Cooking: 20.65%
        Studying: 2.50%
          Dining: 2.36%



 70%|██████▉   | 295/424 [01:16<00:57,  2.23it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CtDdx-V3J7M_000074_000084.wav


 70%|██████▉   | 296/424 [01:16<00:53,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/CtDdx-V3J7M_000074_000084.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 74.39%
        Sleeping: 4.24%
        Studying: 3.93%
        Knitting: 3.19%
         Smoking: 2.28%



 70%|██████▉   | 296/424 [01:16<00:53,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/cThgCWwzvU8_000015_000025.wav


 70%|██████▉   | 296/424 [01:16<00:53,  2.38it/s]

MoviePy - Done.


 70%|███████   | 297/424 [01:16<00:58,  2.15it/s]


File: datasets/train/Normal/cThgCWwzvU8_000015_000025.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 21.32%
           Party: 12.67%
   Doing laundry: 9.13%
         Dancing: 7.18%
       Showering: 4.96%

datasets/audios/train/Normal


 70%|███████   | 297/424 [01:17<00:58,  2.15it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/CTZETRzQWOU_000031_000041.wav


 70%|███████   | 298/424 [01:17<00:55,  2.26it/s]

MoviePy - Done.

File: datasets/train/Normal/CTZETRzQWOU_000031_000041.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 29.65%
  Using computer: 17.03%
         Working: 9.71%
    Sitting down: 9.25%
        Studying: 6.03%



 70%|███████   | 298/424 [01:17<00:55,  2.26it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/CyqaeqsCKMU_000050_000060.wav


 71%|███████   | 299/424 [01:17<00:51,  2.41it/s]

MoviePy - Done.

File: datasets/train/Normal/CyqaeqsCKMU_000050_000060.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 16.40%
   Doing laundry: 10.52%
      Using drug: 9.99%
         Smoking: 7.06%
      Decorating: 6.08%



 71%|███████   | 299/424 [01:17<00:51,  2.41it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/D6M-1uemZTU_000152_000162.wav


 71%|███████   | 299/424 [01:17<00:51,  2.41it/s]

MoviePy - Done.


 71%|███████   | 300/424 [01:18<00:57,  2.17it/s]


File: datasets/train/Normal/D6M-1uemZTU_000152_000162.mp4
Ground Truth: Normal
Top predictions:
      Exercising: 24.60%
    Sitting down: 19.45%
No people is in the room: 11.12%
   Setting table: 10.15%
     Celebrating: 6.56%



 71%|███████   | 300/424 [01:18<00:57,  2.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/d7Ggedw-sOM_001169_001179.wav


 71%|███████   | 300/424 [01:18<00:57,  2.17it/s]

MoviePy - Done.


 71%|███████   | 301/424 [01:18<00:56,  2.18it/s]


File: datasets/train/Normal/d7Ggedw-sOM_001169_001179.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 21.96%
Playing board games: 19.14%
         Reading: 11.05%
No people is in the room: 8.78%
      Exercising: 7.39%

datasets/audios/train/Normal


 71%|███████   | 301/424 [01:18<00:56,  2.18it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/D7lm7hLSM0I_000034_000044.wav


 71%|███████   | 302/424 [01:18<00:52,  2.34it/s]

MoviePy - Done.

File: datasets/train/Normal/D7lm7hLSM0I_000034_000044.mp4
Ground Truth: Normal
Top predictions:
         Reading: 69.00%
         Talking: 9.44%
No people is in the room: 8.78%
        Speaking: 4.81%
Playing board games: 1.62%



 71%|███████   | 302/424 [01:19<00:52,  2.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dfULz-lLaDc_000002_000012.wav


 71%|███████   | 302/424 [01:19<00:52,  2.34it/s]

MoviePy - Done.


 71%|███████▏  | 303/424 [01:19<00:52,  2.31it/s]


File: datasets/train/Normal/dfULz-lLaDc_000002_000012.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 53.00%
    Sitting down: 10.78%
   Doing laundry: 9.64%
         Dancing: 3.89%
No people is in the room: 3.71%

datasets/audios/train/Normal


 71%|███████▏  | 303/424 [01:19<00:52,  2.31it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/DFX28NEFY38_000157_000167.wav


 72%|███████▏  | 304/424 [01:19<00:49,  2.44it/s]

MoviePy - Done.

File: datasets/train/Normal/DFX28NEFY38_000157_000167.mp4
Ground Truth: Normal
Top predictions:
   Doing laundry: 48.87%
No people is in the room: 35.93%
     Eating food: 1.94%
        Drinking: 1.68%
         Talking: 1.20%



 72%|███████▏  | 304/424 [01:19<00:49,  2.44it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DkZml-dK0pU_000043_000053.wav


 72%|███████▏  | 304/424 [01:20<00:49,  2.44it/s]

MoviePy - Done.


 72%|███████▏  | 305/424 [01:20<00:54,  2.19it/s]


File: datasets/train/Normal/DkZml-dK0pU_000043_000053.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 33.52%
Playing with pets: 27.96%
         Dancing: 5.88%
   Setting table: 4.25%
   Mopping floor: 4.21%

datasets/audios/train/Normal


 72%|███████▏  | 305/424 [01:20<00:54,  2.19it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/DLhpyADKYKM_000184_000194.wav


 72%|███████▏  | 306/424 [01:20<00:51,  2.31it/s]

MoviePy - Done.

File: datasets/train/Normal/DLhpyADKYKM_000184_000194.mp4
Ground Truth: Normal
Top predictions:
        Studying: 40.88%
      Decorating: 21.89%
        Cleaning: 6.16%
         Working: 5.46%
    Sitting down: 5.27%



 72%|███████▏  | 306/424 [01:20<00:51,  2.31it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dlK1QS-Aofs_000066_000076.wav


 72%|███████▏  | 307/424 [01:21<00:48,  2.42it/s]

MoviePy - Done.

File: datasets/train/Normal/dlK1QS-Aofs_000066_000076.mp4
Ground Truth: Normal
Top predictions:
        Painting: 6.76%
   Playing music: 6.40%
         Dancing: 6.15%
  Using computer: 5.62%
      Decorating: 5.52%



 72%|███████▏  | 307/424 [01:21<00:48,  2.42it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dMBlXTR3qQU_000187_000197.wav


 72%|███████▏  | 307/424 [01:21<00:48,  2.42it/s]

MoviePy - Done.

File: datasets/train/Normal/dMBlXTR3qQU_000187_000197.mp4
Ground Truth: Normal
Top predictions:


 73%|███████▎  | 308/424 [01:21<00:48,  2.41it/s]

    Sitting down: 85.32%
   Doing laundry: 7.41%
   Mopping floor: 1.79%
         Working: 1.07%
Physical altercations: 0.65%



 73%|███████▎  | 308/424 [01:21<00:48,  2.41it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dndkqyk45Ks_000015_000025.wav


 73%|███████▎  | 309/424 [01:21<00:46,  2.47it/s]

MoviePy - Done.

File: datasets/train/Normal/dndkqyk45Ks_000015_000025.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 43.77%
Playing with pets: 19.47%
   Setting table: 18.69%
   Doing laundry: 7.13%
   Mopping floor: 3.28%



 73%|███████▎  | 309/424 [01:22<00:46,  2.47it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/DNWCjFUE75s_000289_000299.wav


 73%|███████▎  | 309/424 [01:22<00:46,  2.47it/s]

MoviePy - Done.


 73%|███████▎  | 310/424 [01:22<00:52,  2.18it/s]


File: datasets/train/Normal/DNWCjFUE75s_000289_000299.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 34.82%
     Celebrating: 13.57%
         Reading: 7.71%
   Setting table: 5.92%
  Using computer: 5.55%

datasets/audios/train/Normal


 73%|███████▎  | 310/424 [01:22<00:52,  2.18it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/dsnkk1tytpE_000066_000076.wav


 73%|███████▎  | 311/424 [01:22<00:48,  2.31it/s]

MoviePy - Done.

File: datasets/train/Normal/dsnkk1tytpE_000066_000076.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 42.07%
          Dining: 19.19%
   Setting table: 13.16%
No people is in the room: 6.68%
Playing with pets: 3.26%



 73%|███████▎  | 311/424 [01:23<00:48,  2.31it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dxBH1r8To3Y_000010_000020.wav


 74%|███████▎  | 312/424 [01:23<00:46,  2.40it/s]

MoviePy - Done.

File: datasets/train/Normal/dxBH1r8To3Y_000010_000020.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 61.15%
     Eating food: 22.60%
        Drinking: 6.64%
Playing with pets: 2.97%
   Doing laundry: 1.08%



 74%|███████▎  | 312/424 [01:23<00:46,  2.40it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/dXOxzXI-psA_000017_000027.wav


 74%|███████▍  | 313/424 [01:23<00:45,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/dXOxzXI-psA_000017_000027.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 37.01%
         Reading: 13.95%
Making phone calls: 9.98%
  Using computer: 7.75%
         Talking: 6.84%



 74%|███████▍  | 313/424 [01:23<00:45,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/e-xCuXyO1w0_000122_000132.wav


 74%|███████▍  | 314/424 [01:23<00:43,  2.54it/s]

MoviePy - Done.

File: datasets/train/Normal/e-xCuXyO1w0_000122_000132.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 54.77%
   Doing laundry: 27.81%
   Mopping floor: 4.04%
No people is in the room: 3.12%
   Setting table: 2.82%



 74%|███████▍  | 314/424 [01:24<00:43,  2.54it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/E4sSE4n7AIY_000617_000627.wav


 74%|███████▍  | 314/424 [01:24<00:43,  2.54it/s]

MoviePy - Done.


 74%|███████▍  | 315/424 [01:24<00:51,  2.13it/s]


File: datasets/train/Normal/E4sSE4n7AIY_000617_000627.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 46.04%
         Working: 5.68%
        Painting: 4.57%
         Dancing: 4.25%
Playing board games: 3.89%

datasets/audios/train/Normal


 74%|███████▍  | 315/424 [01:24<00:51,  2.13it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/e6knPRich_k_000053_000063.wav


 75%|███████▍  | 316/424 [01:24<00:47,  2.27it/s]

MoviePy - Done.

File: datasets/train/Normal/e6knPRich_k_000053_000063.mp4
Ground Truth: Normal
Top predictions:
Playing with pets: 30.01%
     Eating food: 20.32%
No people is in the room: 10.07%
Play with phone/tablet: 9.45%
  Using computer: 5.64%



 75%|███████▍  | 316/424 [01:25<00:47,  2.27it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/e7MT8cHYxls_000016_000026.wav


 75%|███████▍  | 317/424 [01:25<00:47,  2.27it/s]

MoviePy - Done.

File: datasets/train/Normal/e7MT8cHYxls_000016_000026.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 82.27%
   Doing laundry: 7.80%
  Using computer: 1.88%
Playing board games: 1.75%
         Working: 1.42%



 75%|███████▍  | 317/424 [01:25<00:47,  2.27it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/eDegjR0YmXE_000102_000112.wav


 75%|███████▍  | 317/424 [01:25<00:47,  2.27it/s]

MoviePy - Done.


 75%|███████▌  | 318/424 [01:25<00:51,  2.06it/s]


File: datasets/train/Normal/eDegjR0YmXE_000102_000112.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 29.24%
        Studying: 18.81%
No people is in the room: 7.98%
   Setting table: 7.36%
Organizing space: 6.14%



 75%|███████▌  | 318/424 [01:26<00:51,  2.06it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/eJfnOEeJpM0_000044_000054.wav


 75%|███████▌  | 319/424 [01:26<00:49,  2.12it/s]

MoviePy - Done.

File: datasets/train/Normal/eJfnOEeJpM0_000044_000054.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 28.40%
   Setting table: 20.21%
Unattended cooking: 5.46%
      Decorating: 5.39%
Playing with pets: 5.21%



 75%|███████▌  | 319/424 [01:26<00:49,  2.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ENKtS5yCT2o_000053_000063.wav


 75%|███████▌  | 320/424 [01:26<00:45,  2.30it/s]

MoviePy - Done.

File: datasets/train/Normal/ENKtS5yCT2o_000053_000063.mp4
Ground Truth: Normal
Top predictions:
          Dining: 18.47%
No people is in the room: 18.09%
Organizing space: 9.76%
  Using computer: 8.68%
           Party: 7.88%



 75%|███████▌  | 320/424 [01:26<00:45,  2.30it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/EQViTe9m1Hw_000112_000122.wav


 75%|███████▌  | 320/424 [01:27<00:45,  2.30it/s]

MoviePy - Done.


 76%|███████▌  | 321/424 [01:27<00:49,  2.09it/s]


File: datasets/train/Normal/EQViTe9m1Hw_000112_000122.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 36.70%
        Studying: 35.72%
        Painting: 9.42%
      Decorating: 8.32%
         Working: 3.15%



 76%|███████▌  | 321/424 [01:27<00:49,  2.09it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/ew7MSTTFMeg_000272_000282.wav


 76%|███████▌  | 322/424 [01:27<00:45,  2.22it/s]

MoviePy - Done.

File: datasets/train/Normal/ew7MSTTFMeg_000272_000282.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 35.48%
Playing with pets: 21.77%
      Using drug: 8.92%
         Reading: 6.57%
        Drinking: 2.45%



 76%|███████▌  | 322/424 [01:27<00:45,  2.22it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/F1KA2nf-C9M_000133_000143.wav


 76%|███████▌  | 323/424 [01:28<00:43,  2.34it/s]

MoviePy - Done.

File: datasets/train/Normal/F1KA2nf-C9M_000133_000143.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 24.72%
      Using drug: 21.93%
         Reading: 17.96%
Playing board games: 3.28%
        Speaking: 3.25%



 76%|███████▌  | 323/424 [01:28<00:43,  2.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/f3DMBH9EJNI_000007_000017.wav


 76%|███████▌  | 323/424 [01:28<00:43,  2.34it/s]

MoviePy - Done.


 76%|███████▋  | 324/424 [01:28<00:47,  2.12it/s]


File: datasets/train/Normal/f3DMBH9EJNI_000007_000017.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 34.27%
         Dancing: 16.15%
    Sitting down: 16.06%
        Laughing: 4.52%
        Drinking: 3.25%



 76%|███████▋  | 324/424 [01:28<00:47,  2.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/F621wrqXp34_000036_000046.wav


 76%|███████▋  | 324/424 [01:28<00:47,  2.12it/s]

MoviePy - Done.

File: datasets/train/Normal/F621wrqXp34_000036_000046.mp4
Ground Truth: Normal
Top predictions:


 77%|███████▋  | 325/424 [01:29<00:45,  2.15it/s]

         Reading: 18.03%
  Using computer: 15.50%
        Studying: 11.09%
      Exercising: 7.58%
No people is in the room: 7.26%

datasets/audios/train/Normal


 77%|███████▋  | 325/424 [01:29<00:45,  2.15it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/FJ8M6LK6evw_000022_000032.wav


 77%|███████▋  | 326/424 [01:29<00:42,  2.32it/s]

MoviePy - Done.

File: datasets/train/Normal/FJ8M6LK6evw_000022_000032.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 15.10%
      Using drug: 14.87%
        Chatting: 13.50%
  Using computer: 13.33%
Organizing space: 10.95%



 77%|███████▋  | 326/424 [01:29<00:42,  2.32it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/FJljds1AA2o_000016_000026.wav


 77%|███████▋  | 326/424 [01:29<00:42,  2.32it/s]

MoviePy - Done.


 77%|███████▋  | 327/424 [01:30<00:45,  2.12it/s]


File: datasets/train/Normal/FJljds1AA2o_000016_000026.mp4
Ground Truth: Normal
Top predictions:
         Talking: 28.23%
     Eating food: 24.85%
         Reading: 16.64%
        Speaking: 10.55%
No people is in the room: 8.50%



 77%|███████▋  | 327/424 [01:30<00:45,  2.12it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/gem1VWkzae8_000875_000885.wav


 77%|███████▋  | 328/424 [01:30<00:42,  2.25it/s]

MoviePy - Done.

File: datasets/train/Normal/gem1VWkzae8_000875_000885.mp4
Ground Truth: Normal
Top predictions:
        Studying: 32.57%
   Setting table: 20.01%
  Using computer: 14.82%
Organizing space: 8.30%
         Working: 5.27%



 77%|███████▋  | 328/424 [01:30<00:42,  2.25it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Gf5htdt_Djg_000107_000117.wav


 77%|███████▋  | 328/424 [01:30<00:42,  2.25it/s]

MoviePy - Done.


 78%|███████▊  | 329/424 [01:31<00:46,  2.05it/s]


File: datasets/train/Normal/Gf5htdt_Djg_000107_000117.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 46.22%
Playing board games: 27.99%
   Setting table: 10.06%
          Dining: 3.36%
     Eating food: 2.87%



 78%|███████▊  | 329/424 [01:31<00:46,  2.05it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/gmE90XbBgOk_000000_000010.wav


 78%|███████▊  | 330/424 [01:31<00:43,  2.17it/s]

MoviePy - Done.

File: datasets/train/Normal/gmE90XbBgOk_000000_000010.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 37.92%
        Drinking: 9.00%
     Eating food: 8.95%
         Smoking: 8.58%
No people is in the room: 4.72%



 78%|███████▊  | 330/424 [01:31<00:43,  2.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/GnXROFauJVY_000026_000036.wav


 78%|███████▊  | 331/424 [01:31<00:41,  2.26it/s]

MoviePy - Done.

File: datasets/train/Normal/GnXROFauJVY_000026_000036.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 38.00%
           Party: 11.77%
No people is in the room: 10.46%
          Dining: 9.31%
      Using drug: 3.78%



 78%|███████▊  | 331/424 [01:32<00:41,  2.26it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/H89ENzVtnlg_000095_000105.wav


 78%|███████▊  | 331/424 [01:32<00:41,  2.26it/s]

MoviePy - Done.


 78%|███████▊  | 332/424 [01:32<00:42,  2.17it/s]


File: datasets/train/Normal/H89ENzVtnlg_000095_000105.mp4
Ground Truth: Normal
Top predictions:
       Showering: 36.99%
   Mopping floor: 14.09%
   Doing laundry: 10.40%
         Cooking: 8.30%
No people is in the room: 7.10%



 78%|███████▊  | 332/424 [01:32<00:42,  2.17it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/HJqSjhNOMjY_000143_000153.wav


 78%|███████▊  | 332/424 [01:32<00:42,  2.17it/s]

MoviePy - Done.

File: datasets/train/Normal/HJqSjhNOMjY_000143_000153.mp4
Ground Truth: Normal
Top predictions:


 79%|███████▊  | 333/424 [01:32<00:42,  2.13it/s]

No people is in the room: 45.52%
Playing board games: 7.06%
      Exercising: 6.64%
   Taking photos: 5.04%
        Chatting: 3.72%



 79%|███████▊  | 333/424 [01:33<00:42,  2.13it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hM8CfzDDyIg_000158_000168.wav


 79%|███████▉  | 334/424 [01:33<00:40,  2.20it/s]

MoviePy - Done.

File: datasets/train/Normal/hM8CfzDDyIg_000158_000168.mp4
Ground Truth: Normal
Top predictions:
         Cooking: 53.74%
Unattended cooking: 35.16%
       Showering: 4.44%
   Setting table: 2.04%
      Decorating: 1.06%



 79%|███████▉  | 334/424 [01:33<00:40,  2.20it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hQ4OpC2RuLY_000120_000130.wav


 79%|███████▉  | 335/424 [01:33<00:38,  2.29it/s]

MoviePy - Done.

File: datasets/train/Normal/hQ4OpC2RuLY_000120_000130.mp4
Ground Truth: Normal
Top predictions:
              TV: 50.32%
No people is in the room: 7.01%
  Using computer: 6.32%
     Celebrating: 6.03%
      Using drug: 5.29%



 79%|███████▉  | 335/424 [01:33<00:38,  2.29it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/hxyttQsXwOc_000079_000089.wav


 79%|███████▉  | 336/424 [01:34<00:36,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/hxyttQsXwOc_000079_000089.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 50.27%
Playing with pets: 33.17%
        Drinking: 11.77%
         Talking: 1.14%
No people is in the room: 0.78%



 79%|███████▉  | 336/424 [01:34<00:36,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/i70NodYlh7E_000336_000346.wav


 79%|███████▉  | 337/424 [01:34<00:36,  2.40it/s]

MoviePy - Done.

File: datasets/train/Normal/i70NodYlh7E_000336_000346.mp4
Ground Truth: Normal
Top predictions:
          Dining: 21.12%
   Setting table: 20.57%
      Using drug: 16.30%
         Talking: 12.67%
        Speaking: 4.68%



 79%|███████▉  | 337/424 [01:34<00:36,  2.40it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/IGHpI5LRzuk_000054_000064.wav


 80%|███████▉  | 338/424 [01:34<00:35,  2.45it/s]

MoviePy - Done.

File: datasets/train/Normal/IGHpI5LRzuk_000054_000064.mp4
Ground Truth: Normal
Top predictions:
  Brushing teeth: 97.73%
        Cleaning: 1.29%
       Showering: 0.31%
        Painting: 0.15%
      Using drug: 0.14%



 80%|███████▉  | 338/424 [01:34<00:35,  2.45it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/IKG6W6LmSlw_000000_000010.wav


 80%|███████▉  | 339/424 [01:35<00:33,  2.56it/s]

MoviePy - Done.

File: datasets/train/Normal/IKG6W6LmSlw_000000_000010.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 21.84%
  Using computer: 17.30%
        Drinking: 10.55%
No people is in the room: 7.53%
         Smoking: 7.11%



 80%|███████▉  | 339/424 [01:35<00:33,  2.56it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/JpgvBLEoGPY_000008_000018.wav


 80%|████████  | 340/424 [01:35<00:32,  2.59it/s]

MoviePy - Done.

File: datasets/train/Normal/JpgvBLEoGPY_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Potluck: 54.37%
     Celebrating: 11.12%
         Singing: 10.47%
      Using drug: 4.60%
      Meditating: 3.52%



 80%|████████  | 340/424 [01:35<00:32,  2.59it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Jvx3wT69CVI_000460_000470.wav


 80%|████████  | 340/424 [01:35<00:32,  2.59it/s]

MoviePy - Done.


 80%|████████  | 341/424 [01:35<00:32,  2.59it/s]


File: datasets/train/Normal/Jvx3wT69CVI_000460_000470.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 55.14%
   Doing laundry: 10.76%
   Setting table: 8.69%
    Sitting down: 5.47%
         Working: 4.01%

datasets/audios/train/Normal


 80%|████████  | 341/424 [01:36<00:32,  2.59it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/k088TLn7oIU_000090_000100.wav


 80%|████████  | 341/424 [01:36<00:32,  2.59it/s]

MoviePy - Done.


 81%|████████  | 342/424 [01:36<00:36,  2.26it/s]


File: datasets/train/Normal/k088TLn7oIU_000090_000100.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 56.67%
No people is in the room: 7.03%
  Using computer: 6.45%
         Reading: 5.67%
     Celebrating: 3.61%



 81%|████████  | 342/424 [01:36<00:36,  2.26it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/KCk7JLKqICc_000068_000078.wav


 81%|████████  | 343/424 [01:36<00:35,  2.31it/s]

MoviePy - Done.

File: datasets/train/Normal/KCk7JLKqICc_000068_000078.mp4
Ground Truth: Normal
Top predictions:
      Exercising: 30.37%
         Working: 20.34%
   Mopping floor: 16.58%
        Studying: 8.67%
        Cleaning: 5.71%



 81%|████████  | 343/424 [01:37<00:35,  2.31it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/kCyv6z2yar4_000002_000012.wav


 81%|████████  | 344/424 [01:37<00:33,  2.42it/s]

MoviePy - Done.

File: datasets/train/Normal/kCyv6z2yar4_000002_000012.mp4
Ground Truth: Normal
Top predictions:
         Dancing: 18.87%
     Celebrating: 11.88%
   Playing music: 11.60%
Playing with pets: 9.16%
Organizing space: 7.00%



 81%|████████  | 344/424 [01:37<00:33,  2.42it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/kW5BmCKH248_000447_000457.wav


 81%|████████▏ | 345/424 [01:37<00:32,  2.43it/s]

MoviePy - Done.

File: datasets/train/Normal/kW5BmCKH248_000447_000457.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 36.07%
        Drinking: 15.96%
   Setting table: 15.09%
  Using computer: 7.37%
      Using drug: 5.19%



 81%|████████▏ | 345/424 [01:37<00:32,  2.43it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/KY3dD8VcGnM_000022_000032.wav


 82%|████████▏ | 346/424 [01:38<00:31,  2.48it/s]

MoviePy - Done.

File: datasets/train/Normal/KY3dD8VcGnM_000022_000032.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 17.34%
      Decorating: 15.83%
         Working: 8.11%
        Painting: 7.70%
  Using computer: 7.24%



 82%|████████▏ | 346/424 [01:38<00:31,  2.48it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/L0gZURkoVtM_000056_000066.wav


 82%|████████▏ | 347/424 [01:38<00:31,  2.45it/s]

MoviePy - Done.

File: datasets/train/Normal/L0gZURkoVtM_000056_000066.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 73.09%
      Using drug: 10.32%
Playing with pets: 3.54%
   Taking photos: 1.74%
         Smoking: 1.69%



 82%|████████▏ | 347/424 [01:38<00:31,  2.45it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/lCZLHNyUiMo_000003_000013.wav


 82%|████████▏ | 348/424 [01:38<00:30,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/lCZLHNyUiMo_000003_000013.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 46.62%
     Suffocating: 7.56%
Domestic violence: 4.59%
    Falling down: 4.58%
Furniture Collapse: 4.15%



 82%|████████▏ | 348/424 [01:39<00:30,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/Ll6b1B9YDgM_000032_000042.wav


 82%|████████▏ | 348/424 [01:39<00:30,  2.52it/s]

MoviePy - Done.


 82%|████████▏ | 349/424 [01:39<00:30,  2.42it/s]


File: datasets/train/Normal/Ll6b1B9YDgM_000032_000042.mp4
Ground Truth: Normal
Top predictions:
          Dining: 32.75%
        Studying: 16.82%
     Eating food: 13.82%
    Sitting down: 6.55%
        Drinking: 5.94%

datasets/audios/train/Normal


 82%|████████▏ | 349/424 [01:39<00:30,  2.42it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/N5n6j6vBGu0_000008_000018.wav


 83%|████████▎ | 350/424 [01:39<00:29,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/N5n6j6vBGu0_000008_000018.mp4
Ground Truth: Normal
Top predictions:
         Reading: 27.79%
No people is in the room: 24.44%
        Speaking: 7.50%
   Doing laundry: 5.68%
         Talking: 4.78%



 83%|████████▎ | 350/424 [01:39<00:29,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oeguonb5OA0_000004_000014.wav


 83%|████████▎ | 351/424 [01:40<00:28,  2.55it/s]

MoviePy - Done.

File: datasets/train/Normal/oeguonb5OA0_000004_000014.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 47.94%
Playing with pets: 22.20%
         Talking: 7.25%
     Eating food: 7.19%
   Doing laundry: 3.51%



 83%|████████▎ | 351/424 [01:40<00:28,  2.55it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oLPzKb6j80U_000009_000019.wav


 83%|████████▎ | 352/424 [01:40<00:27,  2.59it/s]

MoviePy - Done.

File: datasets/train/Normal/oLPzKb6j80U_000009_000019.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 20.78%
No people is in the room: 15.33%
      Meditating: 10.31%
         Singing: 7.96%
      Decorating: 6.93%



 83%|████████▎ | 352/424 [01:40<00:27,  2.59it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/OmBlRJEoY9s_000088_000098.wav


 83%|████████▎ | 353/424 [01:40<00:28,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/OmBlRJEoY9s_000088_000098.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 43.90%
No people is in the room: 26.83%
Playing board games: 8.77%
          Dining: 4.52%
     Eating food: 3.40%



 83%|████████▎ | 353/424 [01:41<00:28,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/oUc6ZrEBL80_000519_000529.wav


 83%|████████▎ | 354/424 [01:41<00:27,  2.52it/s]

MoviePy - Done.

File: datasets/train/Normal/oUc6ZrEBL80_000519_000529.mp4
Ground Truth: Normal
Top predictions:
        Cleaning: 37.98%
      Meditating: 14.13%
       Vacumning: 9.45%
   Mopping floor: 5.17%
       Showering: 4.13%



 83%|████████▎ | 354/424 [01:41<00:27,  2.52it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/P6AChrs1Txo_000187_000197.wav


 84%|████████▎ | 355/424 [01:41<00:27,  2.50it/s]

MoviePy - Done.

File: datasets/train/Normal/P6AChrs1Txo_000187_000197.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 48.12%
Playing with pets: 12.76%
Playing board games: 9.15%
   Setting table: 8.41%
   Mopping floor: 4.78%



 84%|████████▎ | 355/424 [01:41<00:27,  2.50it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/P8amgAResD4_000279_000289.wav


 84%|████████▍ | 356/424 [01:41<00:26,  2.61it/s]

MoviePy - Done.

File: datasets/train/Normal/P8amgAResD4_000279_000289.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 17.03%
         Talking: 14.66%
      Using drug: 11.24%
    Sitting down: 9.50%
        Speaking: 8.32%



 84%|████████▍ | 356/424 [01:42<00:26,  2.61it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/pz28ycbJiQc_000002_000012.wav


 84%|████████▍ | 357/424 [01:42<00:25,  2.58it/s]

MoviePy - Done.

File: datasets/train/Normal/pz28ycbJiQc_000002_000012.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 46.24%
         Reading: 17.98%
     Celebrating: 6.67%
         Talking: 5.11%
        Speaking: 4.52%



 84%|████████▍ | 357/424 [01:42<00:25,  2.58it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/PzhivDBg9F0_000016_000026.wav


 84%|████████▍ | 357/424 [01:42<00:25,  2.58it/s]

MoviePy - Done.


 84%|████████▍ | 358/424 [01:42<00:28,  2.35it/s]


File: datasets/train/Normal/PzhivDBg9F0_000016_000026.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 32.47%
        Speaking: 11.98%
      Using drug: 11.79%
         Reading: 7.04%
         Talking: 6.08%



 84%|████████▍ | 358/424 [01:43<00:28,  2.35it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/QAKwGG98wvc_000020_000030.wav


 85%|████████▍ | 359/424 [01:43<00:27,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/QAKwGG98wvc_000020_000030.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 37.26%
      Using drug: 8.03%
      Decorating: 6.61%
  Using computer: 5.60%
        Studying: 5.27%



 85%|████████▍ | 359/424 [01:43<00:27,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/rLQ5fmVSL94_000026_000036.wav


 85%|████████▍ | 360/424 [01:43<00:26,  2.44it/s]

MoviePy - Done.

File: datasets/train/Normal/rLQ5fmVSL94_000026_000036.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 15.45%
      Decorating: 11.13%
    Sitting down: 10.84%
      Using drug: 8.22%
  Using computer: 5.41%



 85%|████████▍ | 360/424 [01:43<00:26,  2.44it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/X46EhfXTqxg_000727_000737.wav


 85%|████████▌ | 361/424 [01:44<00:26,  2.42it/s]

MoviePy - Done.

File: datasets/train/Normal/X46EhfXTqxg_000727_000737.mp4
Ground Truth: Normal
Top predictions:
        Studying: 15.25%
      Decorating: 14.03%
   Doing laundry: 13.06%
  Using computer: 12.30%
   Setting table: 12.17%



 85%|████████▌ | 361/424 [01:44<00:26,  2.42it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_0xvqLRQvOA_000074_000084.wav


 85%|████████▌ | 362/424 [01:44<00:24,  2.50it/s]

MoviePy - Done.

File: datasets/train/Normal/_0xvqLRQvOA_000074_000084.mp4
Ground Truth: Normal
Top predictions:
    Sitting down: 78.99%
No people is in the room: 7.14%
   Mopping floor: 2.09%
   Doing laundry: 1.80%
Playing with pets: 1.21%



 85%|████████▌ | 362/424 [01:44<00:24,  2.50it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_3vZKUxeQtg_000298_000308.wav


 86%|████████▌ | 363/424 [01:44<00:24,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/_3vZKUxeQtg_000298_000308.mp4
Ground Truth: Normal
Top predictions:
     Eating food: 95.62%
        Drinking: 3.98%
Playing with pets: 0.15%
         Choking: 0.07%
         Smoking: 0.04%



 86%|████████▌ | 363/424 [01:45<00:24,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_4EOMgSAs2o_000094_000104.wav


 86%|████████▌ | 364/424 [01:45<00:23,  2.58it/s]

MoviePy - Done.

File: datasets/train/Normal/_4EOMgSAs2o_000094_000104.mp4
Ground Truth: Normal
Top predictions:
         Talking: 30.25%
         Reading: 23.84%
        Speaking: 18.54%
No people is in the room: 6.94%
         Singing: 4.66%



 86%|████████▌ | 364/424 [01:45<00:23,  2.58it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_5ItAUGPgqI_000742_000752.wav


 86%|████████▌ | 364/424 [01:45<00:23,  2.58it/s]

MoviePy - Done.


 86%|████████▌ | 365/424 [01:45<00:26,  2.21it/s]


File: datasets/train/Normal/_5ItAUGPgqI_000742_000752.mp4
Ground Truth: Normal
Top predictions:
Unattended cooking: 53.53%
         Cooking: 44.81%
No people is in the room: 0.53%
   Doing laundry: 0.39%
   Setting table: 0.35%

datasets/audios/train/Normal


 86%|████████▌ | 365/424 [01:46<00:26,  2.21it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/_82X-ic2kME_000001_000011.wav


 86%|████████▌ | 365/424 [01:46<00:26,  2.21it/s]

MoviePy - Done.


 86%|████████▋ | 366/424 [01:46<00:25,  2.29it/s]


File: datasets/train/Normal/_82X-ic2kME_000001_000011.mp4
Ground Truth: Normal
Top predictions:
  Brushing teeth: 29.66%
No people is in the room: 18.52%
      Using drug: 7.14%
        Drinking: 5.34%
         Talking: 5.17%



 86%|████████▋ | 366/424 [01:46<00:25,  2.29it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_8w7SkEkehQ_000037_000047.wav


 87%|████████▋ | 367/424 [01:46<00:24,  2.36it/s]

MoviePy - Done.

File: datasets/train/Normal/_8w7SkEkehQ_000037_000047.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 55.36%
No people is in the room: 13.89%
         Cooking: 5.85%
       Showering: 4.50%
   Doing laundry: 3.49%



 87%|████████▋ | 367/424 [01:46<00:24,  2.36it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_99gyAUUgy4_000051_000061.wav


 87%|████████▋ | 367/424 [01:46<00:24,  2.36it/s]

MoviePy - Done.


 87%|████████▋ | 368/424 [01:47<00:25,  2.23it/s]


File: datasets/train/Normal/_99gyAUUgy4_000051_000061.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 44.24%
    Sitting down: 10.07%
     Celebrating: 8.73%
   Setting table: 8.50%
  Using computer: 4.57%

datasets/audios/train/Normal


 87%|████████▋ | 368/424 [01:47<00:25,  2.23it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/_A0_T6M6ULQ_000004_000014.wav


 87%|████████▋ | 369/424 [01:47<00:23,  2.34it/s]

MoviePy - Done.

File: datasets/train/Normal/_A0_T6M6ULQ_000004_000014.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 42.09%
   Doing laundry: 19.32%
Playing with pets: 7.28%
         Working: 5.91%
     Celebrating: 4.78%



 87%|████████▋ | 369/424 [01:47<00:23,  2.34it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_cMswRMB3WU_000124_000134.wav


 87%|████████▋ | 369/424 [01:47<00:23,  2.34it/s]

MoviePy - Done.


 87%|████████▋ | 370/424 [01:48<00:24,  2.21it/s]


File: datasets/train/Normal/_cMswRMB3WU_000124_000134.mp4
Ground Truth: Normal
Top predictions:
     Celebrating: 27.88%
   Setting table: 18.39%
         Dancing: 6.61%
      Decorating: 6.11%
Playing with pets: 5.42%

datasets/audios/train/Normal


 87%|████████▋ | 370/424 [01:48<00:24,  2.21it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/_cOAB4Neu5Q_000237_000247.wav


 88%|████████▊ | 371/424 [01:48<00:23,  2.30it/s]

MoviePy - Done.

File: datasets/train/Normal/_cOAB4Neu5Q_000237_000247.mp4
Ground Truth: Normal
Top predictions:
Playing board games: 42.26%
         Reading: 18.05%
No people is in the room: 7.98%
         Talking: 5.37%
      Exercising: 3.07%



 88%|████████▊ | 371/424 [01:48<00:23,  2.30it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_fDxRDV_D28_000077_000087.wav


 88%|████████▊ | 372/424 [01:48<00:21,  2.38it/s]

MoviePy - Done.

File: datasets/train/Normal/_fDxRDV_D28_000077_000087.mp4
Ground Truth: Normal
Top predictions:
   Setting table: 41.09%
         Dancing: 20.02%
No people is in the room: 10.99%
    Sitting down: 5.57%
          Dining: 4.74%



 88%|████████▊ | 372/424 [01:49<00:21,  2.38it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_hnn0Q5Ee0g_000037_000047.wav


 88%|████████▊ | 372/424 [01:49<00:21,  2.38it/s]

MoviePy - Done.


 88%|████████▊ | 373/424 [01:49<00:22,  2.29it/s]


File: datasets/train/Normal/_hnn0Q5Ee0g_000037_000047.mp4
Ground Truth: Normal
Top predictions:
        Painting: 29.30%
     Eating food: 14.69%
        Knitting: 11.94%
        Cleaning: 7.17%
Playing with pets: 5.98%



 88%|████████▊ | 373/424 [01:49<00:22,  2.29it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_jSACF07scs_000041_000051.wav


 88%|████████▊ | 373/424 [01:49<00:22,  2.29it/s]

MoviePy - Done.


 88%|████████▊ | 374/424 [01:49<00:21,  2.36it/s]


File: datasets/train/Normal/_jSACF07scs_000041_000051.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 55.93%
    Sitting down: 21.78%
   Mopping floor: 8.37%
   Setting table: 4.77%
   Doing laundry: 2.36%

datasets/audios/train/Normal


 88%|████████▊ | 374/424 [01:49<00:21,  2.36it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/_JuSKnGWkp0_000230_000240.wav


 88%|████████▊ | 375/424 [01:50<00:19,  2.45it/s]

MoviePy - Done.

File: datasets/train/Normal/_JuSKnGWkp0_000230_000240.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 40.52%
   Setting table: 29.57%
No people is in the room: 5.35%
     Celebrating: 3.72%
         Reading: 3.67%



 88%|████████▊ | 375/424 [01:50<00:19,  2.45it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_Kcgee5kKS0_000057_000067.wav


 89%|████████▊ | 376/424 [01:50<00:19,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/_Kcgee5kKS0_000057_000067.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 53.37%
   Doing laundry: 13.26%
   Setting table: 8.38%
Playing with pets: 8.05%
         Working: 2.26%



 89%|████████▊ | 376/424 [01:50<00:19,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_L2i7A8dMTg_000020_000030.wav


 89%|████████▉ | 377/424 [01:50<00:19,  2.46it/s]

MoviePy - Done.

File: datasets/train/Normal/_L2i7A8dMTg_000020_000030.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 74.33%
Playing with pets: 18.22%
Playing board games: 1.40%
     Eating food: 1.34%
   Setting table: 1.23%



 89%|████████▉ | 377/424 [01:51<00:19,  2.46it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_ONRAEcQsqw_000101_000111.wav


 89%|████████▉ | 378/424 [01:51<00:17,  2.57it/s]

MoviePy - Done.

File: datasets/train/Normal/_ONRAEcQsqw_000101_000111.mp4
Ground Truth: Normal
Top predictions:
No people is in the room: 33.59%
      Exercising: 8.30%
         Working: 8.10%
   Doing laundry: 7.33%
   Mopping floor: 5.54%



 89%|████████▉ | 378/424 [01:51<00:17,  2.57it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_P7OzblP4KU_000002_000012.wav


 89%|████████▉ | 379/424 [01:51<00:17,  2.60it/s]

MoviePy - Done.

File: datasets/train/Normal/_P7OzblP4KU_000002_000012.mp4
Ground Truth: Normal
Top predictions:
       Showering: 57.79%
No people is in the room: 26.84%
   Doing laundry: 7.12%
Playing with pets: 2.46%
         Talking: 1.24%



 89%|████████▉ | 379/424 [01:51<00:17,  2.60it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_Q5f75CvNfw_000103_000113.wav


 90%|████████▉ | 380/424 [01:51<00:17,  2.58it/s]

MoviePy - Done.

File: datasets/train/Normal/_Q5f75CvNfw_000103_000113.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 31.09%
Organizing space: 8.75%
      Decorating: 7.55%
      Using drug: 7.05%
        Painting: 6.05%



 90%|████████▉ | 380/424 [01:52<00:17,  2.58it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_SlPWSA-1bU_000144_000154.wav


 90%|████████▉ | 381/424 [01:52<00:16,  2.61it/s]

MoviePy - Done.

File: datasets/train/Normal/_SlPWSA-1bU_000144_000154.mp4
Ground Truth: Normal
Top predictions:
         Potluck: 35.77%
     Celebrating: 17.94%
No people is in the room: 11.31%
         Dancing: 5.76%
Playing board games: 5.37%



 90%|████████▉ | 381/424 [01:52<00:16,  2.61it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_SsAvyBagZw_000011_000021.wav


 90%|████████▉ | 381/424 [01:52<00:16,  2.61it/s]

MoviePy - Done.


 90%|█████████ | 382/424 [01:52<00:16,  2.60it/s]


File: datasets/train/Normal/_SsAvyBagZw_000011_000021.mp4
Ground Truth: Normal
Top predictions:
              TV: 30.96%
No people is in the room: 27.59%
         Working: 21.47%
  Using computer: 4.55%
     Laying down: 3.26%

datasets/audios/train/Normal


 90%|█████████ | 382/424 [01:52<00:16,  2.60it/s]

MoviePy - Writing audio in datasets/audios/train/Normal/_TqghEKOFAQ_000045_000055.wav


 90%|█████████ | 383/424 [01:53<00:15,  2.64it/s]

MoviePy - Done.

File: datasets/train/Normal/_TqghEKOFAQ_000045_000055.mp4
Ground Truth: Normal
Top predictions:
  Using computer: 49.86%
      Using drug: 22.21%
No people is in the room: 4.00%
Organizing space: 2.96%
   Setting table: 2.49%



 90%|█████████ | 383/424 [01:53<00:15,  2.64it/s]

datasets/audios/train/Normal
MoviePy - Writing audio in datasets/audios/train/Normal/_WijOegXa_Q_000391_000401.wav


 90%|█████████ | 383/424 [01:53<00:15,  2.64it/s]

MoviePy - Done.


 91%|█████████ | 384/424 [01:53<00:16,  2.49it/s]


File: datasets/train/Normal/_WijOegXa_Q_000391_000401.mp4
Ground Truth: Normal
Top predictions:
        Drinking: 67.84%
     Eating food: 10.58%
No people is in the room: 4.19%
          Dining: 3.96%
   Setting table: 3.00%



 91%|█████████ | 386/424 [01:53<00:09,  3.82it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo1.868.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo10.869.mp4 has no audio!


 92%|█████████▏| 389/424 [01:54<00:05,  5.85it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo11.870.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo2.871.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo3.872.mp4 has no audio!


 92%|█████████▏| 391/424 [01:54<00:04,  7.20it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo4.873.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo5.874.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo6.875.mp4 has no audio!


 93%|█████████▎| 394/424 [01:54<00:03,  8.33it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo7.876.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/posVideo8.877.mp4 has no audio!


 93%|█████████▎| 396/424 [01:54<00:03,  8.97it/s]

This video datasets/train/Smoke or Fire or Open flame/posVideo9.878.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video102.mp4 has no audio!


 94%|█████████▍| 399/424 [01:55<00:02,  9.67it/s]

This video datasets/train/Smoke or Fire or Open flame/Video109.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video11.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video115.mp4 has no audio!


 94%|█████████▍| 400/424 [01:55<00:02,  9.71it/s]

This video datasets/train/Smoke or Fire or Open flame/Video134_1.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video136.mp4 has no audio!


 95%|█████████▌| 404/424 [01:55<00:02,  9.60it/s]

This video datasets/train/Smoke or Fire or Open flame/Video14.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video149.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video150.mp4 has no audio!


 96%|█████████▌| 406/424 [01:55<00:01, 10.13it/s]

This video datasets/train/Smoke or Fire or Open flame/Video157.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video170.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video202.mp4 has no audio!


 97%|█████████▋| 410/424 [01:56<00:01, 10.11it/s]

This video datasets/train/Smoke or Fire or Open flame/Video209.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video21.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video234.mp4 has no audio!


 97%|█████████▋| 412/424 [01:56<00:01, 10.02it/s]

This video datasets/train/Smoke or Fire or Open flame/Video239.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video257.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video37.mp4 has no audio!


 98%|█████████▊| 414/424 [01:56<00:00, 10.19it/s]

This video datasets/train/Smoke or Fire or Open flame/Video51.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video67.mp4 has no audio!


 98%|█████████▊| 416/424 [01:56<00:00,  9.82it/s]

This video datasets/train/Smoke or Fire or Open flame/Video73.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video74.mp4 has no audio!


 99%|█████████▉| 419/424 [01:57<00:00,  9.38it/s]

This video datasets/train/Smoke or Fire or Open flame/Video75_1.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video75_2.mp4 has no audio!


 99%|█████████▉| 421/424 [01:57<00:00,  8.47it/s]

This video datasets/train/Smoke or Fire or Open flame/Video78.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video82.mp4 has no audio!


100%|██████████| 424/424 [01:57<00:00,  9.27it/s]

This video datasets/train/Smoke or Fire or Open flame/Video85.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video91.mp4 has no audio!
This video datasets/train/Smoke or Fire or Open flame/Video99.mp4 has no audio!


100%|██████████| 424/424 [01:57<00:00,  3.60it/s]


In [5]:
# Per-label summary: one "label: correct / total" line for every key in `total`.
# `total` and `correct` are populated by an earlier cell (presumably the CLAP
# inference loop — verify there); both are assumed to be dict-like and keyed by
# the same label strings.
for label, n_total in total.items():
    n_correct = correct[label]
    print(f"{label}: {n_correct} / {n_total}")

Crying: 36 / 50
Falling down: 0 / 30
Glass breaking: 35 / 53
Gunshot: 32 / 51
Normal: 0 / 200
Smoke or Fire or Open flame: 0 / 40
