Convert all videos into image frames at 1 fps

In [1]:
import wave
from PIL import Image
import os
import cv2
from tqdm import tqdm
# Root data folder used by every cell below; its 'video_1', 'video_2' and
# 'audio' subfolders are read by later cells. This is a hard-coded absolute
# path, so edit it before running the notebook on another machine.
directory = '/home/qihan/Playground/Data/User_3'


In [2]:


def extract_frames_from_video(video_file, output_folder, frame_rate=30):
    """Save frames sampled from a video as PNG images.

    Every ``downsample_factor``-th frame is read, converted from OpenCV's BGR
    channel order to RGB, and written to ``<output_folder>/frame_<index>.png``,
    where ``<index>`` is the frame's position in the source video.

    Args:
        video_file: Path of the video to read.
        output_folder: Folder that receives the PNG files; created if missing.
        frame_rate: Target number of saved frames per second of video. The
            sampling step is ``int(native_fps / frame_rate)``, clamped to at
            least 1 so that a target above the native rate keeps every frame.

    Raises:
        ValueError: If OpenCV cannot open ``video_file``.
    """
    video = cv2.VideoCapture(video_file)
    try:
        if not video.isOpened():
            raise ValueError(f'Cannot open video file: {video_file}')

        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        native_fps = video.get(cv2.CAP_PROP_FPS)
        # int() truncates to 0 whenever native_fps < frame_rate (or the
        # container reports 0 fps); a zero step would make range() raise.
        downsample_factor = max(1, int(native_fps / frame_rate))

        os.makedirs(output_folder, exist_ok=True)

        # Iterating through tqdm sizes the bar by the frames actually sampled,
        # so it reaches 100% instead of stalling at 1/downsample_factor.
        sampled_indices = range(0, num_frames, downsample_factor)
        for frame in tqdm(sampled_indices, desc='Extracting frames', unit='frame'):
            video.set(cv2.CAP_PROP_POS_FRAMES, frame)
            ret, frame_data = video.read()
            if ret:
                image = Image.fromarray(cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB))
                image.save(f'{output_folder}/frame_{frame}.png')
    finally:
        # Release the capture handle even if decoding or saving fails.
        video.release()

def convert_videos_to_images(input_folder, output_folder, frame_rate=30):
    """Extract frames from every ``.mp4`` file located directly in a folder.

    Video names are split on ``_``; the second and third fields select the
    destination, so ``a_b_c.mp4`` is written to ``<output_folder>/b/c``.
    Files with fewer than three fields are reported and skipped instead of
    aborting the whole batch.

    Args:
        input_folder: Folder that contains the ``.mp4`` files.
        output_folder: Root folder under which the frame folders are created.
        frame_rate: Target frames per second, forwarded to
            ``extract_frames_from_video``.
    """
    for video_file in sorted(os.listdir(input_folder)):
        if not video_file.endswith('.mp4'):
            continue

        name_parts = os.path.splitext(video_file)[0].split('_')
        if len(name_parts) < 3:
            print(f'Skipping {video_file}: expected at least three "_"-separated fields')
            continue

        # Honour output_folder (it used to be ignored in favour of input_folder).
        frames_folder = os.path.join(output_folder, name_parts[1], name_parts[2])
        os.makedirs(frames_folder, exist_ok=True)
        extract_frames_from_video(
            os.path.join(input_folder, video_file), frames_folder, frame_rate
        )
        


In [None]:

# Each immediate subfolder of `directory` serves as both the source of .mp4
# files and the destination root for the extracted frames; the trailing 1
# requests one saved frame per second of video.
subfolder_paths = (os.path.join(directory, entry) for entry in os.listdir(directory))
for subfolder_path in subfolder_paths:
    if not os.path.isdir(subfolder_path):
        continue
    convert_videos_to_images(subfolder_path, subfolder_path, 1)


Now crop the audio files (keep the first 15 seconds, then trim trailing silence)

In [23]:
import wave
import numpy as np

# Load the audio file
def audio_cropper(audio_path, duration=15, threshold=1000, tail_seconds=1):
    """Trim a 16-bit PCM WAV file in place.

    Only the first ``duration`` seconds are considered. Within that window the
    last frame whose absolute amplitude (on any channel) exceeds ``threshold``
    is located, and everything more than ``tail_seconds`` after it is dropped.
    The result overwrites ``audio_path``. If no frame exceeds the threshold the
    file is left untouched.

    Args:
        audio_path: Path of the WAV file to crop; it is rewritten in place.
        duration: Length in seconds of the leading window that is analysed
            and, at most, kept.
        threshold: Amplitude (in 16-bit sample units) above which a frame
            counts as non-silent.
        tail_seconds: Seconds of audio retained after the last loud frame.

    Raises:
        ValueError: If the file does not use 2-byte (16-bit) samples.
    """
    with wave.open(audio_path, 'rb') as audio_file:
        channels = audio_file.getnchannels()
        sample_width = audio_file.getsampwidth()
        frame_rate = audio_file.getframerate()
        audio_data = audio_file.readframes(audio_file.getnframes())

    # The int16 decoding and the default threshold only make sense for 16-bit
    # audio; refuse other widths rather than overwrite the file with garbage.
    if sample_width != 2:
        raise ValueError(
            f'{audio_path}: only 16-bit WAV files are supported '
            f'(sample width is {sample_width} bytes)'
        )

    samples = np.frombuffer(audio_data, dtype='<i2')
    # One row per frame, so that seconds map to rows for any channel count and
    # the file is never cut in the middle of a frame.
    usable = len(samples) - len(samples) % channels
    frames = samples[:usable].reshape(-1, channels)
    frames = frames[:int(duration * frame_rate)]

    # Widen before abs(): abs(-32768) does not fit in int16.
    peak_per_frame = np.abs(frames.astype(np.int32)).max(axis=1)
    loud_frames = np.where(peak_per_frame > threshold)[0]
    if loud_frames.size == 0:
        # Nothing to anchor the cut on; leave the recording as it is.
        return

    frames_to_keep = int(loud_frames[-1]) + int(tail_seconds * frame_rate)
    frames = frames[:frames_to_keep]

    with wave.open(audio_path, 'wb') as audio_file:
        audio_file.setnchannels(channels)
        audio_file.setsampwidth(sample_width)
        audio_file.setframerate(frame_rate)
        # writeframes() fixes up the frame count in the header itself.
        audio_file.writeframes(frames.tobytes())

In [26]:
# Crop every WAV recording in the audio folder in place.
audio_dir = os.path.join(directory, "audio")

# Only .wav files are passed on: the same folder also receives transcript
# .txt files and numbered subfolders from later cells, and wave.open() cannot
# read either of those.
files = [name for name in os.listdir(audio_dir) if name.endswith(".wav")]

for file in files:
    file_path = os.path.join(audio_dir, file)
    audio_cropper(file_path)

Now blur out the faces in videos 1 and 3 (the cell below is set to `video_1`)

In [8]:
import subprocess
path_to_image_dir = os.path.join(directory,'video_1')

# Walk <video_1>/<subfolder>/<image> and run the `deface` command-line tool on
# each selected image, passing the same path as input and output.
for subfolder in os.listdir(path_to_image_dir):
    subfolder_path = os.path.join(path_to_image_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue

    # NOTE(review): only files whose name contains "anonymized" are processed;
    # everything else is skipped. This may be an inverted check left over from
    # an earlier cleanup step - confirm which files are meant to be blurred.
    selected_images = [
        name for name in os.listdir(subfolder_path) if "anonymized" in name
    ]
    for image_file in selected_images:
        image_path = os.path.join(subfolder_path, image_file)
        # The exit status of deface is not checked.
        subprocess.call(["deface", image_path, image_path,"--backend","opencv"])

Now crop out all faces in video 2

In [36]:
from face_crop_plus import Cropper
import os
import shutil
import torch

# Report GPU availability and the active CUDA device before building the cropper.
print(torch.cuda.is_available())
print(torch.cuda.current_device())
cropper = Cropper(face_factor=0.7, strategy="largest",device="cuda")

path_to_video_2_dir = os.path.join(directory,'video_2')

# Pass 1: crop faces for every episode folder that is not already a face
# folder, then delete the source folder.
# NOTE(review): the source folder is removed without checking that the cropper
# produced any output - confirm this is acceptable.
for task_name in os.listdir(path_to_video_2_dir):
    task_path = os.path.join(path_to_video_2_dir, task_name)
    if not os.path.isdir(task_path):
        continue
    for episode_name in os.listdir(task_path):
        if "face" in episode_name:
            continue
        episode_path = os.path.join(task_path, episode_name)
        if os.path.isdir(episode_path):
            cropper.process_dir(input_dir=episode_path)
            shutil.rmtree(episode_path)

# Pass 2: rename the face folders by removing "_face" from their names.
# NOTE(review): presumably the cropper writes to a sibling folder whose name
# contains "_face"; verify the exact suffix, since only the literal "_face" is
# removed here.
for task_name in os.listdir(path_to_video_2_dir):
    task_path = os.path.join(path_to_video_2_dir, task_name)
    if not os.path.isdir(task_path):
        continue
    for episode_name in os.listdir(task_path):
        if "face" not in episode_name:
            continue
        os.rename(
            os.path.join(task_path, episode_name),
            os.path.join(task_path, episode_name.replace("_face", "")),
        )
                 


True
0


Processing: 100%|██████████| 2/2 [00:04<00:00,  2.45s/it]
Processing: 100%|██████████| 1/1 [00:01<00:00,  1.57s/it]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.22it/s]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.14it/s]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.26it/s]
Processing: 100%|██████████| 3/3 [00:02<00:00,  1.03it/s]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.01it/s]
Processing: 100%|██████████| 3/3 [00:02<00:00,  1.06it/s]
Processing: 100%|██████████| 6/6 [00:06<00:00,  1.02s/it]
Processing: 100%|██████████| 3/3 [00:03<00:00,  1.07s/it]
Processing: 100%|██████████| 2/2 [00:02<00:00,  1.09s/it]
Processing: 100%|██████████| 2/2 [00:02<00:00,  1.11s/it]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.17it/s]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.03it/s]
Processing: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Processing: 100%|██████████| 2/2 [00:01<00:00,  1.07it/s]
Processing: 100%|██████████| 3/3 [00:02<00:00,  1.08it/s]
Processing: 10

/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/3
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/0
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/0
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/3
/home/qihan/Playground/Data/User_3/video_1/3
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/2
/home/qihan/Playground/Data/User_3/video_1/3
/home/qihan/Playground/Data/User_3/video_1/1
/home/qihan/Playground/Data/User_3/video_1/1
/home/qiha

Transcribe the audio

In [5]:
import whisper

# Load the "medium.en" Whisper model once; the transcription cell below calls
# `model.transcribe` on each audio file.
model = whisper.load_model("medium.en")


  from .autonotebook import tqdm as notebook_tqdm


In [6]:

# Transcribe every .wav file in the audio folder and save each transcript as a
# .txt file next to its recording.
path_to_audio_dir = os.path.join(directory, "audio")
for file in os.listdir(path_to_audio_dir):
    file_path = os.path.join(path_to_audio_dir, file)
    if not file_path.endswith(".wav"):
        continue

    print(file_path)
    result = model.transcribe(file_path)
    print(result["text"])

    # splitext() removes only the final extension; splitting on the first "."
    # would truncate the path at any dot in a folder or file name.
    output_file_path = os.path.splitext(file_path)[0] + ".txt"
    with open(output_file_path, 'w') as output_file:
        output_file.write(result["text"])


/home/qihan/Playground/Data/User_3/audio/3_2_0_audio.wav
 Hi robot, pass me the pink marker.
/home/qihan/Playground/Data/User_3/audio/3_2_3_audio.wav
 Put it back.
/home/qihan/Playground/Data/User_3/audio/3_3_0_audio.wav
 Put the photo frame at top right of the table.
/home/qihan/Playground/Data/User_3/audio/3_3_10_audio.wav
 Hi robot, grab the plant and put it on the left side of the table.
/home/qihan/Playground/Data/User_3/audio/3_3_6_audio.wav
 A robot put the little plant on the left side of the basket on the table.
/home/qihan/Playground/Data/User_3/audio/3_2_6_audio.wav
 Well, give me the pink marker again. Oh, didn't catch it.
/home/qihan/Playground/Data/User_3/audio/3_0_0_audio.wav
 Hi robot, place the cookie in a tray.
/home/qihan/Playground/Data/User_3/audio/3_1_15_audio.wav
 Hi robot, put the ketchup on the left side of the tree.
/home/qihan/Playground/Data/User_3/audio/3_2_5_audio.wav
 Hi robot, give me the red marker.
/home/qihan/Playground/Data/User_3/audio/3_2_7_audio.w

KeyboardInterrupt: 

In [17]:
import os


def concatenate_text_files(directory, output_file):
    """Merge the transcript ``.txt`` files of a folder into one indexed file.

    File names are split on ``_`` (after removing the extension) and the third
    field is read as an integer index. Every line of every file is written to
    ``output_file`` as ``<index>: <stripped line>;`` in ascending index order.

    Files whose name contains ``all_lines`` are ignored, so an output file
    living in the same folder is not merged into itself. ``.txt`` files whose
    name has no integer third field are skipped rather than aborting the run.

    Args:
        directory: Folder that contains the ``.txt`` files.
        output_file: Path of the merged file to write (overwritten).
    """
    lines = []
    # Sorted listing keeps files that share an index in a reproducible order.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt') or "all_lines" in filename:
            continue

        name_parts = os.path.splitext(filename)[0].split('_')
        try:
            index_number = int(name_parts[2])
        except (IndexError, ValueError):
            # Not a "<a>_<b>_<index>..." transcript; leave it out.
            continue

        with open(os.path.join(directory, filename)) as infile:
            lines.extend((index_number, line.strip()) for line in infile)

    lines.sort(key=lambda entry: entry[0])  # numeric, stable sort by index
    with open(output_file, 'w') as outfile:
        for index, line in lines:
            outfile.write(f'{index}: {line};\n')


In [None]:

            
# Build one combined transcript per numbered audio subfolder (audio/0 .. audio/3).
audio_root = os.path.join(directory, "audio")
for subfolder_name in map(str, range(4)):
    path_to_audio_dir = os.path.join(audio_root, subfolder_name)
    # A missing subfolder is created first, so it ends up with an empty
    # all_lines.txt.
    if not os.path.exists(path_to_audio_dir):
        os.mkdir(path_to_audio_dir)

    output_file = os.path.join(path_to_audio_dir, "all_lines.txt")
    concatenate_text_files(path_to_audio_dir, output_file)
    
