### Purpose of this notebook
The purpose of this notebook is to download WLASL dataset and upload to S3 bucket after processing as Numpy

The videos will be labeled by the 'gloss' name from the Json.
Some videos only have the required sign between Frame-A and Frame-B. These will be cropped accordingly

In [1]:
#Install missing packages
#!pip install boto3
#!pip install pyarrow
#!pip install fastparquet
#!pip install s3fs
#!pip install mediapipe
#!pip install kaggle

In [2]:
# Import required libraries
import boto3 #Video files get read through this
import cv2
import os
import pandas as pd
import math
import numpy as np
import random
import io
import s3fs # Parquet files get read through this
import zlib # For compression
import time # To calculate download time
import configparser
import requests
import psutil # Checks memory usage
import tempfile
import json
import yt_dlp
import subprocess
from tqdm import tqdm
#import mediapipe as mp


In [3]:
import configparser

config = configparser.ConfigParser()
config.read('/home/ec2-user/.aws/credentials')

['/home/ec2-user/.aws/credentials']

In [4]:
aws_access_key_id = config["default"]['aws_access_key_id']
aws_secret_access_key = config["default"]['aws_secret_access_key']
bucket_name = 'asl-capstone'
prefix = '/msasl/RGB/'
save_path = '/content/temp_folder'
s3_URI = 's3://asl-capstone/msasl/RGB/'

In [5]:
# Create an s3 object
s3 = boto3.client('s3',aws_access_key_id = aws_access_key_id, aws_secret_access_key = aws_secret_access_key,
                  region_name = 'us-west-2')

In [6]:
# Create an S3FS object
fs = s3fs.S3FileSystem(key=aws_access_key_id, secret=aws_secret_access_key) # Define an S3FS object

In [7]:
# # Let's read the WLASL json file to map the number to the word
# filename = "wlasl-dataset/WLASL_v0.3.json"
# wlasl_df = pd.read_json(filename)

In [8]:
# display(wlasl_df.head())
# print(wlasl_df.shape)
# #print(wlasl_df['instances'][0])

In [9]:
# # Not all videos from the JSON exist. Let's create a function to check which ones exist
# def video_features(json_data):
#     video_list = []
#     frame_list = []
#     gloss = json_data['gloss']
#     json_list = json_data['instances']
#     for id in json_list:
#         vid = id['video_id']
#         frame_start = id['frame_start']
#         frame_end = id['frame_end']
#         bbox = id['bbox']
#         if os.path.exists(f"wlasl-dataset/videos/{vid}.mp4"):
#             features = {'word':gloss,'id':vid,'start':frame_start, 'end':frame_end,'box':bbox}
#             #yield features
#             video_list.append(features)
#     return video_list

In [10]:
# # Load the data and get the features
# with open("wlasl-dataset/WLASL_v0.3.json",'r') as file:
#     json_data = file.read()
# json_data = json.loads(json_data)

In [11]:
# # Let's extract all relevant features from the JSON
# video_list = []
# for data in json_data:
#     video_list.append(video_features(data))

In [12]:
# # Let's check how many unique words we have
# word_list = []
# for video in video_list:
#     for vid in video:
#         word_list.append(vid['word'])

In [13]:
# print(len(word_list))
# word_list = list(set(word_list))
# print(len(word_list))
# print(word_list[:10])

In [14]:
# # Let's now work on the MS-ASL Dataset
# filenames = ['MSASL/MSASL_train.json','MSASL/MSASL_val.json','MSASL/MSASL_test.json']
# msasl_words = []
# for file in filenames:
#     with open("MSASL/MSASL_train.json",'r') as file:
#         json_data = file.read()
#     json_data = json.loads(json_data)
#     for word in json_data:
#         msasl_words.append(word)

In [15]:
# print(msasl_words[0])

In [16]:
# def msasl_features(msasl_word):
#     gloss = msasl_word['clean_text']
#     url = msasl_word['url']
#     frame_start = msasl_word['start_time']*msasl_word['fps']
#     frame_end = msasl_word['end_time']*msasl_word['fps']
#     bbox = msasl_word['box']
#     features = {'word':gloss,'url':url,'start':frame_start, 'end':frame_end,'box':bbox}
#     return features
    

In [17]:
# msasl_data = [] # Store all relevant information about the MSASL dataset
# for word in msasl_words:
#     msasl_data.append(msasl_features(word))    

In [18]:
# print(msasl_data[:4])

In [19]:
# matching_data = [x for x in msasl_data if x['word'] in word_list]
# print(matching_data[:4])

In [20]:
# print(len(matching_data))

In [21]:
# # Sort all the elements by the word so that they come sequentially
# matching_data = sorted(matching_data, key = lambda x: x['word'])

In [22]:
# print(matching_data[:10])

In [23]:
with fs.open(f"s3://asl-capstone/msasl/matching_words.parquet","rb") as f: # Read the data from S3
    matching_data = pd.read_parquet(f)

In [24]:
matching_data = matching_data.to_dict('records') # Convert to a list of dictionaries

In [25]:
# We need only a max of 20 videos per word
collected_words = []
matching_word = ""
counter = 0
for data in matching_data:
    #print(data['word'])
    if data['word'] != matching_word:
        matching_word = data['word']
        counter = 0
    if counter < 20:
        collected_words.append(data)
        counter+=1

In [26]:
print(len(collected_words))

16473


In [27]:
# Let's visualise the data
words_df = pd.DataFrame(collected_words)
word_counts = words_df['word'].value_counts()

In [28]:
with fs.open(f"s3://asl-capstone/msasl/master_file_Deanna_1.parquet","rb") as f:
    read_words = pd.read_parquet(f)

In [29]:
print(len(read_words))
read_words['caption'].value_counts()

2414


caption
quiet         22
ring          21
spain         20
strawberry    20
store         20
              ..
table          8
psychology     6
spell          3
purchase       3
popcorn        3
Name: count, Length: 127, dtype: int64

In [30]:
collected_words = collected_words[440:] # Drop the first 440 words since those were already processed

In [31]:
len(collected_words)

16033

In [32]:
def check_video(url): #Checks if a YouTube video is available and returns True if available and False if not
    try:
        result = subprocess.run(["yt-dlp", "--simulate", "--get-url", url], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE);
        return True
    except subprocess.CalledProcessError:
        return False # Video is not available in YouTube

In [33]:
def download_vid(url, filename):
    try:
        filename = "temp_folder/"+filename
        if os.path.exists(filename):
            os.remove(filename)
            # print("Deleted file")
        if not os.path.exists(filename):
            command = ["yt-dlp","-o",filename,url]
            subprocess.run(command);
            return True
    except:
        return False
        

In [34]:
def crop_center_square(frame):
    y, x = frame.shape[0:2]
    if x > y:
        start_x = (x-y)/2
        end_x = start_x + y
        start_x = int(start_x)
        end_x = int(end_x)
        return frame[:, int(start_x):int(end_x)]
    else:
        return frame

In [35]:
def load_video(filename, max_frames=0, resize=(256,256)):
    filename = "temp_folder/"+filename
    video_capture = cv2.VideoCapture(filename)
    frames = []
    try:
      while video_capture.isOpened():
        ret, frame = video_capture.read()
        if not ret:
          break
        frame = crop_center_square(frame)
        frame = cv2.resize(frame, resize)
        frame = frame[:, :, [2, 1, 0]]
        frames.append(frame)

        if len(frames) == max_frames:
          break
    finally:
      video_capture.release()
    return np.array(frames) / 255.0

In [36]:
def save_frame_file(video_array, filename, start_frame, end_frame, word):
    s3_uri = "s3://asl-capstone/msasl/RGB/"
    filename = filename+".npy"
    if end_frame!=-1:
        video_array = video_array[int(start_frame):int(end_frame)].astype(np.float32)
    else:
        video_array = video_array.astype(np.float32)
        
    with fs.open(f"{s3_uri+filename}","wb") as f:
        np.save(f,video_array)
    try:
        with fs.open(f"s3://asl-capstone/msasl/master_file_Deanna_1.parquet","rb") as f:
            parquet_df = pd.read_parquet(f)
    except:
        parquet_df = pd.DataFrame()
        print("Parquet does not exist yet")
    temp_df = pd.DataFrame({'caption':[word], 'path':[s3_uri+filename]})
    parquet_df = pd.concat([parquet_df,temp_df], ignore_index=True)
    with fs.open(f"s3://asl-capstone/msasl/master_file_Deanna_1.parquet","wb") as f:
        parquet_df.to_parquet(f)

    

In [37]:
# deanna_collected_words = collected_words[10688:]
deanna_collected_words = collected_words[3993:5355]

iteration_1 = deanna_collected_words[:452]
iteration_2 = deanna_collected_words[452:908]
iteration_3 = deanna_collected_words[908:]


len(iteration_1)

452

In [40]:
%%time
filename_counter = {}
for video in tqdm(iteration_1[400:], desc="Processing videos"):
    try:
        word = video['word']
        url = video['url']
        start_frame = video['start']
        end_frame = video['end']
        if word in filename_counter:
            filename_counter[word]+=1
            filename = f"{word}_{filename_counter[word]}"
        else:
            filename_counter[word] = 1
            filename = f"{word}"
        #print(filename, url)
        downloaded_filename = filename+".mp4"
        download_vid(url, downloaded_filename);
        video_array = load_video(downloaded_filename);
        save_frame_file(video_array, filename, start_frame, end_frame, word);
        local_filename = "temp_folder/"+downloaded_filename
        os.remove(local_filename)
    except Exception as e:
        # print(f"Error with video download. Skipping it")
        print(e)

Processing videos:   0%|                                                                                               | 0/52 [00:00<?, ?it/s]

[youtube] Extracting URL: https://www.youtube.com/watch?v=sErq0TJMKEo
[youtube] sErq0TJMKEo: Downloading webpage
[youtube] sErq0TJMKEo: Downloading ios player API JSON
[youtube] sErq0TJMKEo: Downloading android player API JSON
[youtube] sErq0TJMKEo: Downloading m3u8 information
[info] sErq0TJMKEo: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday.mp4
[download] 100% of    9.31MiB in 00:00:00 at 27.52MiB/s    


Processing videos:   2%|█▋                                                                                     | 1/52 [00:15<13:33, 15.95s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=73icFhednQU
[youtube] 73icFhednQU: Downloading webpage
[youtube] 73icFhednQU: Downloading ios player API JSON
[youtube] 73icFhednQU: Downloading android player API JSON


ERROR: [youtube] 73icFhednQU: Private video. Sign in if you've been granted access to this video
Processing videos:   4%|███▎                                                                                   | 2/52 [00:16<05:50,  7.02s/it]

[Errno 2] No such file or directory: 'temp_folder/everyday_2.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=my4mxg6lXYQ
[youtube] my4mxg6lXYQ: Downloading webpage
[youtube] my4mxg6lXYQ: Downloading ios player API JSON
[youtube] my4mxg6lXYQ: Downloading android player API JSON
[youtube] my4mxg6lXYQ: Downloading m3u8 information
[info] my4mxg6lXYQ: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_3.mp4
[download] 100% of   73.49MiB in 00:00:03 at 21.64MiB/s    


Processing videos:   6%|█████                                                                                  | 3/52 [00:48<15:00, 18.37s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=U0E24IK06nc
[youtube] U0E24IK06nc: Downloading webpage
[youtube] U0E24IK06nc: Downloading ios player API JSON
[youtube] U0E24IK06nc: Downloading android player API JSON
[youtube] U0E24IK06nc: Downloading m3u8 information
[info] U0E24IK06nc: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday_4.mp4
[download] 100% of    5.00MiB in 00:00:00 at 15.53MiB/s    


Processing videos:   8%|██████▋                                                                                | 4/52 [00:56<11:23, 14.24s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=2nXrJ_7NOgE
[youtube] 2nXrJ_7NOgE: Downloading webpage
[youtube] 2nXrJ_7NOgE: Downloading ios player API JSON
[youtube] 2nXrJ_7NOgE: Downloading android player API JSON
[youtube] 2nXrJ_7NOgE: Downloading m3u8 information
[info] 2nXrJ_7NOgE: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_5.mp4
[download] 100% of   50.58MiB in 00:00:03 at 15.48MiB/s    


Processing videos:  10%|████████▎                                                                              | 5/52 [01:25<15:16, 19.50s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=Gs9zBeSIVIE
[youtube] Gs9zBeSIVIE: Downloading webpage
[youtube] Gs9zBeSIVIE: Downloading ios player API JSON
[youtube] Gs9zBeSIVIE: Downloading android player API JSON
[youtube] Gs9zBeSIVIE: Downloading m3u8 information
[info] Gs9zBeSIVIE: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday_6.mp4
[download] 100% of   16.71MiB in 00:00:00 at 28.58MiB/s    


Processing videos:  12%|██████████                                                                             | 6/52 [01:39<13:26, 17.52s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=LOdG5cVf_QQ
[youtube] LOdG5cVf_QQ: Downloading webpage
[youtube] LOdG5cVf_QQ: Downloading ios player API JSON
[youtube] LOdG5cVf_QQ: Downloading android player API JSON
[youtube] LOdG5cVf_QQ: Downloading m3u8 information
[youtube] LOdG5cVf_QQ: Downloading MPD manifest
[info] LOdG5cVf_QQ: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_7.mp4
[download] 100% of    1.12MiB in 00:00:00 at 1.74MiB/s     


Processing videos:  13%|███████████▋                                                                           | 7/52 [01:43<10:01, 13.37s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=oJrNJvlvIEI
[youtube] oJrNJvlvIEI: Downloading webpage
[youtube] oJrNJvlvIEI: Downloading ios player API JSON
[youtube] oJrNJvlvIEI: Downloading android player API JSON
[youtube] oJrNJvlvIEI: Downloading m3u8 information
[youtube] oJrNJvlvIEI: Downloading MPD manifest
[info] oJrNJvlvIEI: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_8.mp4
[download] 100% of    3.16MiB in 00:00:06 at 537.93KiB/s   


Processing videos:  15%|█████████████▍                                                                         | 8/52 [01:52<08:46, 11.96s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=oJrNJvlvIEI
[youtube] oJrNJvlvIEI: Downloading webpage
[youtube] oJrNJvlvIEI: Downloading ios player API JSON
[youtube] oJrNJvlvIEI: Downloading android player API JSON
[youtube] oJrNJvlvIEI: Downloading m3u8 information
[youtube] oJrNJvlvIEI: Downloading MPD manifest
[info] oJrNJvlvIEI: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_9.mp4
[download] 100% of    3.16MiB in 00:00:00 at 26.52MiB/s    


Processing videos:  17%|███████████████                                                                        | 9/52 [01:55<06:34,  9.16s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=oJrNJvlvIEI
[youtube] oJrNJvlvIEI: Downloading webpage
[youtube] oJrNJvlvIEI: Downloading ios player API JSON
[youtube] oJrNJvlvIEI: Downloading android player API JSON
[youtube] oJrNJvlvIEI: Downloading m3u8 information
[youtube] oJrNJvlvIEI: Downloading MPD manifest
[info] oJrNJvlvIEI: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_10.mp4
[download] 100% of    3.16MiB in 00:00:00 at 6.10MiB/s     


Processing videos:  19%|████████████████▌                                                                     | 10/52 [02:00<05:23,  7.70s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=mMOIbKjlWTQ
[youtube] mMOIbKjlWTQ: Downloading webpage
[youtube] mMOIbKjlWTQ: Downloading ios player API JSON
[youtube] mMOIbKjlWTQ: Downloading android player API JSON
[youtube] mMOIbKjlWTQ: Downloading m3u8 information
[youtube] mMOIbKjlWTQ: Downloading MPD manifest
[info] mMOIbKjlWTQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday_11.mp4
[download] 100% of  179.21KiB in 00:00:00 at 541.97KiB/s 


Processing videos:  21%|██████████████████▏                                                                   | 11/52 [02:04<04:35,  6.71s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=oDnz2t98VYk
[youtube] oDnz2t98VYk: Downloading webpage
[youtube] oDnz2t98VYk: Downloading ios player API JSON
[youtube] oDnz2t98VYk: Downloading android player API JSON


ERROR: [youtube] oDnz2t98VYk: Private video. Sign in if you've been granted access to this video
Processing videos:  23%|███████████████████▊                                                                  | 12/52 [02:05<03:16,  4.90s/it]

[Errno 2] No such file or directory: 'temp_folder/everyday_12.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=2lzLDsoVWww
[youtube] 2lzLDsoVWww: Downloading webpage
[youtube] 2lzLDsoVWww: Downloading ios player API JSON


ERROR: [youtube] 2lzLDsoVWww: Video unavailable
Processing videos:  25%|█████████████████████▌                                                                | 13/52 [02:06<02:22,  3.65s/it]

[youtube] 2lzLDsoVWww: Downloading android player API JSON
[Errno 2] No such file or directory: 'temp_folder/everyday_13.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=Cv5avR9nEhg
[youtube] Cv5avR9nEhg: Downloading webpage
[youtube] Cv5avR9nEhg: Downloading ios player API JSON
[youtube] Cv5avR9nEhg: Downloading android player API JSON
[youtube] Cv5avR9nEhg: Downloading m3u8 information
[info] Cv5avR9nEhg: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_14.mp4
[download] 100% of  103.49MiB in 00:00:02 at 40.26MiB/s    


Processing videos:  27%|███████████████████████▏                                                              | 14/52 [03:01<12:10, 19.23s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=eRZW3DPxDGk
[youtube] eRZW3DPxDGk: Downloading webpage
[youtube] eRZW3DPxDGk: Downloading ios player API JSON
[youtube] eRZW3DPxDGk: Downloading android player API JSON
[youtube] eRZW3DPxDGk: Downloading m3u8 information
[youtube] eRZW3DPxDGk: Downloading MPD manifest
[info] eRZW3DPxDGk: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_15.mp4
[download] 100% of  128.61MiB in 00:00:03 at 32.36MiB/s    


Processing videos:  29%|████████████████████████▊                                                             | 15/52 [03:32<14:05, 22.84s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=f4WV4NHlmuA
[youtube] f4WV4NHlmuA: Downloading webpage
[youtube] f4WV4NHlmuA: Downloading ios player API JSON
[youtube] f4WV4NHlmuA: Downloading android player API JSON
[youtube] f4WV4NHlmuA: Downloading m3u8 information
[info] f4WV4NHlmuA: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_16.mp4
[download] 100% of  360.86KiB in 00:00:00 at 891.39KiB/s   


Processing videos:  31%|██████████████████████████▍                                                           | 16/52 [03:37<10:30, 17.52s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=R_ES8RZua1g
[youtube] R_ES8RZua1g: Downloading webpage
[youtube] R_ES8RZua1g: Downloading ios player API JSON
[youtube] R_ES8RZua1g: Downloading android player API JSON
[youtube] R_ES8RZua1g: Downloading m3u8 information
[info] R_ES8RZua1g: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday_17.mp4
[download] 100% of   22.84MiB in 00:00:00 at 34.05MiB/s    


Processing videos:  33%|████████████████████████████                                                          | 17/52 [03:57<10:37, 18.22s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=xGnOT9Gp0F0
[youtube] xGnOT9Gp0F0: Downloading webpage
[youtube] xGnOT9Gp0F0: Downloading ios player API JSON
[youtube] xGnOT9Gp0F0: Downloading android player API JSON
[youtube] xGnOT9Gp0F0: Downloading m3u8 information
[info] xGnOT9Gp0F0: Downloading 1 format(s): 22
[download] Destination: temp_folder/everyday_18.mp4
[download] 100% of   22.87MiB in 00:00:00 at 29.07MiB/s    


Processing videos:  35%|█████████████████████████████▊                                                        | 18/52 [04:06<08:46, 15.48s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=sErq0TJMKEo
[youtube] sErq0TJMKEo: Downloading webpage
[youtube] sErq0TJMKEo: Downloading ios player API JSON
[youtube] sErq0TJMKEo: Downloading android player API JSON
[youtube] sErq0TJMKEo: Downloading m3u8 information
[info] sErq0TJMKEo: Downloading 1 format(s): 18
[download] Destination: temp_folder/everyday_19.mp4
[download] 100% of    9.31MiB in 00:00:00 at 63.03MiB/s    


Processing videos:  37%|███████████████████████████████▍                                                      | 19/52 [04:22<08:31, 15.51s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=73icFhednQU
[youtube] 73icFhednQU: Downloading webpage
[youtube] 73icFhednQU: Downloading ios player API JSON
[youtube] 73icFhednQU: Downloading android player API JSON


ERROR: [youtube] 73icFhednQU: Private video. Sign in if you've been granted access to this video
Processing videos:  38%|█████████████████████████████████                                                     | 20/52 [04:23<05:55, 11.10s/it]

[Errno 2] No such file or directory: 'temp_folder/everyday_20.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example.mp4
[download] 100% of   69.77MiB in 00:00:04 at 15.16MiB/s    


Processing videos:  40%|██████████████████████████████████▋                                                   | 21/52 [05:08<10:58, 21.24s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_2.mp4
[download] 100% of   69.77MiB in 00:00:03 at 21.43MiB/s    


Processing videos:  42%|████████████████████████████████████▍                                                 | 22/52 [05:52<14:01, 28.05s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=DZEs6UDnPHA
[youtube] DZEs6UDnPHA: Downloading webpage
[youtube] DZEs6UDnPHA: Downloading ios player API JSON
[youtube] DZEs6UDnPHA: Downloading android player API JSON
[youtube] DZEs6UDnPHA: Downloading m3u8 information
[info] DZEs6UDnPHA: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_3.mp4
[download] 100% of   35.42MiB in 00:00:01 at 21.60MiB/s    


Processing videos:  44%|██████████████████████████████████████                                                | 23/52 [06:15<12:57, 26.80s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=qFpB4TYO2Ec
[youtube] qFpB4TYO2Ec: Downloading webpage
[youtube] qFpB4TYO2Ec: Downloading ios player API JSON
[youtube] qFpB4TYO2Ec: Downloading android player API JSON
[youtube] qFpB4TYO2Ec: Downloading m3u8 information
[youtube] qFpB4TYO2Ec: Downloading MPD manifest
[info] qFpB4TYO2Ec: Downloading 1 format(s): 22
[download] Destination: temp_folder/example_4.mp4
[download] 100% of  857.03KiB in 00:00:00 at 1.05MiB/s     


Processing videos:  46%|███████████████████████████████████████▋                                              | 24/52 [06:21<09:36, 20.59s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_5.mp4
[download] 100% of   69.77MiB in 00:00:01 at 38.06MiB/s    


Processing videos:  48%|█████████████████████████████████████████▎                                            | 25/52 [07:03<12:08, 26.98s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_6.mp4
[download] 100% of   69.77MiB in 00:00:01 at 40.60MiB/s    


Processing videos:  50%|███████████████████████████████████████████                                           | 26/52 [07:46<13:41, 31.58s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=DZEs6UDnPHA
[youtube] DZEs6UDnPHA: Downloading webpage
[youtube] DZEs6UDnPHA: Downloading ios player API JSON
[youtube] DZEs6UDnPHA: Downloading android player API JSON
[youtube] DZEs6UDnPHA: Downloading m3u8 information
[info] DZEs6UDnPHA: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_7.mp4
[download] 100% of   35.42MiB in 00:00:00 at 51.24MiB/s    


Processing videos:  52%|████████████████████████████████████████████▋                                         | 27/52 [08:09<12:08, 29.12s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=qFpB4TYO2Ec
[youtube] qFpB4TYO2Ec: Downloading webpage
[youtube] qFpB4TYO2Ec: Downloading ios player API JSON
[youtube] qFpB4TYO2Ec: Downloading android player API JSON
[youtube] qFpB4TYO2Ec: Downloading m3u8 information
[youtube] qFpB4TYO2Ec: Downloading MPD manifest
[info] qFpB4TYO2Ec: Downloading 1 format(s): 22
[download] Destination: temp_folder/example_8.mp4
[download] 100% of  857.03KiB in 00:00:00 at 6.50MiB/s     


Processing videos:  54%|██████████████████████████████████████████████▎                                       | 28/52 [08:14<08:44, 21.85s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_9.mp4
[download] 100% of   69.77MiB in 00:00:03 at 17.70MiB/s    


Processing videos:  56%|███████████████████████████████████████████████▉                                      | 29/52 [08:59<10:59, 28.66s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=RWYus9H4YrQ
[youtube] RWYus9H4YrQ: Downloading webpage
[youtube] RWYus9H4YrQ: Downloading ios player API JSON
[youtube] RWYus9H4YrQ: Downloading android player API JSON
[youtube] RWYus9H4YrQ: Downloading m3u8 information
[info] RWYus9H4YrQ: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_10.mp4
[download] 100% of   69.77MiB in 00:00:02 at 30.77MiB/s    


Processing videos:  58%|█████████████████████████████████████████████████▌                                    | 30/52 [09:47<12:43, 34.71s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=DZEs6UDnPHA
[youtube] DZEs6UDnPHA: Downloading webpage
[youtube] DZEs6UDnPHA: Downloading ios player API JSON
[youtube] DZEs6UDnPHA: Downloading android player API JSON
[youtube] DZEs6UDnPHA: Downloading m3u8 information
[info] DZEs6UDnPHA: Downloading 1 format(s): 18
[download] Destination: temp_folder/example_11.mp4
[download] 100% of   35.42MiB in 00:00:01 at 22.72MiB/s    


Processing videos:  60%|███████████████████████████████████████████████████▎                                  | 31/52 [10:11<11:00, 31.44s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=qFpB4TYO2Ec
[youtube] qFpB4TYO2Ec: Downloading webpage
[youtube] qFpB4TYO2Ec: Downloading ios player API JSON
[youtube] qFpB4TYO2Ec: Downloading android player API JSON
[youtube] qFpB4TYO2Ec: Downloading m3u8 information
[youtube] qFpB4TYO2Ec: Downloading MPD manifest
[info] qFpB4TYO2Ec: Downloading 1 format(s): 22
[download] Destination: temp_folder/example_12.mp4
[download] 100% of  857.03KiB in 00:00:00 at 928.23KiB/s   


Processing videos:  62%|████████████████████████████████████████████████████▉                                 | 32/52 [10:17<07:56, 23.81s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=c2W6TVd_xh4
[youtube] c2W6TVd_xh4: Downloading webpage
[youtube] c2W6TVd_xh4: Downloading ios player API JSON
[youtube] c2W6TVd_xh4: Downloading android player API JSON
[youtube] c2W6TVd_xh4: Downloading m3u8 information
[info] c2W6TVd_xh4: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited.mp4
[download] 100% of    8.90MiB in 00:00:00 at 22.12MiB/s    


Processing videos:  63%|██████████████████████████████████████████████████████▌                               | 33/52 [10:28<06:16, 19.83s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=7YYB3BEoksc
[youtube] 7YYB3BEoksc: Downloading webpage
[youtube] 7YYB3BEoksc: Downloading ios player API JSON
[youtube] 7YYB3BEoksc: Downloading android player API JSON


ERROR: [youtube] 7YYB3BEoksc: Private video. Sign in if you've been granted access to this video
Processing videos:  65%|████████████████████████████████████████████████████████▏                             | 34/52 [10:28<04:14, 14.11s/it]

[Errno 2] No such file or directory: 'temp_folder/excited_2.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=7YYB3BEoksc
[youtube] 7YYB3BEoksc: Downloading webpage
[youtube] 7YYB3BEoksc: Downloading ios player API JSON


ERROR: [youtube] 7YYB3BEoksc: Private video. Sign in if you've been granted access to this video
Processing videos:  67%|█████████████████████████████████████████████████████████▉                            | 35/52 [10:29<02:51, 10.11s/it]

[youtube] 7YYB3BEoksc: Downloading android player API JSON
[Errno 2] No such file or directory: 'temp_folder/excited_3.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=2BCfrE5hjjY
[youtube] 2BCfrE5hjjY: Downloading webpage
[youtube] 2BCfrE5hjjY: Downloading ios player API JSON
[youtube] 2BCfrE5hjjY: Downloading android player API JSON
[youtube] 2BCfrE5hjjY: Downloading m3u8 information
[info] 2BCfrE5hjjY: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_4.mp4
[download] 100% of  390.64KiB in 00:00:00 at 1.89MiB/s     


Processing videos:  69%|███████████████████████████████████████████████████████████▌                          | 36/52 [10:34<02:18,  8.65s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=fKjsdtMU3fc
[youtube] fKjsdtMU3fc: Downloading webpage
[youtube] fKjsdtMU3fc: Downloading ios player API JSON
[youtube] fKjsdtMU3fc: Downloading android player API JSON
[youtube] fKjsdtMU3fc: Downloading m3u8 information
[info] fKjsdtMU3fc: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_5.mp4
[download] 100% of   47.11MiB in 00:00:02 at 16.69MiB/s    


Processing videos:  71%|█████████████████████████████████████████████████████████████▏                        | 37/52 [10:56<03:09, 12.63s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=fKjsdtMU3fc
[youtube] fKjsdtMU3fc: Downloading webpage
[youtube] fKjsdtMU3fc: Downloading ios player API JSON
[youtube] fKjsdtMU3fc: Downloading android player API JSON
[youtube] fKjsdtMU3fc: Downloading m3u8 information
[info] fKjsdtMU3fc: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_6.mp4
[download] 100% of   47.11MiB in 00:00:06 at 7.26MiB/s     


Processing videos:  73%|██████████████████████████████████████████████████████████████▊                       | 38/52 [11:23<03:57, 16.95s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=D7UYn37qTkU
[youtube] D7UYn37qTkU: Downloading webpage
[youtube] D7UYn37qTkU: Downloading ios player API JSON
[youtube] D7UYn37qTkU: Downloading android player API JSON


ERROR: [youtube] D7UYn37qTkU: Video unavailable
Processing videos:  75%|████████████████████████████████████████████████████████████████▌                     | 39/52 [11:24<02:37, 12.11s/it]

[Errno 2] No such file or directory: 'temp_folder/excited_7.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=mONu31r-eaM
[youtube] mONu31r-eaM: Downloading webpage
[youtube] mONu31r-eaM: Downloading ios player API JSON
[youtube] mONu31r-eaM: Downloading android player API JSON
[youtube] mONu31r-eaM: Downloading m3u8 information
[info] mONu31r-eaM: Downloading 1 format(s): 18
[download] Destination: temp_folder/excited_8.mp4
[download] 100% of    1.51MiB in 00:00:00 at 5.58MiB/s     


Processing videos:  77%|██████████████████████████████████████████████████████████████████▏                   | 40/52 [11:31<02:04, 10.41s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=fnhiGz4qE_I
[youtube] fnhiGz4qE_I: Downloading webpage
[youtube] fnhiGz4qE_I: Downloading ios player API JSON
[youtube] fnhiGz4qE_I: Downloading android player API JSON
[youtube] fnhiGz4qE_I: Downloading m3u8 information
[info] fnhiGz4qE_I: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_9.mp4
[download] 100% of   21.13MiB in 00:00:00 at 23.49MiB/s    


Processing videos:  79%|███████████████████████████████████████████████████████████████████▊                  | 41/52 [11:38<01:45,  9.58s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=fnhiGz4qE_I
[youtube] fnhiGz4qE_I: Downloading webpage
[youtube] fnhiGz4qE_I: Downloading ios player API JSON
[youtube] fnhiGz4qE_I: Downloading android player API JSON
[youtube] fnhiGz4qE_I: Downloading m3u8 information
[info] fnhiGz4qE_I: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_10.mp4
[download] 100% of   21.13MiB in 00:00:01 at 18.49MiB/s    


Processing videos:  81%|█████████████████████████████████████████████████████████████████████▍                | 42/52 [11:46<01:30,  9.04s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=iMUjcZCLaGo
[youtube] iMUjcZCLaGo: Downloading webpage
[youtube] iMUjcZCLaGo: Downloading ios player API JSON


ERROR: [youtube] iMUjcZCLaGo: Private video. Sign in if you've been granted access to this video
Processing videos:  83%|███████████████████████████████████████████████████████████████████████               | 43/52 [11:47<00:58,  6.55s/it]

[youtube] iMUjcZCLaGo: Downloading android player API JSON
[Errno 2] No such file or directory: 'temp_folder/excited_11.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=Dax964vUumQ
[youtube] Dax964vUumQ: Downloading webpage
[youtube] Dax964vUumQ: Downloading ios player API JSON
[youtube] Dax964vUumQ: Downloading android player API JSON


ERROR: [youtube] Dax964vUumQ: Private video. Sign in if you've been granted access to this video
Processing videos:  85%|████████████████████████████████████████████████████████████████████████▊             | 44/52 [11:48<00:38,  4.82s/it]

[Errno 2] No such file or directory: 'temp_folder/excited_12.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=Dax964vUumQ
[youtube] Dax964vUumQ: Downloading webpage
[youtube] Dax964vUumQ: Downloading ios player API JSON
[youtube] Dax964vUumQ: Downloading android player API JSON


ERROR: [youtube] Dax964vUumQ: Private video. Sign in if you've been granted access to this video
Processing videos:  87%|██████████████████████████████████████████████████████████████████████████▍           | 45/52 [11:48<00:25,  3.60s/it]

[Errno 2] No such file or directory: 'temp_folder/excited_13.mp4'
[youtube] Extracting URL: https://www.youtube.com/watch?v=CSj7IScvZnE
[youtube] CSj7IScvZnE: Downloading webpage
[youtube] CSj7IScvZnE: Downloading ios player API JSON
[youtube] CSj7IScvZnE: Downloading android player API JSON
[youtube] CSj7IScvZnE: Downloading m3u8 information
[info] CSj7IScvZnE: Downloading 1 format(s): 18
[download] Destination: temp_folder/excited_14.mp4
[download] 100% of    6.13MiB in 00:00:00 at 23.03MiB/s    


Processing videos:  88%|████████████████████████████████████████████████████████████████████████████          | 46/52 [12:00<00:36,  6.07s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=cF6XScPZDsE
[youtube] cF6XScPZDsE: Downloading webpage
[youtube] cF6XScPZDsE: Downloading ios player API JSON
[youtube] cF6XScPZDsE: Downloading android player API JSON
[youtube] cF6XScPZDsE: Downloading m3u8 information
[info] cF6XScPZDsE: Downloading 1 format(s): 18
[download] Destination: temp_folder/excited_15.mp4
[download] 100% of   32.14MiB in 00:00:01 at 30.31MiB/s    


Processing videos:  90%|█████████████████████████████████████████████████████████████████████████████▋        | 47/52 [12:22<00:53, 10.77s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=ZG4u7LDCN8A
[youtube] ZG4u7LDCN8A: Downloading webpage
[youtube] ZG4u7LDCN8A: Downloading ios player API JSON
[youtube] ZG4u7LDCN8A: Downloading android player API JSON
[youtube] ZG4u7LDCN8A: Downloading m3u8 information
[info] ZG4u7LDCN8A: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_16.mp4
[download] 100% of   18.89MiB in 00:00:01 at 16.37MiB/s    


Processing videos:  92%|███████████████████████████████████████████████████████████████████████████████▍      | 48/52 [12:36<00:47, 11.83s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=uPKOwziViaw
[youtube] uPKOwziViaw: Downloading webpage
[youtube] uPKOwziViaw: Downloading ios player API JSON
[youtube] uPKOwziViaw: Downloading android player API JSON
[youtube] uPKOwziViaw: Downloading m3u8 information
[info] uPKOwziViaw: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_17.mp4
[download] 100% of   63.76MiB in 00:00:02 at 24.34MiB/s    


Processing videos:  94%|█████████████████████████████████████████████████████████████████████████████████     | 49/52 [13:10<00:55, 18.45s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=uPKOwziViaw
[youtube] uPKOwziViaw: Downloading webpage
[youtube] uPKOwziViaw: Downloading ios player API JSON
[youtube] uPKOwziViaw: Downloading android player API JSON
[youtube] uPKOwziViaw: Downloading m3u8 information
[info] uPKOwziViaw: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_18.mp4
[download] 100% of   63.76MiB in 00:00:01 at 42.17MiB/s    


Processing videos:  96%|██████████████████████████████████████████████████████████████████████████████████▋   | 50/52 [13:41<00:44, 22.15s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=Eq6SnaimpzQ
[youtube] Eq6SnaimpzQ: Downloading webpage
[youtube] Eq6SnaimpzQ: Downloading ios player API JSON
[youtube] Eq6SnaimpzQ: Downloading android player API JSON
[youtube] Eq6SnaimpzQ: Downloading m3u8 information
[info] Eq6SnaimpzQ: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_19.mp4
[download] 100% of   54.64MiB in 00:00:02 at 18.88MiB/s    


Processing videos:  98%|████████████████████████████████████████████████████████████████████████████████████▎ | 51/52 [14:02<00:21, 21.83s/it]

[youtube] Extracting URL: https://www.youtube.com/watch?v=96HY0Pcl_e4
[youtube] 96HY0Pcl_e4: Downloading webpage
[youtube] 96HY0Pcl_e4: Downloading ios player API JSON
[youtube] 96HY0Pcl_e4: Downloading android player API JSON
[youtube] 96HY0Pcl_e4: Downloading m3u8 information
[youtube] 96HY0Pcl_e4: Downloading MPD manifest
[info] 96HY0Pcl_e4: Downloading 1 format(s): 22
[download] Destination: temp_folder/excited_20.mp4
[download] 100% of   54.80MiB in 00:00:01 at 29.07MiB/s    


Processing videos: 100%|██████████████████████████████████████████████████████████████████████████████████████| 52/52 [14:20<00:00, 16.55s/it]

CPU times: user 17min 47s, sys: 1min 56s, total: 19min 43s
Wall time: 14min 20s





In [None]:
def get_instance_id():
    """Get current instance ID from metadata"""
    url = "http://169.254.169.254/latest/meta-data/instance-id"
    response = requests.get(url)
    return response.text

In [None]:
def stop_instance(instance_id, region_name='us-west-2'):
    """Stop the EC2 instance"""
    ec2 = boto3.client('ec2', aws_access_key_id = aws_access_key_id, aws_secret_access_key = aws_secret_access_key, region_name=region_name)
    ec2.stop_instances(InstanceIds=[instance_id])

In [None]:
# Get the current instance ID
instance_id = get_instance_id()
print(instance_id)
# Stop the instance
stop_instance(instance_id)