In [None]:
import pandas as pd
import numpy as np
import boto3
import cv2 as cv
import os
import json
from config import key_, secret_, s3_bucket, kaggle_cookie

In [None]:
def download_video_from_s3_bucket(video_name, aws_key=key_, aws_secret=secret_, bucket=s3_bucket):
    '''
    ##Intended for use when not using Sagemaker##
    Opens a video stored in S3 through a short-lived presigned URL and reports
    its frame count and the shape of its first frame.

    Parameters
    ----------
    video_name : object key of the video inside `bucket`
    aws_key : AWS access key id used to sign the URL (defaults to the config value)
    aws_secret : AWS secret access key used to sign the URL (defaults to the config value)
    bucket : name of the S3 bucket that holds the video

    Returns
    -------
    (video_name, frame_count, frame_x, frame_y, RGB)
        frame_x, frame_y and RGB are the three entries of the first frame's
        `shape`, in that order. OpenCV frames are laid out as
        (rows, columns, channels), so frame_x is the row count (image height)
        and frame_y is the column count (image width).

    Raises
    ------
    RuntimeError : if the first frame of the video cannot be read
    '''
    s3 = boto3.client('s3',
                      aws_access_key_id=aws_key,  # honour the caller-supplied credentials
                      aws_secret_access_key=aws_secret,
                      region_name='us-east-2', #region is hardcoded - this is not a security risk to keep public
                      config= boto3.session.Config(signature_version='s3v4')) #the sig version needs to be s3v4 or the url will error
    video_url = s3.generate_presigned_url('get_object',
                                        Params={"Bucket": bucket,
                                               'Key': video_name},
                                        ExpiresIn=60)
    video = cv.VideoCapture(video_url)
    try:
        frame_count = int(video.get(cv.CAP_PROP_FRAME_COUNT))
        read_ok, frame_array = video.read()
        # read() reports failure through its first return value and hands back
        # no frame; fail with a clear message instead of crashing on `.shape`.
        if not read_ok or frame_array is None:
            raise RuntimeError(
                'Could not read the first frame of video {!r} from bucket {!r}'.format(video_name, bucket))
        frame_x, frame_y, RGB = frame_array.shape
    finally:
        # Always release the capture, even when reading fails.
        video.release()
    return video_name, frame_count, frame_x, frame_y, RGB

In [None]:
# Load the video metadata. The encoding is given explicitly so the file is
# decoded the same way on every platform instead of using the locale default.
with open('meta.json', encoding='utf-8') as m:
    meta = json.load(m)

In [None]:
# Five independent, empty accumulators: one per column of the summary table.
video_names, frame_counts, x_shapes, y_shapes, RGB_shapes = ([] for _ in range(5))

In [None]:
# Gather the frame count and first-frame shape of every video listed in `meta`.
# NOTE: the lists are appended to, so re-running this cell without first
# re-running the cell that creates them will add duplicate entries.
for video in meta:
    # The helper must actually be called; unpacking the bare function object
    # raises TypeError. Assumes iterating `meta` yields the S3 object keys
    # (video names) - TODO confirm against the structure of meta.json.
    video_, frames, x_frames, y_frames, RGB = download_video_from_s3_bucket(video)
    video_names.append(video_)
    frame_counts.append(frames)
    x_shapes.append(x_frames)
    y_shapes.append(y_frames)
    RGB_shapes.append(RGB)

In [None]:
# Pair each output column name with the list that holds its per-video values.
video_information_dictionary = dict(zip(
    ('video_names', 'number_of_frames', 'number_of_x_pixels', 'number_of_y_pixels', 'RGB'),
    (video_names, frame_counts, x_shapes, y_shapes, RGB_shapes),
))

In [None]:
# Build the summary table: one column per key of the dictionary.
video_info_df = pd.DataFrame(data=video_information_dictionary)

In [None]:
# Write the table to the working directory. Default to_csv options apply, so
# the DataFrame index is written as the first column of the file.
video_info_df.to_csv(path_or_buf='video_information.csv')

In [None]:
# S3 client used for the upload step, authenticated with the credentials from
# config. No region or signature version is set on this client.
s3 = boto3.client(
    's3',
    aws_access_key_id=key_,
    aws_secret_access_key=secret_,
)

In [None]:
# Upload the local CSV to the bucket under the same object key as its file name.
s3.upload_file(
    Filename='video_information.csv',
    Bucket=s3_bucket,
    Key='video_information.csv',
)