In [309]:
import csv
import pandas as pd
import os
import subprocess
import sys
import datetime
import random

In [315]:
# Load only the GolfDB columns this notebook uses and give two of them
# clearer names: `kf7` becomes `impactFrame`, `youtube_id` becomes `youtubeId`.
data = pd.read_csv(
    "golf-swing-dataset/data/golfDB.csv",
    sep=",",
    usecols=["id", "youtube_id", "sex", "club", "slow", "kf7"],
)
data = data.rename(columns={"kf7": "impactFrame", "youtube_id": "youtubeId"})

# Keep the rows whose `slow` flag is 0 (presumably the clips that are not
# slow-motion -- confirm against the dataset description), then drop the flag.
# A vectorized comparison selects the same rows as mapping 1/0 to True/False
# first, because 0 == False in Python.
data = data[data["slow"] == 0].drop("slow", axis=1)
data.head()

Unnamed: 0,id,youtubeId,sex,club,impactFrame
0,0,f1BWA5F87Jc,f,driver,498
2,2,tA1iotgtMyc,m,driver,698
4,4,wDCKLePrwHA,f,driver,205
6,6,iPuVhnI8pJU,m,driver,332
8,8,-M5SITXMA2Y,f,driver,355


In [339]:
# Folder locations of the two datasets, both under one common root.
# The trailing slashes are kept on purpose: the rest of the notebook builds
# file paths by appending directly to these strings.
_datasetRoot = "/Users/metinozturk/Downloads/AI/"
basePath = _datasetRoot + "golf-swing-dataset/videos/"
basePathControl = _datasetRoot + "UrbanSound8K/"

In [340]:
# Extract the impact audio clip for every remaining golf video.
# NOTE: extractAudioFromVideo is defined in a later cell of this notebook, so
# that cell has to be executed before this one.
# A plain loop is used because the calls are made only for their side effect;
# there are no return values worth collecting.
for videoId in data["id"]:
    extractAudioFromVideo(videoId)

In [326]:
# Read the metadata table of the control audio set; it lists one audio slice
# per row together with the fold directory the slice is stored in.
metadataCsvPath = basePathControl + "metadata/UrbanSound8K.csv"
metadata = pd.read_csv(metadataCsvPath)
metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [343]:
# Cut a control clip for every row of the metadata table.
# NOTE: extractAudioFromControlVideo is defined in a later cell of this
# notebook, so that cell has to be executed before this one.
# Only the index label is needed, so iterate over the index instead of
# materialising every row with iterrows().
for clipIndex in metadata.index:
    extractAudioFromControlVideo(clipIndex)

In [337]:
def extractAudioFromVideo(videoIdx):
    """Cut an audio clip of up to one second around the impact of one video.

    The clip starts a random 0-0.5 s before the impact time and ends the
    remainder of one second after it. Both ends are clamped to the bounds of
    the video, so the clip is shorter than one second when the impact is
    close to the start or the end. ffmpeg writes the clip to the video path
    with ``golf-swing-dataset/videos`` replaced by ``audios`` and ``.mp4``
    replaced by ``.wav``.

    Args:
        videoIdx: Index label of the row in the module-level ``data`` frame
            (the row is looked up with ``data.loc``).

    Returns:
        None. Nothing is written when the frame rate or the duration of the
        video cannot be determined, or when the clip window is empty.
    """
    row = data.loc[videoIdx]
    fileName = f"{basePath}{row.id + 1}-{row.youtubeId}.mp4"
    frameRate = getFrameRate(fileName)
    videoDuration = getVideoDuration(fileName)

    # Both helpers return -1 when the file is missing. A non-positive frame
    # rate would also break the division below, so skip those videos too.
    if videoDuration <= 0 or frameRate <= 0:
        return

    impactTime = round(row.impactFrame / frameRate, 2)

    impactPreDuration = random.uniform(0, 0.5)
    impactPostDuration = 1 - impactPreDuration

    startTime = round(max(0, impactTime - impactPreDuration), 2)
    # Clamp to the end of the video. The previous expression used
    # `videoDuration - impactTime` here, which could put the end of the clip
    # before its start.
    endTime = round(min(videoDuration, impactTime + impactPostDuration), 2)

    # An impact time at or beyond the end of the video leaves nothing to cut.
    if endTime <= startTime:
        return

    outputName = fileName.replace('golf-swing-dataset/videos', 'audios').replace('.mp4', '.wav')

    # Pass the arguments as a list (no shell) so that paths containing spaces
    # or shell metacharacters reach ffmpeg unchanged.
    command = [
        "ffmpeg", "-y", "-i", fileName,
        "-ss", str(datetime.timedelta(seconds=startTime)),
        "-to", str(datetime.timedelta(seconds=endTime)),
        outputName,
    ]

    subprocess.call(command)

In [341]:
def extractAudioFromControlVideo(videoIdx):
    """Write the first second of one control audio slice to ``controlAudios``.

    The source file is ``audio/fold<fold>/<slice_file_name>`` below
    ``basePathControl``; the output keeps the same file name.

    Args:
        videoIdx: Index label of the row in the module-level ``metadata``
            frame (the row is looked up with ``metadata.loc``).

    Returns:
        None. The exit status of ffmpeg is not checked.
    """
    row = metadata.loc[videoIdx]
    sliceFileName = row['slice_file_name']
    fileName = f"{basePathControl}audio/fold{row['fold']}/{sliceFileName}"

    # Pass the arguments as a list (no shell) so that paths containing spaces
    # or shell metacharacters reach ffmpeg unchanged.
    command = [
        "ffmpeg", "-y", "-i", fileName,
        "-ss", str(datetime.timedelta(seconds=0)),
        "-to", str(datetime.timedelta(seconds=1)),
        f"/Users/metinozturk/Downloads/AI/controlAudios/{sliceFileName}",
    ]
    subprocess.call(command)

In [202]:
def getFrameRate(filename):
    """Return the frame rate of the first usable video stream of a file.

    ffprobe is asked for the ``r_frame_rate`` entry of every video stream in
    its ``flat`` output format, which prints one ``key="value"`` line per
    stream. The value is either a plain number or a ``numerator/denominator``
    fraction.

    Args:
        filename: Path of the video file to inspect.

    Returns:
        The frame rate as a positive float, or -1 when the file does not
        exist, ffprobe exits with an error, or no positive frame rate can be
        parsed from its output.
    """
    if not os.path.exists(filename):
        return -1
    try:
        out = subprocess.check_output(["ffprobe", filename, "-v", "0", "-select_streams", "v", "-print_format", "flat", "-show_entries", "stream=r_frame_rate"])
    except subprocess.CalledProcessError:
        # ffprobe could not read the file; report it like a missing file.
        return -1

    # Parse line by line so that empty output (no video stream) and several
    # video streams are both handled instead of raising.
    for line in out.decode().splitlines():
        _, separator, value = line.partition('=')
        if not separator:
            continue
        rate = value.strip().strip('"').split('/')
        try:
            if len(rate) == 1:
                frameRate = float(rate[0])
            elif len(rate) == 2:
                frameRate = float(rate[0]) / float(rate[1])
            else:
                continue
        except (ValueError, ZeroDivisionError):
            # Unparseable value or a "0/0" placeholder: try the next stream.
            continue
        if frameRate > 0:
            return frameRate
    return -1

In [203]:
def getVideoDuration(filename):
    """Return the container duration of a media file in seconds.

    ffprobe is asked for the ``format=duration`` entry and prints the bare
    value without a key or section wrapper.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The duration as a float, or -1 when the file does not exist, ffprobe
        exits with an error, or the printed value is not a number (for
        example empty output or ``N/A``).
    """
    if not os.path.exists(filename):
        return -1
    try:
        out = subprocess.check_output(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", filename])
    except subprocess.CalledProcessError:
        # ffprobe could not read the file; report it like a missing file.
        return -1
    try:
        return float(out.decode().strip())
    except ValueError:
        return -1