<a href="https://colab.research.google.com/github/YantCaccia/Tirocinio/blob/main/GRID_Download%26Preparation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Word-level prediction (we tryin)

# Download dataset

In [None]:
%%shell
# Create the download layout: gridcorpus/raw/{video,align}.
# -p keeps the cell re-runnable when the directories already exist.
mkdir -p "gridcorpus/raw/video" "gridcorpus/raw/align"
cd "gridcorpus/raw"

# First and last speaker index come from $1 and $2 when the cell receives
# arguments; otherwise fall back to 1..34, the span covered by the unzip cell.
first="${1:-1}"
last="${2:-34}"

for i in $(seq "$first" "$last")
do
    printf "\n\n------------------------- Downloading %s th speaker -------------------------\n\n" "$i"

    # Download the alignment archive and the video archive of the i-th speaker.
    # -f makes curl fail on an HTTP error instead of saving the error page as an
    # archive. Writing with -o (no cd into the target directory) means a failed
    # download cannot leave the loop running from the wrong directory.
    curl -f "http://spandh.dcs.shef.ac.uk/gridcorpus/s$i/align/s$i.tar" -o "align/s$i.tar" \
        || echo "align download failed for speaker $i" >&2
    curl -f "http://spandh.dcs.shef.ac.uk/gridcorpus/s$i/video/s$i.mpg_vcd.zip" -o "video/s$i.zip" \
        || echo "video download failed for speaker $i" >&2

done

# Unzip raw data from Drive

In [None]:
%%shell
# Takes about 10 minutes to run.
# NOTE: the %%shell cell magic has to be the first line of the cell, so this
# comment lives below it.
cd /content
mkdir -p "intermediateDataset"
cd intermediateDataset

# Speakers 1-20 and 22-34; speaker 21 is skipped
# (presumably its archive is not available - TODO confirm).
for i in $(seq 1 20) $(seq 22 34)
do
  # The video archive is extracted into intermediateDataset, then the
  # alignment archive is extracted inside that speaker's s$i directory.
  unzip -q "/content/drive/MyDrive/tirocinioWorkingDirectory/datasets/grid/gridcorpus/raw/video/s$i.zip" -d "/content/intermediateDataset"
  tar -xf "/content/drive/MyDrive/tirocinioWorkingDirectory/datasets/grid/gridcorpus/raw/align/s$i.tar" -C "/content/intermediateDataset/s$i"
done

# Structure dataset

In [None]:
#exec time 4h 30m

import os
import shutil
import concurrent.futures
import cv2
import urllib.request as urlreq
import subprocess

# Directory holding the extracted per-speaker folders (s1, s2, ...).
myPath = "/content/intermediateDataset"
# Margin subtracted from a word's start time and added to its end time.
VIDEO_MARGIN = 0.02
# Margin subtracted from the mouth landmark coordinates to get the crop corner.
PIXEL_MARGIN = 15

# ------------------------------------------------------------------ #
# Face detection: Haar cascade definition file
# ------------------------------------------------------------------ #
# Where the cascade is published and the local file name it is stored under.
haarcascade_url = "https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_alt2.xml"
haarcascade = "haarcascade_frontalface_alt2.xml"

# Fetch the cascade only when the working directory does not list it yet.
if haarcascade not in os.listdir(os.curdir):
  urlreq.urlretrieve(haarcascade_url, haarcascade)

# ------------------------------------------------------------------ #
# Facial landmarks: LBF model file
# ------------------------------------------------------------------ #
# Where the landmark model is published and the local file name it is stored under.
LBFmodel_url = "https://github.com/kurnianggoro/GSOC2017/raw/master/data/lbfmodel.yaml"
LBFmodel = "LFBmodel.yaml"

# Fetch the model only when the working directory does not list it yet.
if LBFmodel not in os.listdir(os.curdir):
  urlreq.urlretrieve(LBFmodel_url, LBFmodel)

# ------------------------------------------------------------------ #

# ---- UTILITY FUNCTIONS ---- #
def fromFrameToSec(frame):
  """Convert an align-file timestamp into seconds.

  The value is shifted by 500, divided by 1000 and then scaled by 3 / 75.
  NOTE(review): presumably 1000 timestamp units correspond to one video frame
  and every clip is 75 frames / 3 seconds long - confirm against the dataset.

  Args:
    frame: timestamp as read from an align file.

  Returns:
    The corresponding position in seconds, as a float.
  """
  clipFrames = 75
  clipSeconds = 3
  shifted = (frame + 500) / 1000
  seconds = (clipSeconds * shifted) / clipFrames
  return seconds

def extractClassSentenceEdition(fileName):
    """Build the sentence label stored in an align file.

    The third whitespace-separated field of every line is taken as a word;
    "sil" and "sp" entries are dropped, the remaining words are capitalized
    and concatenated without a separator.

    Args:
        fileName: path of the align file to read.

    Returns:
        The concatenated, capitalized words as one string.
    """
    skipped = ["sil", "sp"]
    pieces = []
    with open(fileName, "r") as alignFile:
        for row in alignFile:
            token = row.split()[2]
            if token not in skipped:
                pieces.append(token.capitalize())
    return "".join(pieces)

def extractClassWordEdition(fileName):
    """Yield the time window and label of every spoken word in an align file.

    Each line is split on whitespace into start timestamp, end timestamp and
    word. Entries labelled "sil" or "sp" are skipped. Both timestamps are
    converted with fromFrameToSec and widened by VIDEO_MARGIN on each side.

    Args:
        fileName: path of the align file to read.

    Yields:
        Tuples of (start seconds, finish seconds, word).
    """
    with open(fileName, "r") as alignFile:
        for row in alignFile:
            fields = row.split()
            begin = int(fields[0])
            end = int(fields[1])
            label = fields[2]
            if label in ["sil", "sp"]:
                continue
            windowStart = fromFrameToSec(begin) - VIDEO_MARGIN
            windowEnd = fromFrameToSec(end) + VIDEO_MARGIN
            yield (windowStart, windowEnd, label)

def extractInfoFromEntry(entry, pathToSpeaker):
  """Derive the names related to one video entry of a speaker directory.

  Args:
    entry: file name of the video; its last four characters are dropped to
      obtain the clip id.
    pathToSpeaker: directory that contains the entry.

  Returns:
    A tuple (clip id, align file name, full path of the video).
  """
  clipId = entry[:-4]
  alignName = "{}.align".format(clipId)
  videoPath = os.path.join(pathToSpeaker, entry)
  return clipId, alignName, videoPath

def extractMouthLandmarkFromVideo(videoPath):
  """Return the top-left corner of the mouth crop box for a video.

  One frame of the video is read, faces are detected on it with the Haar
  cascade, facial landmarks are fitted with the LBF model, and landmark 48 of
  the first detected face is shifted up and left by PIXEL_MARGIN.
  NOTE(review): index 48 is presumably a mouth corner in the landmark layout
  of the LBF model - confirm.

  Args:
    videoPath: path of the video file to analyse.

  Returns:
    A tuple (x, y) with the crop box corner in pixel coordinates.

  Raises:
    ValueError: if no frame can be read from the video or no face is found.
  """
  cap = cv2.VideoCapture(videoPath)
  try:
    # Seek to frame index 1 and grab it; that frame is used as the "image".
    cap.set(cv2.CAP_PROP_POS_FRAMES, 1)
    success, image = cap.read()
  finally:
    # Always free the decoder, even when seeking or reading fails.
    cap.release()
  if not success:
    raise ValueError("Could not read a frame from {}".format(videoPath))

  # Detect faces on the image with the Haar cascade classifier.
  detector = cv2.CascadeClassifier(haarcascade)
  faces = detector.detectMultiScale(image)
  # len() works whether the detector returns an empty tuple or an array.
  if len(faces) == 0:
    raise ValueError("No face detected in {}".format(videoPath))

  # Fit the facial landmarks of the detected faces with the LBF model.
  landmark_detector = cv2.face.createFacemarkLBF()
  landmark_detector.loadModel(LBFmodel)
  _, landmarks = landmark_detector.fit(image, faces)

  # Landmark 48 of the first face is the reference point for the mouth.
  mouthX, mouthY = landmarks[0][0][48]

  return (mouthX - PIXEL_MARGIN, mouthY - PIXEL_MARGIN)

def extractMouthLandmarkFromVideoWrapper(pathToSpeaker):
  """Compute the mouth crop corner from the first .mpg listed for a speaker.

  Args:
    pathToSpeaker: directory containing the speaker's videos.

  Returns:
    The (x, y) tuple produced by extractMouthLandmarkFromVideo for the first
    entry ending in ".mpg", or None when the directory lists no such entry.
  """
  videoNames = (name for name in os.listdir(pathToSpeaker) if name.endswith(".mpg"))
  firstVideo = next(videoNames, None)
  if firstVideo is None:
    return None
  # Only the first video is analysed; the remaining entries are ignored.
  cornerX, cornerY = extractMouthLandmarkFromVideo(os.path.join(pathToSpeaker, firstVideo))
  return (cornerX, cornerY)
# --------------------------- #


def _countVideoFrames(videoPath):
  """Return the last "frame=" counter ffmpeg prints while copying videoPath to a null output.

  Raises:
    ValueError: if ffmpeg prints no frame counter (treated as a corrupted video).
  """
  import re  # local import: the file-level import cell does not provide it

  probe = subprocess.run(
    ["ffmpeg", "-i", videoPath, "-map", "0:v:0", "-c", "copy", "-f", "null", "-y", "/dev/null"],
    stdin=subprocess.DEVNULL,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # ffmpeg reports progress on stderr
    text=True,
    errors="replace",
  )
  counters = re.findall(r"frame= *([0-9]+)", probe.stdout)
  if not counters:
    raise ValueError("no frame counter reported for {}".format(videoPath))
  return int(counters[-1])


def _removeIfPresent(path):
  """Delete path, ignoring the case where it was never created."""
  try:
    os.remove(path)
  except FileNotFoundError:
    pass


def workerWordEdition(i, entry, pathToSpeaker, mouthX, mouthY):
  """Cut one cropped, greyscale, silent clip per spoken word of a speaker video.

  Entries that do not end in ".mpg" are ignored. For every word returned by
  extractClassWordEdition for the matching align file, ffmpeg trims the video
  to the word's time window, crops an 80x40 box whose top-left corner is
  (mouthX, mouthY), converts it to grey and drops the audio. The clip is
  written to /content/myFinalDataset/{word}/WORD{word}SP{i}SEN{id}.mpg and is
  deleted again when it has fewer than 3 frames or no readable frame count.

  Args:
    i: speaker number, used in the output file name.
    entry: file name inside pathToSpeaker.
    pathToSpeaker: directory of the speaker; align files live in its "align"
      subdirectory.
    mouthX: x coordinate of the crop box corner.
    mouthY: y coordinate of the crop box corner.
  """
  if not entry.endswith(".mpg"):
    return

  clipId, filename, srcPath = extractInfoFromEntry(entry, pathToSpeaker)
  alignPath = os.path.join(pathToSpeaker, "align", filename)

  for startTime, finishTime, word in extractClassWordEdition(alignPath):
    title = "WORD{}SP{}SEN{}.mpg".format(word, i, clipId)
    # The trimmed clip goes to /{word}/{title}.
    dstDirPath = os.path.join("/content/myFinalDataset", word)
    dstPath = os.path.join(dstDirPath, title)
    # exist_ok avoids the check-then-create race between worker threads.
    os.makedirs(dstDirPath, exist_ok=True)

    # Trim, crop, greyscale, remove audio and save the video. Arguments are
    # passed as a list so paths need no shell quoting; stdin is detached so
    # parallel ffmpeg processes cannot block on or consume console input.
    subprocess.run(
      [
        "ffmpeg",
        "-i", srcPath,
        "-ss", "00:00:{}".format(startTime),
        "-to", "00:00:{}".format(finishTime),
        "-fflags", "+genpts",
        "-an",
        "-vf", "crop=80:40:{}:{},format=gray".format(mouthX, mouthY),
        dstPath,
      ],
      stdin=subprocess.DEVNULL,
    )

    try:
      frameNumber = _countVideoFrames(dstPath)
    except ValueError:
      # Frame count not available -> video corrupted -> delete it.
      _removeIfPresent(dstPath)
      continue
    # Delete the video if its frame count is < 3.
    if frameNumber < 3:
      _removeIfPresent(dstPath)

# The output directory must not exist yet: remove /content/myFinalDataset from
# a previous run before executing this cell again.
os.mkdir("/content/myFinalDataset")

# Speakers 1-20 and 22-34 (speaker 21 is not part of the extracted data).
for i in [*range(1, 21), *range(22, 35)]:
  pathToSpeaker = os.path.join(myPath, "s{}".format(i))
  # Done once per speaker to ease computation time.
  mouthX, mouthY = extractMouthLandmarkFromVideoWrapper(pathToSpeaker)
  with concurrent.futures.ThreadPoolExecutor() as executor:
    pending = {
      executor.submit(workerWordEdition, i, entry, pathToSpeaker, mouthX, mouthY): entry
      for entry in os.listdir(pathToSpeaker)
    }
    # Retrieve every result so an exception raised inside a worker is
    # reported instead of being silently discarded with its future.
    for finished in concurrent.futures.as_completed(pending):
      error = finished.exception()
      if error is not None:
        print("Failed: ", os.path.join(pathToSpeaker, pending[finished]), "->", repr(error))
  
      


# Zip final dataset on Drive

In [None]:
#exec time 4m
# Recursively archive /content/myFinalDataset into a zip file under the Drive path.
# NOTE(review): the later unzip cell reads "myFinalDatasetCropped.zip", a
# different file name from the one written here - confirm which archive is intended.
!zip -r "/content/drive/MyDrive/tirocinioWorkingDirectory/datasets/grid/finalDataset/myFinalDatasetCroppedSentenceEdition.zip" "/content/myFinalDataset"

In [None]:
# Delete the extracted intermediate dataset directory and everything in it.
!rm -rf "/content/intermediateDataset"

# Unzip and delete those with nFrames < 3

In [None]:
# Start from a clean target directory.
!rm -rf "/content/myDataset"
# Extract the archive from Drive into /content (-qq: no output).
!unzip -qq "/content/drive/MyDrive/tirocinioWorkingDirectory/datasets/grid/finalDataset/myFinalDatasetCropped.zip" -d "/content/"
# The extracted tree lands under /content/content/myFinalDataset; move it to
# /content/myDataset and drop the leftover wrapper directory.
!mv "/content/content/myFinalDataset/" "/content/myDataset"
!rm -rf "/content/content"

In [None]:
import os
import re
import subprocess


def _countVideoFrames(videoPath):
  """Return the last "frame=" counter ffmpeg prints while copying videoPath to a null output.

  Raises:
    ValueError: if ffmpeg prints no frame counter (treated as a corrupted video).
  """
  probe = subprocess.run(
    ["ffmpeg", "-i", videoPath, "-map", "0:v:0", "-c", "copy", "-f", "null", "-y", "/dev/null"],
    stdin=subprocess.DEVNULL,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # ffmpeg reports progress on stderr
    text=True,
    errors="replace",
  )
  counters = re.findall(r"frame= *([0-9]+)", probe.stdout)
  if not counters:
    raise ValueError("no frame counter reported for {}".format(videoPath))
  return int(counters[-1])


deletedCount = 0

# Check the number of frames of every video under /content/myDataset and
# delete those that are too short or unreadable.
for root, dirs, files in os.walk("/content/myDataset"):
  for video in files:
    filePath = os.path.join(root, video)
    try:
      frameNumber = _countVideoFrames(filePath)
    except ValueError:
      # Number of frames not available -> video corrupted -> delete it.
      os.remove(filePath)
      print("Removed: ", filePath, "because of ValueError")
      deletedCount = deletedCount + 1
      continue
    # Delete the video if its frame count is < 3.
    if frameNumber < 3:
      os.remove(filePath)
      print("Removed: ", filePath)
      deletedCount = deletedCount + 1
print("Tot. deleted: ", deletedCount)

In [None]:
# Quietly and recursively archive the cleaned /content/myDataset into a new zip file under the Drive path.
!zip -q -r "/content/drive/MyDrive/tirocinioWorkingDirectory/datasets/grid/finalDataset/myFinalDatasetCroppedNewEdition.zip" "/content/myDataset"

In [None]:
#tbd
# Test whether the file removal worked: find prints the path only if a file
# with this name still exists under /content/myDataset/again.

!find "/content/myDataset/again" -name "WORDagainSP8SENbgag6a.mpg"