In [1]:
%load_ext autoreload
%autoreload 2

import sys, random
import psycopg2, json
import numpy as np

from collections import defaultdict

# For managing relative imports from notebook
if '..' not in sys.path: sys.path.append('..')

import config.config as dfc
import deepfake.dfutillib as df
import deepfake.postgresdb as pgdb

In [12]:
%%time

# This function reads each partition's metadata.json file, compiles a corresponding
# list of all videos, splits them into order-randomized training and validation sets,
# then assigns these to epoch blocks and inserts everything into the database.

def create_videos_data(istart, istop=None, validation_split=0.1, epochsz=200):
#{
    """Compile the partition videos, split them, group them into epoch blocks and insert them.

    For every partition index in the requested range, the partition's
    metadata.json is read and one pgdb.VideoTuple is created per video whose
    file exists on disk (non-'REAL' videos additionally require their
    'original' file to exist). Each partition is split at random into
    "train" and "validate" subsets, the two global lists are shuffled, and
    the videos are grouped into complete epoch blocks. Videos that do not fit
    into a complete block are clipped, and the remainder is passed to
    populate_database() on a pgdb.PostgreSqlHandle.

    Args:
        istart: First partition index, or (when istop is None) the exclusive
            stop index with the start defaulting to 0, like range().
        istop: Exclusive stop partition index, or None.
        validation_split: Fraction of each partition (and of each epoch block)
            assigned to the validation split.
        epochsz: Total number of videos (train + validate) per epoch block.

    Raises:
        ValueError: If epochsz and validation_split do not leave at least one
            training and one validation video per epoch block.
    """
    # Like range args
    if istop is None: istart, istop = 0, istart

    # Per-block step sizes; validated up front so a bad configuration fails
    # before any partition is scanned (range() rejects a zero step anyway).
    nvalids_blk = int(epochsz*validation_split)
    ntrains_blk = epochsz - nvalids_blk
    if nvalids_blk <= 0 or ntrains_blk <= 0:
        raise ValueError(
            f"epochsz={epochsz} with validation_split={validation_split} must yield "
            f"at least one training and one validation video per epoch block"
        )

    vtrains, vvalids = [],[]
    for i in range(istart, istop):
    #{
        vpart = []
        partdir = df.traindir(i)
        try:
            # Store all valid tuples from the partition metadata file
            with open(f"{partdir}/metadata.json") as jsonfile:
            #{
                metadata = json.load(jsonfile)
                for vidname, meta in metadata.items():
                #{
                    if not df.file_exists(f"{partdir}/{vidname}"): continue

                    vtup = pgdb.VideoTuple(vidname=vidname, part_id=i, label=meta['label'])
                    if meta['label'] == 'REAL': vpart.append(vtup)
                    elif df.file_exists(f"{partdir}/{meta['original']}"):
                        # Non-REAL videos are kept only when their original is present too
                        vtup.origname = meta['original']
                        vpart.append(vtup)
                #}
            #}
        except PermissionError as err: print("ERROR:", err)

        # Randomly select a validation subset
        nvalids = round(validation_split*len(vpart))
        vindices = set(random.sample(range(len(vpart)), nvalids))

        # Separate into respective split
        for j, vtup in enumerate(vpart):
        #{
            if j in vindices:
                vtup.split = "validate"
                vvalids.append(vtup)
            else:
                vtup.split = "train"
                vtrains.append(vtup)
        #}

        print(f"Partition {i}: {len(vpart)} videos")
    #}

    print(f"\nTotal dataset:")
    print(f"  {len(vtrains)} training videos")
    print(f"  {len(vvalids)} validation videos")
    print("\nCreating epoch video-blocks...")

    # Randomize video orders
    np.random.shuffle(vtrains)
    np.random.shuffle(vvalids)

    # Only complete blocks (full on both the train and validate side) are kept
    nblocks = min(len(vtrains)//ntrains_blk, len(vvalids)//nvalids_blk)
    for blkid in range(nblocks):
    #{
        for vtup in vtrains[blkid*ntrains_blk:(blkid+1)*ntrains_blk]: vtup.blk_id = blkid
        for vtup in vvalids[blkid*nvalids_blk:(blkid+1)*nvalids_blk]: vtup.blk_id = blkid
    #}
    print(f"  {nblocks} epoch video-blocks created")

    # Everything past the last complete block is dropped
    tclip, vclip = nblocks*ntrains_blk, nblocks*nvalids_blk

    print(f"\nClipping nvideos from tail:")
    print(f"  {len(vtrains[tclip:])} training videos")
    print(f"  {len(vvalids[vclip:])} validation videos")

    del vtrains[tclip:]
    del vvalids[vclip:]

    # Nothing to insert: leave the database untouched rather than initializing it
    if nblocks == 0:
        print("\nNo complete epoch video-blocks; populate aborted")
        return

    print(f"\nAttempting database insert of:")
    print(f"  {len(vtrains)} training videos")
    print(f"  {len(vvalids)} validation videos\n")

    # Insert epoch blocks and videos into DB
    with pgdb.PostgreSqlHandle() as db_handle:
        if not db_handle.initialize_database(): print("Populate aborted")
        else: db_handle.populate_database(vtrains, vvalids)
#}

# Build the dataset from partitions 0 through 49 (a single argument is treated like range(50))
create_videos_data(50)

Partition 0: 1334 videos
Partition 1: 1699 videos
Partition 2: 1748 videos
Partition 3: 1455 videos
Partition 4: 1701 videos
Partition 5: 2483 videos
Partition 6: 3464 videos
Partition 7: 2473 videos
Partition 8: 1816 videos
Partition 9: 1736 videos
Partition 10: 3192 videos
Partition 11: 2118 videos
Partition 12: 2225 videos
Partition 13: 3694 videos
Partition 14: 2464 videos
Partition 15: 2273 videos
Partition 16: 2061 videos
Partition 17: 2430 videos
Partition 18: 2683 videos
Partition 19: 2752 videos
Partition 20: 2154 videos
Partition 21: 2268 videos
Partition 22: 2409 videos
Partition 23: 2410 videos
Partition 24: 2786 videos
Partition 25: 2546 videos
Partition 26: 2433 videos
Partition 27: 2353 videos
Partition 28: 2085 videos
Partition 29: 2557 videos
Partition 30: 2236 videos
Partition 31: 2470 videos
Partition 32: 2356 videos
Partition 33: 2274 videos
Partition 34: 2658 videos
Partition 35: 2535 videos
Partition 36: 2339 videos
Partition 37: 2655 videos
Partition 38: 2477 vid

In [8]:
# Probe for the 'epoch_queue' relation. PostgreSQL's to_regclass returns NULL for an
# unknown relation name; presumably sqlquery surfaces that as None -- TODO confirm.
with pgdb.PostgreSqlHandle() as db_handle:
    a = db_handle.sqlquery("SELECT to_regclass('epoch_queue')", fetch='one')[0]
    # Prints True only when the lookup produced a non-None value
    print(a is not None, type(a))

False <class 'NoneType'>


In [13]:
# Run only the database initialization step; a falsy return from
# initialize_database() is reported as an aborted populate.
with pgdb.PostgreSqlHandle() as db_handle:
    if not db_handle.initialize_database(): print("Populate aborted")

Commencing database initialization...
ERROR: syntax error at or near "blk_id"
LINE 3:                 blk_id INTEGER NOT NULL,
                        ^

Populate aborted
