In [0]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
from datetime import datetime

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

import uuid
import os
import json


class GoogleDriveDatabase:
  """Treat the sub-folders of one Google Drive folder as named buckets of files.

  On construction the direct children of the folder identified by
  ``DATABASE_GID`` are listed through the supplied PyDrive-style ``drive``
  object, and every child folder is recorded in ``self.folders`` as
  ``{folder title: folder id}``. The other methods upload to, list, and
  download from those child folders by title.
  """

  # MIME type Drive uses to mark a folder entry.
  _FOLDER_MIME = "application/vnd.google-apps.folder"

  # Short file-type names accepted by upload() and the MIME type sent for each.
  _FILETYPE_MIME_MAP = {
      "jpeg" : "image/jpeg",
      "json" : "application/json",
      "zip" : "application/zip"
  }

  def __init__(self, drive, DATABASE_GID:str):
    """List the children of ``DATABASE_GID`` and index its sub-folders by title.

    Args:
      drive: object exposing ``ListFile(...).GetList()`` and ``CreateFile(...)``.
      DATABASE_GID: Drive id of the parent folder.

    Raises:
      AssertionError: if ``DATABASE_GID`` is not a string.
    """
    assert isinstance(DATABASE_GID, str)
    self.folders = {}
    self.drive = drive
    query = "'{}' in parents and trashed=false".format(DATABASE_GID)
    for entry in drive.ListFile({'q': query}).GetList():
      if entry['mimeType'] == self._FOLDER_MIME:
        self.folders[entry['title']] = entry['id']
    print("{} folders loaded".format(len(self.folders)))

  def upload(self, filename, character, fileType):
    """Upload a local file into the ``character`` folder, then delete it locally.

    Args:
      filename: path of an existing local file; its base name becomes the title.
      character: title of a folder present in ``self.folders``.
      fileType: one of the keys of ``_FILETYPE_MIME_MAP`` ("jpeg", "json", "zip").

    Raises:
      AssertionError: on a wrong argument type, an unknown ``fileType``,
        a missing local file, or an unknown ``character``.
    """
    assert isinstance(filename, str)
    assert isinstance(character, str)
    assert isinstance(fileType, str)
    mime_map = self._FILETYPE_MIME_MAP
    # Report the allowed values (not the rejected one) so the caller can fix the call.
    assert fileType in mime_map, "fileType must be one of the following: {}".format(tuple(mime_map))
    assert os.path.isfile(filename), "{} does not exist as a file".format(filename)
    assert self.checkFolder(character), "{} is not a valid character. Pick from list: \n{}".format(character, tuple(self.folders))
    remote = self.drive.CreateFile({
        "title" :  os.path.split(filename)[1],
        "mimeType" : mime_map[fileType],
        "parents" : [{"id" : self.folders[character]}]
    })
    remote.SetContentFile(filename)
    remote.Upload()
    # The local copy is removed once the upload call has returned.
    os.remove(filename)
    print("uploaded and deleted {}".format(filename))

  @staticmethod
  def FILE_EXTENSIONS() -> list:
    """Return the file-name suffixes that are kept as-is when downloading."""
    return [".jpg", ".jpeg", ".png", ".zip", ".json"]

  def getFiles(self, character) -> list:
    """Return the non-folder entries inside the ``character`` folder.

    Raises:
      KeyError: if ``character`` is not a known folder title.
    """
    query = "'{}' in parents and trashed=false".format(self.folders[character])
    return [
      entry for entry in self.drive.ListFile({'q': query}).GetList()
      if entry['mimeType'] != self._FOLDER_MIME
    ]

  def download_file_name(self, file_name):
    """Return ``file_name``, with ".jpg" appended when it has no known suffix.

    The check looks only at the end of the name, so an extension that merely
    appears inside a directory component (e.g. "raw.json/photo") does not
    count as the file's own extension.
    """
    if not file_name.endswith(tuple(GoogleDriveDatabase.FILE_EXTENSIONS())):
      file_name += ".jpg"
    return file_name

  def download_file(self, file, folder:str, **kwargs) -> str:
    """Download one Drive entry into ``folder`` and return the local path.

    Args:
      file: Drive entry exposing ``['title']`` and ``GetContentFile(path)``.
      folder: local directory to write into.
      check_local (keyword, default False): when true, skip the download
        if the target path already exists locally.

    Returns:
      The local path, whether the file was downloaded now or already present.
    """
    check_already_exist = kwargs.get("check_local", False)
    file_name = self.download_file_name(os.path.join(folder, file['title']))
    # A direct existence test avoids re-listing the whole folder for every file.
    if check_already_exist and os.path.exists(file_name):
      return file_name
    file.GetContentFile(file_name)
    print("downloaded", file_name)
    return file_name

  def download(self, character:str,folder:str):
    """Download every file of ``character`` into ``folder``; return the local paths as a tuple.

    ``folder`` is created if missing; files already present locally are not
    downloaded again.
    """
    file_list = self.getFiles(character)
    os.makedirs(folder, exist_ok=True)
    return tuple(
      self.download_file(entry, folder, check_local=True) for entry in file_list
    )

  def checkFolder(self, name):
    """Return True when ``name`` is the title of a known sub-folder.

    Raises:
      AssertionError: if ``name`` is not a string.
    """
    assert isinstance(name, str)
    return name in self.folders

# Authenticate the Colab session (make sure to sign in with your ucsb account)
# and build a PyDrive client from the application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Drive ids of the two parent folders; GoogleDriveDatabase indexes the
# sub-folders found directly under each of them.
RAW_JSON_DATABASE = "1kLMivfz2q7DFwa57ddy-rCJmsYFTWhMW"
FORMED_JSON_DATABASE = "1et2dH2MxbF2rOZ9n2Ch_RBYfPf1TbWob"

# Both constructors query Drive immediately and print how many folders they found.
raw_json_database = GoogleDriveDatabase(drive, RAW_JSON_DATABASE)
formed_json_database = GoogleDriveDatabase(drive, FORMED_JSON_DATABASE)

def FormJsonCharacter(character):
  """Combine every raw JSON file of ``character`` into one formed JSON file.

  The raw files are read from ``raw_json_database`` and the combined list is
  uploaded to ``formed_json_database`` as ``<character>.json``. If a single
  formed file already exists and holds as many entries as there are raw
  files, it is returned unchanged and nothing is rebuilt.

  Args:
    character: title of the folder to process in both databases.

  Returns:
    A list with one parsed JSON document per raw file (or the previously
    formed data when it is already up to date); ``[]`` when there are no
    raw files.
  """
  unformed_path = os.path.join("raw_json/", character + "/")
  formed_path = "formed_json/"
  os.makedirs(unformed_path, exist_ok=True)
  os.makedirs(formed_path, exist_ok=True)
  formed_json_files = formed_json_database.getFiles(character)
  raw_json_files = raw_json_database.getFiles(character)

  formed_json_data = []
  # The single formed file that will be superseded if the data is rebuilt.
  stale_formed_file = None
  if len(formed_json_files) > 1:
    # More than one formed file is ambiguous: discard the formed copies and
    # rebuild from the raw files. The raw files are the source data and must
    # never be deleted here.
    print("{} formed files for {}. Will delete them.".format(len(formed_json_files), character))
    for duplicate in formed_json_files:
      duplicate.Delete()
  elif len(formed_json_files) == 1:
    stale_formed_file = formed_json_files[0]
    formed_json_local_copy = formed_json_database.download_file(stale_formed_file, formed_path)
    with open(formed_json_local_copy) as f:
      formed_json_data = json.load(f)

  if len(raw_json_files) == 0:
    print("No raw json files found for {}".format(character))
    return []

  # Entry count is used as the "up to date" signal: one entry per raw file.
  if len(raw_json_files) == len(formed_json_data):
    print("{} already formed".format(character))
    return formed_json_data

  if stale_formed_file is not None:
    # Remove the outdated formed file (even if it held no entries) so the
    # upload below does not leave two formed files behind.
    print("While {} data was already created, {} raw files were found so data must be reformed".format(len(formed_json_data), len(raw_json_files)))
    stale_formed_file.Delete()

  downloaded_json = [
    raw_json_database.download_file(file_reference, unformed_path, check_local=True)
    for file_reference in raw_json_files
  ]

  formed = []
  for local_file in downloaded_json:
    with open(local_file) as f:
      formed.append(json.load(f))

  formed_file_name = os.path.join(formed_path, character) + ".json"

  with open(formed_file_name, 'w') as f:
    json.dump(formed, f)

  # upload() also removes the local formed file after sending it.
  formed_json_database.upload(formed_file_name, character, "json")
  return formed

def FormJson():
  """Form every character's data and upload the combined file to the "ALL" folder.

  For each character, FormJsonCharacter() is called and its entries are kept
  only when they are truthy and contain at least one truthy element. The
  resulting ``{character: entries}`` mapping is written to
  ``formed_json/training_data.json`` and uploaded, replacing the single
  previously uploaded file if there was one.

  Raises:
    Exception: when more than one file already exists in "ALL"; those files
      are deleted before the exception is raised.
  """
  formed_path = "formed_json/"
  os.makedirs(formed_path, exist_ok=True)
  CHARACTERS = ("A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y")

  previous_uploads = formed_json_database.getFiles("ALL")
  previous_count = len(previous_uploads)

  if previous_count > 1:
    for stale in previous_uploads:
      stale.Delete()
    raise Exception("{} fully formed files found".format(previous_count))

  if previous_count == 1:
    previous_local = formed_json_database.download_file(previous_uploads[0], formed_path)
    with open(previous_local, "r") as f:
      # The parsed content is not used further in this function.
      previous_data = json.load(f)

  def has_content(entry):
    # Keep an entry only if it is truthy and at least one of its elements is.
    if not entry:
      return False
    return any(entry)

  COMPLETE_FORM = {
    c: [entry for entry in FormJsonCharacter(c) if has_content(entry)]
    for c in CHARACTERS
  }

  complete_file = os.path.join(formed_path, "training_data.json")
  with open(complete_file, "w") as f:
    json.dump(COMPLETE_FORM, f)

  # Remove the old combined file before uploading its replacement.
  if previous_count == 1:
    previous_uploads[0].Delete()
  formed_json_database.upload(complete_file, "ALL", "json")
  
# Run the whole forming-and-upload pipeline when this cell executes.
FormJson()

24 folders loaded
25 folders loaded
downloaded formed_json/training_data.json
downloaded formed_json/A.json
A already formed
downloaded formed_json/B.json
B already formed
downloaded formed_json/C.json
C already formed
downloaded formed_json/D.json
D already formed
downloaded formed_json/E.json
E already formed
downloaded formed_json/F.json
F already formed
downloaded formed_json/G.json
G already formed
downloaded formed_json/H.json
H already formed
downloaded formed_json/I.json
I already formed
downloaded formed_json/K.json
K already formed
downloaded formed_json/L.json
L already formed
downloaded formed_json/M.json
M already formed
downloaded formed_json/N.json
While 196 data was already created, 256 raw files were found so data must be reformed
downloaded raw_json/N/N_N_03_02_2020_07_49_20_8.json
downloaded raw_json/N/N_N_03_02_2020_07_44_14_2.json
downloaded raw_json/N/N_N_03_02_2020_07_49_20_5.json
downloaded raw_json/N/N_N_03_02_2020_07_44_14_5.json
downloaded raw_json/N/N_N_03_0