In [None]:
# Run this code before each session
# Installs the Tesseract OCR engine and its development package (system packages, via apt)
# and the pytesseract Python package (via pip).
# NOTE(review): none of the cells visible here import pytesseract - confirm these installs are still needed.
! apt install tesseract-ocr
! apt install libtesseract-dev
! pip install pytesseract

In [None]:
# Import packages/libraries
import numpy as np
import pandas as pd
import re
import os
import shutil
from google.colab import drive
from glob import glob

In [None]:
# This will connect to your Google Drive and make it available under /content/drive,
# which is the prefix of the Drive paths used further down. It will ask you to allow access.
# force_remount=True is passed so the cell can be re-run - presumably it remounts an already-mounted Drive; see the Colab docs.
drive.mount('/content/drive', force_remount=True)

In [None]:
def copy_files(source, destination, directory_name):
  '''
  Method for copying files between folders. It copies every top-level subfolder
  (with its contents) and every loose file of the source folder that is
  NOT PRESENT in the destination. Only top-level names are compared.

    - If the destination shares no names with the source, the whole source
      folder is copied to destination/directory_name.
    - Otherwise each missing subfolder/file is copied directly into the destination.

  While copying folders, files matching the patterns given to
  shutil.ignore_patterns (both calls are labeled #1) are skipped. Add the
  extensions of any other file types that shutil cannot copy to BOTH lines.

  Parameters:
  -----------------
  source
    - path for the source folder from which files/folders are being copied
  destination
    - path for the destination folder which files are copied into
  directory_name
    - Name for the directory/folder which is created to store the files in the destination
    - Is only used if the source and destination directories are wholly independent and do not share any files

  Returns:
  ----------------
  none (the missing entries and any entries that could not be copied are printed)

  Raises:
  ----------------
  OSError
    - if source or destination cannot be listed (e.g. the path does not exist).
      Errors raised while copying are caught, reported, and do not stop the
      remaining entries from being copied.
  '''
  source_file_list = os.listdir(source)
  copy_file_list = set(os.listdir(destination)) # set gives fast membership checks
  missing_files = [i for i in source_file_list if i not in copy_file_list]
  if missing_files:
    print(missing_files)
  else:
    print("There are no files missing from the destination")

  failures = [] # (name, error) for every entry that could not be copied

  # The emptiness check stops an empty source from being "copied" as an empty directory_name folder
  if missing_files and missing_files == source_file_list: # Will copy entire source folder into destination when no subfolders/files are shared between the two
    try:
      shutil.copytree(source, os.path.join(destination, directory_name), ignore = shutil.ignore_patterns('*.gdoc', '*.gsheet', '*.gslides')) #1
    except OSError as error: # shutil.Error is a subclass of OSError
      failures.append((directory_name, error))

  else:
    # Iterate without mutating missing_files; removing items mid-loop skips every other entry
    for name in missing_files: # Will copy all missing files/subfolders not present in the destination
      src_path = os.path.join(source, name) # isfile must be checked on the full path, not the bare name
      try:
        if os.path.isfile(src_path): # Copies files not contained within a subdirectory
          shutil.copy(src_path, destination)
        else: # Copies all subfolders/subdirectories
          shutil.copytree(src_path, os.path.join(destination, name), ignore = shutil.ignore_patterns('*.gdoc', '*.gsheet', '*.gslides')) #1
      except OSError as error:
        failures.append((name, error)) # keep going so one bad entry does not block the rest
    print("These folders/files were not copied (ignore if list is empty): ")
    print([name for name, _ in failures])

  if failures:
    print("An error occured. Check if the directory you want to copy already exists in the destination location, or if it contains a google file/another file which cannot be copied")
    print("See the text below on how you can resolve this")
    for name, error in failures:
      print(name + ": " + str(error))



The `ignore` arguments of the two `shutil.copytree` calls (both labeled #1) in the `copy_files` function should be updated **manually** to include the extensions of any files which cannot be copied using shutil.
For instance, Google files (with extensions .gdoc, .gsheet, .gslides, etc.) cannot be copied this way as they are 'special.' Instead, either a) download and re-upload them manually, or b) create a corresponding Docs/Sheets/Slides file in the destination folder and copy-paste the contents of the source file.

More info: https://stackoverflow.com/questions/65510040/how-to-copy-a-google-file-with-colaboratory

More info about ignore and shutil: https://stackoverflow.com/questions/42487578/python-shutil-copytree-use-ignore-function-to-keep-specific-files-types 

------------------------------------------------------------------------------

Additionally, when passing in parameters to this function, be sure that you know whether you are copying a folder with files or a folder containing subfolders, and whether or not a folder of this name already exists in the desired destination (`shutil.copytree` raises an error if the folder it is asked to create already exists). This will help avoid errors.

In [None]:
def count_files(source):
  '''
  Print how many files a directory holds, looking one level into its subfolders.
  Handy on Google Drive, where this number is not readily visible.

  Every top-level file counts as one. Every top-level subfolder contributes the
  number of entries directly inside it (deeper levels are not expanded).

  Parameters:
  ----------------
  source
    - path for the source folder to count files from

  Returns:
  ----------------
  none (the total is printed)
  '''
  def entries_in(name):
    # A plain file counts once; anything else is listed and its entries counted
    path = source + '/' + name
    if os.path.isfile(path):
      return 1
    return len(os.listdir(path))

  total = sum(entries_in(name) for name in os.listdir(source))
  print(total)


In [None]:
'''
Main method
Used to call other functions above. User should update arguments manually for their
specific Drive path.

Parameters: 
----------------
none 

Returns: 
----------------
none 

'''

# Positional arguments for copy_files, in order: (source, destination, directory_name).
# Update these paths for your own Drive before running.
args = ("/content/drive/MyDrive/Duke 2022-2023/Data+/2_Camera Trap photos/Stream Photos/On_Deck", "/content/drive/MyDrive/Duke 2022-2023/Data+/2_Camera Trap photos/COPY of data for script/On_Deck", "Newly_uploaded_data")
# Both calls are commented out, so running this cell only defines args.
# Uncomment the one you want to run:
# copy_files(*args)
# count_files('/content/drive/MyDrive/Duke 2022-2023/Data+/2_Camera Trap photos/COPY of data for script/Newly_uploaded_data')


