# upload_new_data

## Allows uploading images to GoogleDrive 'CS407-CCP'

### Requires a settings.yaml file with a valid client id and client secret for OAuth 2.0.

Adds new files to GoogleDrive, skips existing files.

Files should be in a folder named after the index of the character. Filenames should be in format "char_index"_"image_number".png

**(!)** If uploading files with filenames that may overlap with files in the cloud, ensure append = True so new filenames will be generated.

In [35]:
### CREATE CONNECTION TO G CLOUD - RUN THIS FIRST

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import os

gauth = GoogleAuth()
gauth.LocalWebserverAuth()

drive = GoogleDrive(gauth)

# GoogleDrive id of the source data folder
source_id = '1llWDb2mJkG4RGvR-R9otKhK9YdkmfhyZ'
root_dir = './source'
append = True

In [None]:
### GET LIST OF ALL FILES IN THE G CLOUD FROM FOLDER WITH root_id

def get_objects(root_id):
    folder_list = {}
    top_list = drive.ListFile({'q': "'%s' in parents and trashed=false" % root_id}).GetList()
    for file in top_list:
        folder_list[str(file['title'])] = file['id']

    return folder_list

folder_list = get_objects(source_id)

In [46]:
### Upload a local file to Google Drive
#   Params:
#   folder_id - the id of the Google Drive folder
#   filename - filename of local file to upload
def upload_file(folder_id, filename, cloud_filename, path_to_file):
    metadata = {
        'parents': [
            {"id": folder_id}
        ],
        'title': cloud_filename
    }
    file = drive.CreateFile(metadata=metadata)
    file.SetContentFile(os.path.join(path_to_file, filename))
    file.Upload()

def main():
    print("Append mode is on") if append else print("Append mode is off")
    for root, dirs, files in os.walk(root_dir):
        for name in files:
            if '.png' in name:
                folder = root[root.index("\\")+1:]
                local_filename = name
                
                # Local filenames should have format <char_index>_<image_num>.png
                if "_" not in local_filename:
                    local_filename = folder + "_" + local_filename
                    
                folder_content = get_objects(folder_list[folder])
                
                # Append will cause files to be given new names and added to the cloud if a file already exists with the same filename
                if append:
                    folder_filenames = []
                    
                    # Get list of all files in the current cloud folder
                    list_val = list(folder_content.keys())
                    print(list_val)
                    
                    # Obtain new filename by incrementing the count of the highest number filename in the cloud
                    for x in range(len(list_val)):
                        folder_filenames.append(int(list_val[x][list_val[x].index("_")+1:list_val[x].index(".")]))
                        
                    # New filename count is not higher than previous largest filename count e.g. 1_4.png in cloud gives 1_5.png for new file    
                    cloud_filename = folder + "_" + str(max(folder_filenames)+1) + ".png"
                    upload_file(folder_list[folder], local_filename, cloud_filename, os.path.join(root_dir, folder))
                    print(f"Uploaded {local_filename} to folder {folder} with new filename {cloud_filename}")
                else: # If not appending, skip uploads for files where cloud contains a file with the same name
                    if local_filename in folder_content.keys():
                        print(f"File {local_filename} is already in folder {folder}, skipping.")
                    else:
                        upload_file(folder_list[folder], local_filename, local_filename, os.path.join(root_dir, folder))
                        print(f"File {local_filename} successfully uploaded to folder {folder}.")
                
main()

Append mode is on
['1_11.png', '1_10.png', '1_20.png', '1_18.png', '1_19.png', '1_17.png', '1_15.png', '1_14.png', '1_13.png', '1_16.png', '1_12.png', '1_9.png', '1_8.png', '1_7.png', '1_6.png', '1_5.png', '1_4.png', '1_3.png', '1_2.png', '1_1.png']
Uploaded 1_1.png to folder 1 with new filename 1_21.png
['1_21.png', '1_11.png', '1_10.png', '1_20.png', '1_18.png', '1_19.png', '1_17.png', '1_15.png', '1_14.png', '1_13.png', '1_16.png', '1_12.png', '1_9.png', '1_8.png', '1_7.png', '1_6.png', '1_5.png', '1_4.png', '1_3.png', '1_2.png', '1_1.png']
Uploaded 1_10.png to folder 1 with new filename 1_22.png
['1_22.png', '1_21.png', '1_11.png', '1_10.png', '1_20.png', '1_18.png', '1_19.png', '1_17.png', '1_15.png', '1_14.png', '1_13.png', '1_16.png', '1_12.png', '1_9.png', '1_8.png', '1_7.png', '1_6.png', '1_5.png', '1_4.png', '1_3.png', '1_2.png', '1_1.png']
Uploaded 1_11.png to folder 1 with new filename 1_23.png
['1_23.png', '1_22.png', '1_21.png', '1_11.png', '1_10.png', '1_20.png', '1_18.p

KeyboardInterrupt: 

In [43]:
''' TESTING AREA

# list1 = {1: '1_1.png', 2: '1_10.png', 3: '1_5.png', 4: '1_3.png'}
# list2 = [1,5,2,3,7]
# # list1[list1[x][list1[x].index("_")+1:list1[x].index(".")] for x in range(len(list1))].sort()
# list3 = []
# list_val = list(list1.values())
# for x in range(len(list_val)):
#     list3.append(int(list_val[x][list_val[x].index("_")+1:list_val[x].index(".")]))
# list3
# print(max(list3))

x = False
print("Append mode is on") if x else print("Append mode is off")

folder_content = get_objects(folder_list['1'])
list_val = list(folder_content.keys())
print(list_val)
folder_filenames = []
for x in range(len(list_val)):
    folder_filenames.append(int(list_val[x][list_val[x].index("_")+1:list_val[x].index(".")]))
print(folder_filenames)

Append mode is off
['1_20.png', '1_11.png', '1_10.png', '1_18.png', '1_19.png', '1_17.png', '1_15.png', '1_14.png', '1_13.png', '1_16.png', '1_12.png', '1_9.png', '1_8.png', '1_7.png', '1_6.png', '1_5.png', '1_4.png', '1_3.png', '1_2.png', '1_1.png']
[20, 11, 10, 18, 19, 17, 15, 14, 13, 16, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1]
