# upload_new_data

## Allows uploading images to GoogleDrive 'CS407-CCP'

### Requires a settings.yaml file with a valid client id and client secret for OAuth 2.0.

Adds new files to Google Drive. When `append` is off, files whose names already exist in the cloud folder are skipped.

Files should be in a folder named after the index of the character. Filenames should follow the format `<char_index>_<image_number>.png` (for example, `1_5.png`).

**(!)** If uploading files whose filenames may clash with files already in the cloud, set `append = True` so that new filenames are generated.

In [47]:
### CREATE CONNECTION TO G CLOUD - RUN THIS FIRST

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import os
import pandas as pd

# Authenticate with Google via the local-webserver OAuth flow.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()

# Drive client built on the authenticated session; the helper functions in
# the later cells read it as a module-level name.
drive = GoogleDrive(gauth)

# GoogleDrive id of the source data folder
source_id = '1llWDb2mJkG4RGvR-R9otKhK9YdkmfhyZ'
# When True, main() uploads every file under a newly generated name;
# when False, main() skips files whose name already exists in the cloud folder.
append = True
# Local directory that holds trainData.csv and one sub-folder per label.
data_dir = './source'

In [48]:
### GET LIST OF ALL FILES IN THE G CLOUD FROM FOLDER WITH root_id

def get_objects(root_id):
    """Map the title of each non-trashed item directly under a Drive folder to its id.

    Params:
        root_id - the id of the Google Drive folder to list

    Returns a dict of {title: id}. If several items share a title, the one
    listed last wins.
    """
    query = {'q': "'%s' in parents and trashed=false" % root_id}
    return {str(entry['title']): entry['id'] for entry in drive.ListFile(query).GetList()}

# Titles and ids of everything directly under the source data folder; the
# helper functions in the later cells use this as a title -> id lookup table.
folder_list = get_objects(source_id)

In [49]:
''' RUN THIS TO CHECK THAT ACCESS TO G DRIVE HAS BEEN SUCCESSFULL - SHOULD SHOW IMAGE FOLDERS '''
# Prints the {title: id} mapping fetched from the source data folder.
print(folder_list)

{'CS407 Presentation': '1aun3kWKRqYDY3niKSoLsMtcVM9ZpqWWZqbqlYHl8fsE', '999': '1-NKcFn-6v0iccskcq8JFWOR9200EE0m4', '994': '1rpLbWMRm0mu-K5nJ5potGvqK3coyf5Bd', '997': '1QHsSOXFnev4RAbm37wjM16K_MK-ROBnE', '996': '1MiNbNcE8nGl1KDX9jqczOb_QhMHXtoBZ', '995': '1G_7zWBvoXpWkKUKCpNKIFO_52Jh_xT72', '998': '1sZq4yhprw81wzgcU_A-VokX6Fy8QTbTv', '993': '1CDv_1lcRtv0jw6Ey5KN5fowxmOityyqL', '989': '1YbnJf0HpT-lEyOPyBn0q2CUtgpRT9wWp', '986': '17byw7wp3sL5XRNo3tB0vL-OfsLazLE7O', '985': '1_s6qBL0FBAn0Xttk2Nvl1ygLmWAPSpjg', '991': '1mCOBnQiHuPW_yZYEeeTxe-bLZBdg6un5', '99': '1ryz6FZfuyQow2pRE1Xb_BMqQBOUANfY3', '988': '1O558N7ZX55wC24t4-d5lnnkBRUm5cZe1', '990': '1iRSm9vLzJRkCBipZSIYXNbsTNgIiIV2C', '987': '1lPND4XleVOMI9xq3aBRb2vBaOuxlItda', '992': '1JaiR7Bmyg-mOECC0XsnUWdvEZuvo_PdY', '976': '15kWeb96lonQZlVMDzRakBj5y_zhaFbCD', '981': '15wOoOKtMeUHWyoPAJTmAIsW8auBZSJP8', '980': '1SgakFxfwSNt1q2BYBqCAGIZO6hrxEzSS', '983': '1-Ra2X7HrDDb-AWmgIUYkGEz2iVWhweTT', '978': '1Jzvn5xge_nQhYlgftZ6BlZX8WC5xy1HX', '98': 

In [55]:
def upload_file(folder_id, filename, cloud_filename, path_to_file):
    """Upload a local file into a Google Drive folder.

    Params:
        folder_id - the id of the destination Google Drive folder
        filename - name of the local file to upload
        cloud_filename - title the file is given on Google Drive
        path_to_file - local directory that contains `filename`
    """
    remote = drive.CreateFile(
        metadata={'parents': [{"id": folder_id}], 'title': cloud_filename}
    )
    local_path = os.path.join(path_to_file, filename)
    remote.SetContentFile(local_path)
    remote.Upload()
    
def create_folder(parent_folder_id, folder_name):
    """Create a Google Drive folder unless one with that name is already known.

    Params:
        parent_folder_id - the id of the Google Drive folder to create the new folder in
        folder_name - title of the folder to create

    Returns None if `folder_name` is already a key of the module-level
    `folder_list`; otherwise returns the id of the newly created folder.
    """
    if folder_name in folder_list:
        return None

    folder = drive.CreateFile({
        'title': folder_name,
        # Parent folder
        'parents': [{'id': parent_folder_id}],
        'mimeType': 'application/vnd.google-apps.folder',
    })
    folder.Upload()

    # Record the new folder so that a repeated call with the same name hits the
    # early return above instead of creating a second folder, and so that
    # callers can look the id up in folder_list without re-listing the parent.
    folder_id = folder['id']
    folder_list[folder_name] = folder_id
    return folder_id

def delete_files(folder_names):
    """Permanently delete every item inside the named Google Drive folders.

    Params:
        folder_names - folder titles (keys of the module-level `folder_list`)
                       whose contents should be deleted; a single title may be
                       passed as a plain string

    Titles that are not keys of `folder_list` are ignored. The folders
    themselves are kept; only the items listed directly inside them are deleted.
    """
    # A bare string would otherwise be matched by substring ('999' would also
    # select folders '99' and '9'), so treat it as a single folder title.
    if isinstance(folder_names, str):
        folder_names = [folder_names]
    targets = set(folder_names)

    file_count = 0
    # Only consult the Drive folder listing when there is something to delete.
    if targets:
        for folder_name, folder_id in folder_list.items():
            if folder_name not in targets:
                continue
            for title, file_id in get_objects(folder_id).items():
                # Delete() removes the file permanently, so no round trip
                # through the trash is needed first.
                drive.CreateFile({'id': file_id}).Delete()
                print(f'[INFO] Deleted file {title}.')
                file_count += 1

    print(f'[INFO] Deleted {file_count} files from drive.')
                

def _next_image_number(cloud_filenames):
    """Return one more than the highest image number in `cloud_filenames` (1 if there is none)."""
    numbers = []
    for name in cloud_filenames:
        try:
            # The image number is the text between the first "_" and the first ".".
            numbers.append(int(name[name.index("_") + 1:name.index(".")]))
        except ValueError:
            # Name is not of the form <label>_<number>.<ext>; ignore it.
            continue
    return max(numbers, default=0) + 1


def main(max_rows=5):
    """Upload the images listed in trainData.csv to their label folders on Google Drive.

    Reads `<data_dir>/trainData.csv` as header-less rows of (image path, label).
    For each row the local file `<data_dir>/<label>/<basename of image path>`
    is uploaded to the Drive folder whose title equals the label:

    * append on  - the file is always uploaded, under the name
                   `<label>_<n>.png` where n is one higher than the largest
                   image number already in that cloud folder
                   (e.g. 1_4.png in the cloud gives 1_5.png for the new file).
    * append off - the file keeps its name and is skipped if the cloud folder
                   already contains a file with that name.

    Rows whose label has no folder in `folder_list` are reported and skipped.

    Params:
        max_rows - number of leading CSV rows to process (default 5, the limit
                   that used to be hard-coded); None processes every row
    """
    if append:
        print("Append mode is on")
    else:
        print("Append mode is off")

    csv_path = os.path.join(data_dir, 'trainData.csv')
    if not os.path.exists(csv_path):
        print(f"No trainData.csv found in {data_dir}, nothing to upload.")
        return

    df = pd.read_csv(csv_path, sep=",", names=["img", "label"])
    if max_rows is not None:
        # head() copes with files that have fewer rows than max_rows.
        df = df.head(max_rows)

    for img_path, raw_label in zip(df["img"], df["label"]):
        label = str(raw_label)
        local_filename = os.path.basename(img_path)

        folder_id = folder_list.get(label)
        if folder_id is None:
            print(f"No cloud folder found for label {label}, skipping {local_filename}.")
            continue

        local_dir = os.path.join(data_dir, label)
        # Re-listed for every row so that files uploaded earlier in this run are seen.
        folder_content = get_objects(folder_id)

        if append:
            # Append gives the file a new name, so it is added to the cloud
            # even if a file with the same filename already exists there.
            cloud_names = list(folder_content)
            print(cloud_names)

            cloud_filename = f"{label}_{_next_image_number(cloud_names)}.png"
            upload_file(folder_id, local_filename, cloud_filename, local_dir)
            print(f"Uploaded {local_filename} to folder {label} with new filename {cloud_filename}")
        elif local_filename in folder_content:
            # Not appending: skip files the cloud folder already contains.
            print(f"File {local_filename} is already in folder {label}, skipping.")
        else:
            upload_file(folder_id, local_filename, local_filename, local_dir)
            print(f"File {local_filename} successfully uploaded to folder {label}.")
                
# main()

In [43]:
''' TESTING AREA

# list1 = {1: '1_1.png', 2: '1_10.png', 3: '1_5.png', 4: '1_3.png'}
# list2 = [1,5,2,3,7]
# # list1[list1[x][list1[x].index("_")+1:list1[x].index(".")] for x in range(len(list1))].sort()
# list3 = []
# list_val = list(list1.values())
# for x in range(len(list_val)):
#     list3.append(int(list_val[x][list_val[x].index("_")+1:list_val[x].index(".")]))
# list3
# print(max(list3))

# Scratch check of the conditional-expression print that main() uses.
x = False
print("Append mode is on") if x else print("Append mode is off")

# List the cloud folder titled '1' and pull the image number out of each filename,
# the same way main() does in append mode.
folder_content = get_objects(folder_list['1'])
list_val = list(folder_content.keys())
print(list_val)
folder_filenames = []
# NOTE: x is reused here as the loop index, overwriting the flag set above.
for x in range(len(list_val)):
    # Text between the first "_" and the first "." parsed as an int, e.g. '1_20.png' -> 20.
    folder_filenames.append(int(list_val[x][list_val[x].index("_")+1:list_val[x].index(".")]))
print(folder_filenames)

Append mode is off
['1_20.png', '1_11.png', '1_10.png', '1_18.png', '1_19.png', '1_17.png', '1_15.png', '1_14.png', '1_13.png', '1_16.png', '1_12.png', '1_9.png', '1_8.png', '1_7.png', '1_6.png', '1_5.png', '1_4.png', '1_3.png', '1_2.png', '1_1.png']
[20, 11, 10, 18, 19, 17, 15, 14, 13, 16, 12, 9, 8, 7, 6, 5, 4, 3, 2, 1]


In [57]:
# Titles of the cloud folders whose contents should be deleted; the list is
# empty here, so this call deletes nothing.
folders = []
delete_files(folders)

[INFO] Deleted 0 files from drive.
