<a href="https://colab.research.google.com/github/ProtossDragoon/paper_implementation_and_testing_tf2/blob/main/utils/GDrive_to_GCS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GDrive to GCS

## Author

name : Janghoo Lee <br>
github : https://github.com/ProtossDragoon <br>
contact : dlwkdgn1@naver.com <br>
circle : https://github.com/sju-coml <br>
organization : https://web.deering.co/ <br>
published date : June, 2021

## Related Notebook

[Notebooks](https://github.com/ProtossDragoon/paper_implementation_and_testing_tf2/tree/main/notebooks)


# Environment

## Import

In [None]:
import os

## Global Hyper parameters

In [None]:
# Google Drive locations: HOME_DIR is the "MyDrive" folder under the
# /content/gdrive mount point, and DATA_DIR is its "data" sub-folder.
HOME_DIR = "/content/gdrive/MyDrive"
DATA_DIR = os.path.join(HOME_DIR, 'data')

# Git identity placeholders. They are left unset here and are not referenced
# by any of the visible cells in this notebook.
GIT_USERNAME = GIT_EMAIL = GIT_PASSWORD = None

## Google Drive

In [None]:
# Attach the user's Google Drive to the Colab runtime. The mount point is the
# parent of the "MyDrive" folder that the path settings of this notebook use.
from google.colab import drive

drive.mount("/content/gdrive")

## GCP

In [None]:
GCP_BUCKET_NAME = "deer-rudolph" #@param {type:"string"}
GCP_BUCKET_DATA_FOLDER_NAME = 'data' #@param {type:"string"}
GCP_PROJECT_NAME = 'deer-deep-learning-project'#@param {type:"string"}
GCP_PROJECT_ID = 'linear-freehold-314804' #@param {type:"string"}
GCP_HOME_DIR = os.path.join('gs://', GCP_BUCKET_NAME)

from google.colab import auth
auth.authenticate_user()
!gcloud config set project {GCP_PROJECT_ID}

In [None]:
# Sanity check after authentication: ask gsutil for a long listing of what the
# authenticated account / configured project can see.
!gsutil ls -al

In [None]:
# Long listing (including hidden entries) of the runtime's current working directory.
!ls -al

# Copy GDrive data to GCS

In [None]:
# Switch the kernel's working directory to DATA_DIR (the %cd magic persists for
# later cells, unlike `!cd`), then list the dataset folders available on Drive.
%cd {DATA_DIR}
!ls

## [Caution] Specific Dataset Example

In [None]:
DATASET_NAME = 'aihubpedestrian' #@param {type:"string"}
print('from (Gdrive) - {}\nto (GCS) - {}'.format(os.path.join(DATA_DIR, DATASET_NAME), 
                                                 os.path.join(GCP_HOME_DIR, GCP_BUCKET_DATA_FOLDER_NAME, DATASET_NAME)))
print('\nGoogle drive :: ')
_p = os.path.join(DATA_DIR, DATASET_NAME)
%ls _p

print('\nGoogle Cloud Storage :: ')
!gsutil ls gs://{GCP_BUCKET_NAME}/{GCP_BUCKET_DATA_FOLDER_NAME}/{DATASET_NAME}
# If CLI raise CommandException: No URLs matched: <path>, go to GCS console and make directory for <path>.

In [None]:
# Recursively copy one dataset folder from Google Drive into the bucket's data folder.
# If you have a large number of files to transfer, you can perform a parallel
# multi-threaded/multi-processing copy using the top-level gsutil -m option.
# NOTE(review): the objects are expected to land under
# gs://<bucket>/<data folder>/<DATASET_NAME>/ when the destination folder
# already exists - confirm against gsutil's cp naming rules.
!gsutil -m cp -r {DATA_DIR}/{DATASET_NAME} gs://{GCP_BUCKET_NAME}/{GCP_BUCKET_DATA_FOLDER_NAME}

## [Caution] All datasets

In [None]:
# If you have a large number of files to transfer, you can perform a parallel multi-threaded/multi-processing copy using the top-level gsutil -m option
!gsutil -m cp -r {DATA_DIR} gs://{GCS_BUCKET_NAME}/{GCS_BUCKET_DATA_FOLDER_NAME}

## [Caution] Specific File or Directory

In [None]:
_validation_complete = False
import tensorflow as tf # just to use tf.io

FROM_PATH_OR_DIR = '' #@param {type:"string"}
TO_DIR = '' #@param {type:"string"}
MAKE_NEW_IF_TO_DIR_NOT_EXISTS = True #@param {type:"boolean"}

print('from (Gdrive) - {}\nto (GCS) - {}'.format(os.path.join(HOME_DIR, FROM_PATH_OR_DIR),
                                                 os.path.join(GCP_HOME_DIR, TO_DIR)))

print('\nGoogle drive :: ')
_f = os.path.join(HOME_DIR, FROM_PATH_OR_DIR)
if tf.io.gfile.isdir(_f):
    %ls {_f}
    print('[directory] ', end='')
else:
    print('[file] ', end='')
print(_f, end=' ')
if not tf.io.gfile.exists(_f):
    print('does not exists!')
    raise ValueError

print('\n\nGoogle Cloud Storage :: ')
_t = os.path.join(GCP_HOME_DIR, TO_DIR)
if not tf.io.gfile.exists(_t):
    print('{} does not exists!'.format(_t))
    if tf.io.gfile.isdir(_t):
        print('{} is not a directory!'.format(_t))
        raise ValueError
    else:
        if MAKE_NEW_IF_TO_DIR_NOT_EXISTS:
            tf.io.gfile.makedirs(_t)
            print('generated, ', end='')


assert tf.io.gfile.exists(_t), '{} does not exists!'.format(_t)
assert tf.io.gfile.isdir(_t), '{} is not a directory!'.format(_t)
print('[directory]', _t)
!gsutil ls {_t}

_validation_complete = True

print('\nfile cp result example : {}'.format(os.path.join(_t, 'file.xx')))
print('dir cp result example : {}'.format(os.path.join(_t, 'dir/xx')))

In [None]:
assert _validation_complete, 'Validation check was not completed.'

# If you have a large number of files to transfer, you can perform a parallel multi-threaded/multi-processing copy using the top-level gsutil -m option
!gsutil -m cp -r {_f} {_t}

# Copy GCS data to GDrive

## [Caution] Specific File or Directory

In [None]:
_validation_complete_2 = False
import tensorflow as tf # just to use tf.io

FROM_PATH_OR_DIR = '' #@param {type:"string"}
TO_DIR = '' #@param {type:"string"}
MAKE_NEW_IF_TO_DIR_NOT_EXISTS = True #@param {type:"boolean"}
print('from (Gdrive) - {}\nto (GCS) - {}'.format(os.path.join(GCP_HOME_DIR, FROM_PATH_OR_DIR),
                                                 os.path.join(HOME_DIR, TO_DIR)))


print('\nGoogle Cloud Storage :: ')
_f = os.path.join(GCP_HOME_DIR, FROM_PATH_OR_DIR)
if tf.io.gfile.isdir(_f):
    !gsutil ls {_f}
    print('[directory] ', end='')
else:
    print('[file] ', end='')
print(_f, end=' ')
if not tf.io.gfile.exists(_f):
    print('does not exists!')
    raise ValueError

print('\n\nGoogle drive :: ')
_t = os.path.join(HOME_DIR, TO_DIR)
if not os.path.exists(_t):
    print('{} does not exists!'.format(_t))
    if os.path.isdir(_t):
        print('{} is not a directory!'.format(_t))
        raise ValueError
    else:
        if MAKE_NEW_IF_TO_DIR_NOT_EXISTS:
            os.makedirs(_t)
            print('generated, ', end='')

assert os.path.exists(_t), '{} does not exists!'.format(_t)
assert os.path.isdir(_t), '{} is not a directory!'.format(_t)
print('[directory]', _t)
%ls {_t}

_validation_complete_2 = True

print('\nfile cp result example : {}'.format(os.path.join(_t, 'file.xx')))
print('dir cp result example : {}'.format(os.path.join(_t, 'dir/xx')))

In [None]:
assert _validation_complete_2, 'Validation check was not completed.'

# If you have a large number of files to transfer, you can perform a parallel multi-threaded/multi-processing copy using the top-level gsutil -m option
!gsutil -m cp -r {_f} {_t}