# Google Drive connection

In [1]:
# Mount the user's Google Drive into the local filesystem at /content/drive.
# NOTE(review): the google.colab package is presumably only available inside a
# Colab runtime; the import raises ModuleNotFoundError where it is not installed.
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

### Complete Linux sandbox

In [None]:
# Print the current working directory
!pwd
# List the contents of the current directory
!ls
# List the filesystem root
!ls /
# List the current user's home directory
!ls ~/
# Other standard shell commands work the same way, e.g. cp (copy) and mkdir (make directory)

### Ready with Python, TensorFlow, Keras, ...

In [None]:
# Show the version of the Python interpreter found on the shell PATH
!python --version
# Tip: type `import ` and press <tab> to explore the modules available for import

# Kaggle API
### After downloading key from kaggle.com (My Account -> Create new API token) to Google Drive:
Access Google Drive, copy kaggle.json to Colab and report progress.  
Authentication of Google account is part of the process (once every session).

In [None]:
# Fetch kaggle.json from Google Drive into the Colab VM, reporting progress.
import io, os
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth

# Interactive Google sign-in; must happen before the Drive API is queried.
auth.authenticate_user()
drive_service = build('drive', 'v3')

# Look up the Drive file id(s) of anything named kaggle.json.
results = drive_service.files().list(
        q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
if not kaggle_api_key:
    # Fail with a readable message instead of an IndexError on [0] below.
    raise FileNotFoundError(
        "No file named 'kaggle.json' was found in Google Drive.")

filename = "/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])

# The context manager closes the file even if a chunk download fails.
with io.FileIO(filename, 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# Owner read/write only. The mode must be octal: decimal 600 equals 0o1130.
os.chmod(filename, 0o600)

In [None]:
# Check existence of file
!ls /.kaggle
!cat /.kaggle/kaggle.json

In [None]:
# Notice the difference in path names with/without ~
!mkdir -p ~/.kaggle
!cp /.kaggle/kaggle.json ~/.kaggle/

In [None]:
# Install Python part of kaggle
!pip install kaggle

### Check if the connection is successful

In [None]:
# List Kaggle competitions as a quick connectivity check.
# NOTE: when this notebook was written ("Tuesday after lunch") the Kaggle API
# appeared to be offline; if this command fails, the service may be down, so retry later.
!kaggle competitions list

### Download MNIST-like fashion data from Zalando Research

In [None]:
# Download the zalando-research/fashionmnist dataset; -p names the target directory (/content/kaggle).
!kaggle datasets download -d zalando-research/fashionmnist -p /content/kaggle

See what you got

In [None]:
# List what the download placed in /content/kaggle
!ls /content/kaggle

Oooh. A zip file. Let's unzip it.

In [None]:
!unzip /content/kaggle/*.zip -d /content/kaggle/
!ls /content/kaggle/

Go mad using Python

# OpenML

In [None]:
# Install package
!pip install git+https://github.com/openml/openml-python.git@develop

### Copy API key from Google Drive to Colab
This requires that you have first logged in to OpenML and saved your API key, as the line 'apikey=MYKEY', in a file named 'config' directly in your Google Drive.

In [None]:
# Copy API key: fetch the OpenML 'config' file from Google Drive into the Colab VM.
import io, os
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth

# Interactive Google sign-in; must happen before the Drive API is queried.
auth.authenticate_user()
drive_service = build('drive', 'v3')

# Look up the Drive file id(s) of anything named config.
results = drive_service.files().list(
        q="name = 'config'", fields="files(id)").execute()
config_api_key = results.get('files', [])
if not config_api_key:
    # Fail with a readable message instead of an IndexError on [0] below.
    raise FileNotFoundError(
        "No file named 'config' was found in Google Drive.")

filename = "/.openml/config"
os.makedirs(os.path.dirname(filename), exist_ok=True)
request = drive_service.files().get_media(fileId=config_api_key[0]['id'])

# The context manager closes the file even if a chunk download fails.
with io.FileIO(filename, 'wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# Owner read/write only. The mode must be octal: decimal 600 equals 0o1130.
os.chmod(filename, 0o600)

In [None]:
# List the directory the key was downloaded to; this should show 'config' as the output
!ls /.openml

### Test OpenML connection by listing some data sets

In [None]:
import openml as oml
import pandas as pd

# Fetch the metadata of all datasets known to the OpenML server.
# Depending on the installed openml-python version the result is either a dict
# keyed by dataset id or already a pandas DataFrame.
openml_list = oml.datasets.list_datasets()

# Show a nice table with some key data properties
if isinstance(openml_list, pd.DataFrame):
    datalist = openml_list
else:
    datalist = pd.DataFrame.from_dict(openml_list, orient='index')
datalist = datalist[[
    'did','name','NumberOfInstances',
    'NumberOfFeatures','NumberOfClasses'
]]
print("First 10 of %s datasets..." % len(datalist))
# The last expression is rendered as a rich table by the notebook.
datalist.head(n=10)