# Kaggle API
### After downloading key from kaggle.com to Google Drive:
Access Google Drive, copy kaggle.json to Colab and report progress.  
Authentication of Google account is part of the process (once every session).

In [1]:
from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth
# Sign in to Google for this Colab session so the Drive API calls are authorised.
auth.authenticate_user()
drive_service = build('drive', 'v3')

# Search Drive for a file named exactly 'kaggle.json'; only the file ids are requested.
results = drive_service.files().list(
    q="name = 'kaggle.json'", fields="files(id)").execute()
kaggle_api_key = results.get('files', [])
if not kaggle_api_key:
    # Fail with a readable message instead of an IndexError on kaggle_api_key[0].
    raise FileNotFoundError(
        "No file named 'kaggle.json' was found in Google Drive; upload the key first.")

filename = "/.kaggle/kaggle.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Stream the first match to disk chunk by chunk, reporting progress after each chunk.
request = drive_service.files().get_media(fileId=kaggle_api_key[0]['id'])
with io.FileIO(filename, 'wb') as fh:  # the handle is closed once the download ends
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# Owner read/write only. The mode must be octal: decimal 600 equals 0o1130,
# which does not even grant the owner read permission.
os.chmod(filename, 0o600)

Download 100%.


In [0]:
# Check existence of file.
# The key's contents are deliberately NOT printed: cell outputs are saved with
# the notebook, so `cat`-ing kaggle.json would leak the secret to anyone it is shared with.
!ls /.kaggle
!test -s /.kaggle/kaggle.json && echo "kaggle.json is present and non-empty"

In [0]:
# Copy the key into ~/.kaggle and restrict it to the owner.
# NOTE(review): ~/.kaggle/kaggle.json is the location the Kaggle client documents — confirm for the installed version.
!mkdir -p ~/.kaggle
!cp /.kaggle/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [4]:
# Install the `kaggle` package from PyPI with pip (no version is pinned,
# so the latest available release is installed).
!pip install kaggle

Collecting kaggle
[?25l  Downloading https://files.pythonhosted.org/packages/c6/78/832b9a9ec6b3baf8ec566e1f0a695f2fd08d2c94a6797257a106304bfc3c/kaggle-1.4.7.1.tar.gz (52kB)
[K    100% |████████████████████████████████| 61kB 4.0MB/s 
Collecting python-slugify (from kaggle)
  Downloading https://files.pythonhosted.org/packages/00/ad/c778a6df614b6217c30fe80045b365bfa08b5dd3cb02e8b37a6d25126781/python-slugify-1.2.6.tar.gz
Collecting Unidecode>=0.04.16 (from python-slugify->kaggle)
[?25l  Downloading https://files.pythonhosted.org/packages/59/ef/67085e30e8bbcdd76e2f0a4ad8151c13a2c5bce77c85f8cad6e1f16fb141/Unidecode-1.0.22-py2.py3-none-any.whl (235kB)
[K    100% |████████████████████████████████| 235kB 7.2MB/s 
[?25hBuilding wheels for collected packages: kaggle, python-slugify
  Running setup.py bdist_wheel for kaggle ... [?25l- done
[?25h  Stored in directory: /root/.cache/pip/wheels/44/2c/df/22a6eeb780c36c28190faef6252b739fdc47145fd87a6642d4
  Running setup.py bdist_wheel for pyt

### Check if the connection is successful

In [5]:
# List Kaggle competitions as a connectivity check: a table of competitions
# means the API key was accepted.
# NOTE: the Kaggle API was observed to be offline at one point; if this call
# fails, it may be a temporary outage - retry later.
!kaggle competitions list

ref                                            deadline             category            reward  teamCount  userHasEntered  
---------------------------------------------  -------------------  ---------------  ---------  ---------  --------------  
digit-recognizer                               2030-01-01 00:00:00  Getting Started  Knowledge       2649           False  
titanic                                        2030-01-01 00:00:00  Getting Started  Knowledge       9858            True  
house-prices-advanced-regression-techniques    2030-01-01 00:00:00  Getting Started  Knowledge       4192            True  
imagenet-object-localization-challenge         2029-12-31 07:00:00  Research         Knowledge         26           False  
pubg-finish-placement-prediction               2019-01-30 23:59:00  Playground            Swag         38           False  
human-protein-atlas-image-classification       2019-01-10 23:59:00  Featured           $37,000        342           False  
two-sigm

### Download MNIST-like fashion data from Zalando Research

In [0]:
# Download the zalando-research/fashionmnist dataset (-d) with /content/kaggle
# given as the path option (-p).
!kaggle datasets download -d zalando-research/fashionmnist -p /content/kaggle

See what you got

In [0]:
# List the contents of /content/kaggle.
!ls /content/kaggle

Oooh. A zip file. Let's unzip it.

In [0]:
# Extract every archive in the download folder.
# The glob is quoted so that unzip expands it itself; unquoted, the shell would
# pass any additional zip names as "members to extract" from the first archive.
# -o overwrites existing files without prompting, so the cell can be re-run safely.
!unzip -o '/content/kaggle/*.zip' -d /content/kaggle/
!ls /content/kaggle/

Go mad using Python

# OpenML

In [0]:
# Install the OpenML Python client directly from the `develop` branch of its
# GitHub repository (openml/openml-python) rather than from a PyPI release.
!pip install git+https://github.com/openml/openml-python.git@develop

### Copy API key from Google Drive to Colab
Requires that you have first logged into OpenML and saved your API key as 'apikey=MYKEY' in a file named 'config' directly to your Drive.

In [0]:
# Copy API key
from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth
# Sign in to Google for this Colab session so the Drive API calls are authorised.
auth.authenticate_user()
drive_service = build('drive', 'v3')

# Search Drive for a file named exactly 'config'; only the file ids are requested.
results = drive_service.files().list(
    q="name = 'config'", fields="files(id)").execute()
config_api_key = results.get('files', [])
if not config_api_key:
    # Fail with a readable message instead of an IndexError on config_api_key[0].
    raise FileNotFoundError(
        "No file named 'config' was found in Google Drive; save the OpenML key there first.")

# NOTE(review): this writes to /.openml at the filesystem root; confirm that the
# installed openml client reads its configuration from this location.
filename = "/.openml/config"
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Stream the first match to disk chunk by chunk, reporting progress after each chunk.
request = drive_service.files().get_media(fileId=config_api_key[0]['id'])
with io.FileIO(filename, 'wb') as fh:  # the handle is closed once the download ends
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))

# Owner read/write only. The mode must be octal: decimal 600 equals 0o1130,
# which does not even grant the owner read permission.
os.chmod(filename, 0o600)

In [0]:
# Verify that the key file was written to /.openml:
# the listing should show 'config' as the output.
!ls /.openml

### Test OpenML connection by listing some data sets

In [0]:
import pandas as pd
import openml as oml

# Ask OpenML for its dataset catalogue; the rest of this cell treats the
# result as a dict.
openml_list = oml.datasets.list_datasets()

# Keep only a handful of key properties so the table stays readable.
key_columns = [
    'did',
    'name',
    'NumberOfInstances',
    'NumberOfFeatures',
    'NumberOfClasses',
]
datalist = pd.DataFrame.from_dict(openml_list, orient='index')[key_columns]

print("First 10 of %s datasets..." % len(datalist))
datalist.head(10)