# Download All Raw Data
Gets all of the raw data needed by this study using Globus and HTTP download. You will need Globus installed to use this notebook.

In [1]:
from mdf_toolbox import login
import requests
import cgi
import os

Things to change

In [2]:
# HTTP location of the G4MP2 ASE database file hosted by the MDF
g4mp2_location = 'https://data.materialsdatafacility.org/published/publication_1335/data/g4mp2-gdb9.db'

# Figshare article IDs whose attached files are downloaded for QM9
qm9_id = [
    1057646,
    1057644,
]

# Absolute path of the directory that receives every downloaded file
dest_path = os.path.abspath(os.path.join('data', 'input'))

## Get the G4MP2 Data
We need the G4MP2 calculations of all of the molecules in the QM9-G4MP2 dataset. The data is an ASE DB that is [available in the MDF](http://dx.doi.org/doi:10.18126/M23P9G). We will download it using HTTP.

### Get an Authorization Tool
The `mdf_toolbox` includes tools for streamlining getting the credentials needed to access data on the MDF. 

In [3]:
# Log in and keep only the entry for the 'data_mdf' service; it is used
# further down to fill in the authorization header of HTTP requests
mdf_auth = login(
    app='jcesr_ml',
    services=['data_mdf'],
)['data_mdf']

### Download from the MDF
We need to download the ASE db file

In [4]:
def get_from_mdf(url, path):
    """Download a file from the MDF over HTTP

    The file is saved under the last component of the URL
    (e.g., ``.../data/file.db`` is saved as ``file.db``).

    Args:
        url (str): URL of the file to download
        path (str): Directory in which to save the file.
            Created if it does not already exist
    Raises:
        requests.HTTPError: If the server responds with an error status
    """

    filename = os.path.basename(url)
    local_path = os.path.join(path, filename)

    # Get the auth headers
    headers = {}
    mdf_auth.set_authorization_header(headers)

    # Make sure there is somewhere to write the file
    os.makedirs(path, exist_ok=True)

    # Download file, streaming it to disk in 1 MiB chunks
    with requests.get(url, headers=headers, stream=True) as req:
        # Fail before opening the output file so that an error page
        #  (e.g., 401 or 404) is never saved in place of the data
        req.raise_for_status()
        with open(local_path, 'wb') as fp:
            for chunk in req.iter_content(chunk_size=1024 ** 2):
                fp.write(chunk)
# Fetch the G4MP2 database into the download directory
get_from_mdf(url=g4mp2_location, path=dest_path)

## Download the QM9 Data from Figshare
We need the data file and the list of uncharacterized molecules

In [5]:
def get_from_figshare(fid, path):
    """Download every file attached to a figshare article

    Each file is saved under the name reported by the figshare API.

    Args:
        fid (int): ID number of figshare article
        path (str): Directory in which to save the files.
            Created if it does not already exist
    Raises:
        requests.HTTPError: If the file listing or any download
            responds with an error status
    """

    # Get the list of files attached to the article
    listing = requests.get('https://api.figshare.com/v2/articles/{}/files'.format(fid))
    listing.raise_for_status()
    art_details = listing.json()

    # Make sure there is somewhere to write the files
    os.makedirs(path, exist_ok=True)

    # Loop over each file
    for detail in art_details:
        # Make the download path
        filename = detail['name']
        data_path = os.path.join(path, filename)

        # Download this file, using its own URL rather than the URL of the
        #  first entry, streaming it to disk in 1 MiB chunks
        with requests.get(detail['download_url'], stream=True) as req:
            # Fail before opening the output file so that an error page
            #  is never saved in place of the data
            req.raise_for_status()
            with open(data_path, 'wb') as fp:
                for chunk in req.iter_content(chunk_size=1024 ** 2):
                    fp.write(chunk)

In [6]:
# Fetch the files of every configured figshare article into the download directory
for article_id in qm9_id:
    get_from_figshare(article_id, dest_path)