<h2>PCQM4Mv2</h2>
<h4>Predicting a quantum property of molecules</h4>
<h4><a href="https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/pcqm4m-v2.zip">Click here to download dataset from source</a></h4>

In [1]:
import os
import requests
import shutil
import zipfile

In [2]:
# Direct link to the PCQM4Mv2 zip archive (the same URL as the download link in the notebook header).
DATASET_URL = "https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/pcqm4m-v2.zip"

In [3]:
# Folder (relative to the working directory) where the archive is downloaded and extracted.
DATA_FOLDER = os.path.join("..", "data")

# exist_ok=True keeps this cell idempotent on re-runs without a separate,
# race-prone "check, then create" step.
os.makedirs(DATA_FOLDER, exist_ok=True)

In [4]:
def download_data(url, folder, filename, chunk_size=1024 * 1024, timeout=60):
    """
    Downloads a file via URL and saves it to a custom path.
    ...
    The response body is streamed to disk in chunks, so the whole file is
    never held in memory at once. Data is first written to a temporary
    '<filename>.part' file and only renamed to its final name once the
    download has completed, so a failed download does not leave a truncated
    file under the final name.

    Failures (network errors, non-200 responses, I/O errors) are reported
    with a printed message instead of being raised.

    Parameters
    ----------
    url: str
        The file URL
    folder: str
        The destination folder to save file
    filename: str
        The name to save file
    chunk_size: int, optional
        Number of bytes requested per streamed chunk (default: 1 MiB)
    timeout: float, optional
        Seconds to wait for the server to connect/send data before
        giving up (default: 60)
    """

    filepath = os.path.join(folder, filename)
    partial_path = filepath + ".part"

    try:
        # stream=True defers fetching the body until iter_content() is consumed.
        with requests.get(url, stream=True, timeout=timeout) as r:
            if r.status_code != 200:
                print(f"Download failed: the server responded with HTTP status {r.status_code}.")
                return

            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)

        # Publish the file under its final name only after a complete download.
        os.replace(partial_path, filepath)
        print("Data downloaded successfully!")
        print(f"Data saved at: '{filepath}'")

    except Exception as e:
        # Best-effort cleanup of the incomplete file; it may not exist yet.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"An error occurred while trying to download/save the requested data: {e}")

In [5]:
# Fetch the dataset archive from DATASET_URL and store it in DATA_FOLDER as 'pcqm4m-v2.zip'.
download_data(DATASET_URL, DATA_FOLDER, filename="pcqm4m-v2.zip")

Data downloaded successfully!
Data saved at: '..\data\pcqm4m-v2.zip'


In [6]:
def unzip_data(folder, filename):
    """
    Extracts every member of a zip archive into the folder that contains it.
    ...
    Parameters
    ----------
    folder: str
        The folder holding the archive; also the extraction destination
    filename: str
        The archive file name inside `folder`
    """
    archive_path = os.path.join(folder, filename)

    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=folder)

    print(f"Unzipped '{filename}' at: '{folder}'")

In [7]:
# Extract the downloaded archive directly inside DATA_FOLDER.
unzip_data(DATA_FOLDER, filename="pcqm4m-v2.zip")

Unzipped 'pcqm4m-v2.zip' at: '..\data'


In [8]:
def move_file(original_path, final_path, filename):
    """
    Moves a file from one folder to another, keeping its name.
    ...
    Parameters
    ----------
    original_path: str
        The folder that currently holds the file
    final_path: str
        The folder the file is moved into
    filename: str
        The name of the file to move
    """
    source, destination = (
        os.path.join(parent, filename) for parent in (original_path, final_path)
    )
    shutil.move(source, destination)

    report = f"Moved '{filename}' from '{original_path}' to '{final_path}'"
    print(report)

In [10]:
# Lift 'data.csv.gz' out of the extracted 'pcqm4m-v2/raw' subfolder into DATA_FOLDER itself.
move_file(original_path=os.path.join(DATA_FOLDER, "pcqm4m-v2", "raw"),
          final_path=DATA_FOLDER,
          filename="data.csv.gz")

Moved 'data.csv.gz' from '..\data\pcqm4m-v2\raw' to '..\data'


In [11]:
def remove_file(filepath):
    """
    Deletes a single file and reports it.
    ...
    Parameters
    ----------
    filepath: str
        The path of the file to delete
    """
    os.remove(filepath)

    report = f"Removed '{filepath}' !"
    print(report)

def remove_dir(dirpath):
    """
    Deletes a directory together with everything inside it and reports it.
    ...
    Parameters
    ----------
    dirpath: str
        The path of the directory tree to delete
    """
    shutil.rmtree(dirpath)

    report = f"Removed '{dirpath}' !"
    print(report)

In [12]:
# Clean up: delete the downloaded archive and the extracted 'pcqm4m-v2' tree from DATA_FOLDER.
remove_file(os.path.join(DATA_FOLDER, "pcqm4m-v2.zip"))
remove_dir(os.path.join(DATA_FOLDER, "pcqm4m-v2"))

Removed '..\data\pcqm4m-v2.zip' !
Removed '..\data\pcqm4m-v2' !
