# Extracting the dataset

To extract the dataset we use the libraries [requests](http://docs.python-requests.org/en/master/) and [Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/). With requests we will make the http request of the files.

In [2]:
import requests
from bs4 import BeautifulSoup

In [46]:
def parse_subject(subject: int) -> str:
    """Method to add zeros in the left of subject
    :param subject: Identifier of the subject to analyze.
    """
    return 'S'+'0'*(3-len(str(subject)))+str(subject)

def get_eeg_data(url: str, subjects: [int], runs: [str], store: bool = True, directory: str = "eeg_data") -> None:
    """Function to extract the eeg file giving the url and inputs subject and runs.
    
    Args:
        url: The URL of the eeg file from Physionet.
        subjects: Array with the identifier of the subjects we want to download.
        runs: Array of the runs we want to store from the webpage.
        
    Returns:
        The files to be saved if store is False store otherwise.
    """
    import os
    from itertools import product
    # Create directory if it doesn't exist
    if not os.path.exists(directory):
        os.makedirs(directory)
        
    # Adding format 
    url = url+"{subject}/{subject}{run}.edf"
    
   
    # Creating base urls per subject
    for eeg in map(lambda pairs: url.format(subject=parse_subject(pairs[0]), run=pairs[1]), product(subjects, runs)):
        with open(directory+'/'+eeg.split('/')[-1], 'w') as eeg_file:
            print(requests.get(eeg, encoding='utf-8').text)
            eeg_file.write(str(requests.get(eeg).text))    

In [47]:
get_eeg_data(url="https://physionet.org/physiobank/database/eegmmidb/", subjects=[38, 39], runs=['R01', 'R02'])

TypeError: request() got an unexpected keyword argument 'encoding'

In [9]:
url = "https://physionet.org/physiobank/database/eegmmidb/"

In [10]:
url = url+"{subject}/{subject}{run}.edf"

In [23]:
from itertools import product
list(product([1, 2], ['C', 'D']))

[(1, 'C'), (1, 'D'), (2, 'C'), (2, 'D')]

In [12]:
url.format(subject='S38',run="R01")

'https://physionet.org/physiobank/database/eegmmidb/S38/S38R01.edf'