# Download HGNC datasets

In [1]:
import os
import ftplib
import datetime

In [2]:
def download_file(ftp, ftp_path, local_path):
    """
    Download a file from an open ftp connection.
    Returns the ISO formatted last modified time.

    ftp: connection object providing `sendcmd` and `retrbinary`
        (for example a logged-in `ftplib.FTP`).
    ftp_path: path of the remote file.
    local_path: destination path, opened in 'wb' mode, so an existing
        file is overwritten.

    The last modified time is taken from the server's MDTM reply and
    returned at one-second resolution, e.g. '2017-01-25T06:13:11'.
    Raises ValueError if the reply does not carry a YYYYMMDDHHMMSS
    timestamp.
    """
    # Get last modified datetime. A successful reply looks like
    # '213 20170125061311'; servers may append '.sss' fractional seconds,
    # which the fixed strptime format would reject, so drop that part.
    reply = ftp.sendcmd('MDTM ' + ftp_path)
    timestamp = reply[4:].strip().partition('.')[0]
    modified_time = datetime.datetime.strptime(timestamp, "%Y%m%d%H%M%S")
    modified_time = modified_time.isoformat()

    # Download remote file to local path
    with open(local_path, 'wb') as write_file:
        ftp.retrbinary('RETR ' + ftp_path, write_file.write)

    return modified_time

def download_files_in_directory(ftp, ftp_directory, local_directory):
    """
    Copy every entry listed in a remote ftp directory into a local directory.
    The local directory is created when it does not exist yet.
    For each file, prints its last modified time followed by its name.
    """
    os.makedirs(local_directory, exist_ok=True)
    remote_paths = ftp.nlst(ftp_directory)
    for remote_path in remote_paths:
        # Keep only the final path component as the local file name
        name = os.path.basename(remote_path)
        destination = os.path.join(local_directory, name)
        timestamp = download_file(ftp, remote_path, destination)
        print(timestamp, name)

## Connect to EBI FTP site

In [3]:
# Open a connection to the EBI FTP server; later cells reuse `ftp`.
ftp = ftplib.FTP('ftp.ebi.ac.uk')
# No credentials are passed, so ftplib falls back to an anonymous login.
ftp.login()

'230 Login successful.'

## Download gene family data

In [4]:
# Remote directory to fetch and the local folder the files are copied into.
ftp_directory = '/pub/databases/genenames/new/csv/genefamily_db_tables'
local_directory = os.path.join('download', 'genefamily_db_tables')
# Downloads every listed file and prints "<modified time> <filename>" for each.
download_files_in_directory(ftp, ftp_directory, local_directory)

2016-07-13T16:22:11 README.txt
2017-01-25T06:13:11 external_resource.csv
2017-01-25T06:13:11 family.csv
2017-01-25T06:13:11 family_has_external_resource.csv
2017-01-25T06:13:11 gene_has_family.csv
2017-01-25T06:13:11 hierarchy.csv
2017-01-25T06:13:11 hierarchy_closure.csv


## Download gene table

In [5]:
# The file keeps its remote name inside the local 'download' folder.
filename = 'hgnc_complete_set.txt'
ftp_path = '/pub/databases/genenames/new/tsv/' + filename
local_path =  os.path.join('download', filename)
# Last expression of the cell: the returned modified time is shown as its output.
download_file(ftp, ftp_path, local_path)

'2017-01-25T05:53:32'

## Close FTP connection

In [6]:
# Send QUIT to the server and close the connection; `ftp` is unusable afterwards.
ftp.quit()

'221 Goodbye.'