# Identify a directory and download its content

In [1]:
import os

import numpy as np
from osftools import osf_display as display
from osftools import osf_interact as interact

In [2]:
# OSF project identifier, and the URL that interact.format_url builds from it.
PROJECT_ID  = 'fuaqw'
PROJECT_URL = interact.format_url(PROJECT_ID)
# Local root directory that downloads are written under.
# The default is the original author's machine-specific path; set the
# OSF_PROJECT_DIR environment variable to redirect downloads without editing
# the notebook.
PROJECT_DIR = os.environ.get(
    'OSF_PROJECT_DIR',
    '/Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3',
)

### Let's list all the directories in our project

In [3]:
# Query the project URL for entries of kind 'folder'. The two returned
# sequences are indexed together below: dirlist[i] goes with idlist[i].
dirlist, idlist = interact.url_to_idlist(
    PROJECT_URL,
    kind='folder',
)

In [4]:
# Report each folder name next to the ID stored at the same position.
for position, folder_name in enumerate(dirlist):
    folder_id = idlist[position]
    print('Found directory {0} with ID {1}'.format(folder_name, folder_id))

Found directory Dataset_serotonin_calcium_March2018 with ID 5d2490391c5b4a001b9be7e1
Found directory Dataset_serotonin_TMPPAA_Nov2017 with ID 5d259f47a26b34001906e394
Found directory Dataset_vortioxetine_distorted_May2019 with ID 5d25d4b91c5b4a001a9e08e3
Found directory Dataset_vortioxetine_detergent_May2019 with ID 5d25e58245253a001a3ac99c


### Let's pick one and list all its files

In [5]:
# Work with the first folder found above (index 0 of both parallel lists).
DIR_NAME = dirlist[0]
DIR_ID = idlist[0]

# Build the URL for that folder, then list the entries of kind 'file' in it.
DIR_URL = interact.format_url(
    PROJECT_ID,
    ID=DIR_ID,
)
filelist, fileidlist = interact.url_to_idlist(
    DIR_URL,
    kind='file',
)

In [6]:
# Report each file name next to the ID stored at the same position.
for position, file_name in enumerate(filelist):
    file_id = fileidlist[position]
    print('Found file {0} with ID {1}'.format(file_name, file_id))

Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260339_1260340_20180304_0556-7298_aligned_mic_DW_extract.star with ID 5d24917545253a001b399d14
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1259911_1259912_20180304_0555-7297_aligned_mic_DW_extract.star with ID 5d249176114a4200180218d7
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1259911_1259912_20180304_0555-7297_aligned_mic_DW.mrcs with ID 5d249176114a4200190217dc
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260339_1260340_20180304_0556-7298_aligned_mic_DW.mrcs with ID 5d2491771c5b4a001b9be94d
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260350_1260351_20180304_0557-7299_aligned_mic_DW.mrcs with ID 5d2491771c5b4a001b9be94e
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260350_1260351_20180304_0557-7299_aligned_mic_DW_extract.star with ID 5d249179a26b340019060daf
Found file GridSquare_1250569_Data_FoilHole_1279799_Data_1259911_1259912_20180304_0559-7300_aligned_mic_DW

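There are two things that we need to improve:
- we do not retrieve all the files here, only the first 10 (one page of results).
- we are not interested in all of those files; we only want the ones with a given extension (ultimately the `.mrc` files; the next cell demonstrates the filter with `.star`).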

In [7]:
# Re-list the folder, keeping only hits for ext='star' and capping the
# search with npages_max=2 (so this is still not the full folder content).
filelist, fileidlist = interact.list_all_files(
    DIR_URL,
    ext='star',
    npages_max=2,
)

Initial length of hit list is 5
npages = 2
Final length of hit list is 10


In [8]:
# Show the filtered hits: each file name with the ID at the same position.
for position, file_name in enumerate(filelist):
    file_id = fileidlist[position]
    print('Found file {0} with ID {1}'.format(file_name, file_id))

Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260339_1260340_20180304_0556-7298_aligned_mic_DW_extract.star with ID 5d24917545253a001b399d14
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1259911_1259912_20180304_0555-7297_aligned_mic_DW_extract.star with ID 5d249176114a4200180218d7
Found file GridSquare_1250569_Data_FoilHole_1279798_Data_1260350_1260351_20180304_0557-7299_aligned_mic_DW_extract.star with ID 5d249179a26b340019060daf
Found file GridSquare_1250569_Data_FoilHole_1279799_Data_1259911_1259912_20180304_0559-7300_aligned_mic_DW_extract.star with ID 5d24917c1c5b4a001a9c4187
Found file GridSquare_1250569_Data_FoilHole_1279799_Data_1260339_1260340_20180304_0559-7301_aligned_mic_DW_extract.star with ID 5d24917d45253a001b399d23
Found file GridSquare_1250569_Data_FoilHole_1279799_Data_1260350_1260351_20180304_0600-7302_aligned_mic_DW_extract.star with ID 5d24917f1c5b4a001a9c4194
Found file GridSquare_1250569_Data_FoilHole_1279800_Data_1259911_1259912_20180304_

### Let's download them now

In [9]:
# Download every listed file into <PROJECT_DIR>/<DIR_NAME>/.
# Joining with an empty last element keeps the trailing '/' on the path.
interact.download_files_from_list(
    PROJECT_URL,
    '/'.join((PROJECT_DIR, DIR_NAME, '')),
    fileidlist,
    filelist,
)

... Downloading https://osf.io/download/5d24917545253a001b399d14/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/GridSquare_1250569_Data_FoilHole_1279798_Data_1260339_1260340_20180304_0556-7298_aligned_mic_DW_extract.star ...
... Downloading https://osf.io/download/5d249176114a4200180218d7/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/GridSquare_1250569_Data_FoilHole_1279798_Data_1259911_1259912_20180304_0555-7297_aligned_mic_DW_extract.star ...
... Downloading https://osf.io/download/5d249179a26b340019060daf/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/GridSquare_1250569_Data_FoilHole_1279798_Data_1260350_1260351_20180304_0557-7299_aligned_mic_DW_extract.star ...
... Downloading https://osf.io/download/5d24917c1c5b4a001a9c4187/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/GridSquare_1250569_Data_FoilHole_1279

## A more robust approach?
The approach above does not seem very stable when many pages are involved. Instead, we now go through the pages one at a time and download the matching files from the page currently being visited, unless they have already been downloaded.

In [10]:
# Page-by-page download of the 'mrc' hits into <PROJECT_DIR>/<DIR_NAME>/.
# The recorded output below shows pages 616 and 617 being scanned.
interact.download_per_page(
    PROJECT_URL,
    '/'.join((PROJECT_DIR, DIR_NAME, '')),
    DIR_ID=DIR_ID,
    ext='mrc',
    start_page=616,
    end_page=617,
)

About to skim through 2 pages
Number of hits in page 616: 0
Number of hits in page 617: 4
... Downloading https://osf.io/download/5d24bcaa114a420019024167/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/FoilHole_1259224_Data_1260339_1260340_20180302_1531-5585_sum_DW.mrc ...
... Downloading https://osf.io/download/bx86m/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/FoilHole_1259223_Data_1259911_1259912_20180302_1527-5581_sum_DW.mrc ...
... Downloading https://osf.io/download/5d24bcada26b3400180622fa/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/FoilHole_1259223_Data_1260350_1260351_20180302_1528-5583_sum_DW.mrc ...
... Downloading https://osf.io/download/5d24bcad114a42001902416e/ to /Users/fpoitevi/gdrive/cryoEM/Projects/Hugues_5ht3/Dataset_serotonin_calcium_March2018/FoilHole_1259224_Data_1259911_1259912_20180302_1530-5584_sum_DW.mrc ...
