# INM430 Week 10 Practical - LDS file download
Scrape London Datastore .xls(s) files
Idea was to search for overlapping column names, to then cross reference with health profiles.  
Got as far as downloading files.

In [5]:
# helper functions

def getLDSDownloadLinksPageCount():
    # get the number of London Datastore pages we can scrape
    # using the same general format from week 09
    from bs4 import BeautifulSoup
    import urllib.request as ur
    urlToScrape = "https://data.london.gov.uk/dataset"
    r = ur.urlopen(urlToScrape).read()
    soup = BeautifulSoup(r, "lxml")
    # looking for the paging links found near footer
    linkList = soup.find_all('li', attrs={'class': 'dp-search__pagelink'})
    # all being well, the list will look like this (where each line is a list element)
    
    #<li class="dp-search__pagelink dp-search__pagelink--disabled"><span>«</span></li>
    #<li class="dp-search__pagelink dp-search__pagelink--active"><span>1</span></li>
    #<li class="dp-search__pagelink"><a href="/dataset?page=2">2</a></li>
    #<li class="dp-search__pagelink"><a href="/dataset?page=3">3</a></li>
    #<li class="dp-search__pagelink"><a href="/dataset?page=4">4</a></li>
    #<li class="dp-search__pagelink"><span>...</span></li>
    #<li class="dp-search__pagelink"><a href="/dataset?page=78">78</a></li>
    #<li class="dp-search__pagelink"><a href="/dataset?page=2">»</a></li>
    
    # The line we are interested in is the next to last (page 78), number 78 being
    # the text property of the link (a href attribute), which is the 6th element of
    # the linkList list starting from index 0
    try:
        iPagenums = linkList[6].text
    except:
        # string data type for consistency
        iPagenums = "0"

    return int(iPagenums)

def getLDSDownloadLinks(iPagenum):
    # each page number will have a number of links,
    # with a label (name) and a url (href)
    from bs4 import BeautifulSoup
    import urllib.request as ur
    urlToScrape = "https://data.london.gov.uk/dataset?page=" + str(iPagenum)
    r = ur.urlopen(urlToScrape).read()
    soup = BeautifulSoup(r, "lxml")
    # look for h3 headers
    linkList = soup.find_all('h3', attrs={'class': 'dp-searchresult__heading'})
    # our return list
    results = []
    for linkListItem in linkList:
        try:
            linkHeader = linkListItem.find('a', attrs={'class': "dp-searchresult__heading-link"})
            name = linkHeader.text
            href = linkHeader['href']
            ldslinks = {
                "name" : name,
                "href" : href,
            }
        except Exception as e:
            print("Error - no links found")
        results.append(ldslinks)
    return results

def getLDSFileDownloadLinks(href):
    # get the file download links - pdf, xls, etc, decide later what to do
    from bs4 import BeautifulSoup
    import urllib.request as ur
    urlToScrape = "https://data.london.gov.uk" + href
    r = ur.urlopen(urlToScrape).read()
    soup = BeautifulSoup(r, "lxml")
    download_links = soup.find_all('div', attrs={'class': 'dp-resource__indented'})
    results = []
    for download_link in download_links:
        try:
            link = download_link.find('a', attrs={'class': 'dp-resource__format'})
            fileurl = link['href']
            links = {
                "fileurl" : fileurl,
            }
            results.append(links)
        except:
            # TODO add href to error message
            print("Error occured parsing file download links for href =", href)
    # return a list of dictionaries
    return results

def checkPath(path):
    # check if path or file exist
    from pathlib import Path
    retval = False
    # remove leading forward slash
    if(path[0] == '/'):
        path = path[1:]
    p = Path(path)
    if(p.exists() == True):
        retval = True
    return retval

def makeDir(path):
    # create directory if required
    from pathlib import Path
    # remove leading forward slash
    if(path[0] == '/'):
        path = path[1:]
    p = Path(path)
    if(p.exists() == False):
        Path(path).mkdir(parents=True, exist_ok=True)
        
def jsonifier(links, toplevel):
    import json
    jsonified = "{\"" + toplevel + "\":" + json.dumps(links) + "}"
    # to keep return type consistent with xmlifier
    return str.encode(jsonified)

def checkXls(downloadlink):
    isXls = False
    filetype = downloadlink[-4:]
    filetypes = ['.xls', 'xlsx']
    if(filetype in filetypes):
        isXls = True
    return isXls

def downloadFile(linkspath, downloadpath):
    # expected values
    # linkspath ~ /dataset/migration-indicators
    # downloadpath ~ /download/migration-indicators/0db19902-5013-42af-972d-0e5481d7ac44/Long%20term%20international%20migration.xlsx
    import urllib.request
    makeDir(linkspath)
    baseurl = 'https://data.london.gov.uk/'
    # file donwload link fdlink will look like
    # https://data.london.gov.uk/download/migration-indicators/0db19902-5013-42af-972d-0e5481d7ac44/Long%20term%20international%20migration.xlsx
    fdlink = baseurl + downloadpath[1:]  
    localfile = linkspath[1:] + '/' + downloadpath.split('/')[4]
    print("Downloading", fdlink)
    print("Saving to directory", localfile)
    #urllib.request.urlretrieve(fdlink, localfile)
    
def getDownloadLinks():
    # get the number of pages
    iPagenums = getLDSDownloadLinksPageCount()
    # initialise our links dictionary
    links = []
    # get the links to pages containling download file links
    maxi = 0
    for i in range(1, iPagenums + 1):
        maxi = i
        links.extend(getLDSDownloadLinks(i))
    print("Scraped", maxi, "page links")
    # 3. Create a new entry in our links dictionary, 
    #    consisting of another dictionary with all the available files for download (pdf, xls, etc)
    for i in range (0, len(links)):
        links[i]['fileurls'] = getLDSFileDownloadLinks(links[i]['href'])
    print("Scraped file download links")
    return links

def saveDownloadLinksToJSON(links):
    myjsonfile = jsonifier(links, "links")
    f = open('lds-links.json', 'wb')
    f.write(myjsonfile)
    print("Wrote LDS download links json lds-links.json file to disk.")
    f.close()
    
def loadJSONLinks():
    import json
    f = open('lds-links.json', 'r')
    data = json.load(f)
    links = data['links']
    f.close()
    return links

In [4]:
import datetime
# Download London Datastore .xls(x) files
# Timestamp start
now = datetime.datetime.now()
print ("London datastore download scraper started:", now.strftime("%Y-%m-%d %H:%M:%S"))

# get file download links
links = getDownloadLinks()

# save json file for future reference
saveDownloadLinksToJSON(links)

# file download count
k = 0
# iterate through list of dictionaries
for i in range (0, len(links)):
    # and check the download links dictionary within dictionary
    for link in links[i]['fileurls']:
        if(checkXls(link['fileurl'])):
            k = k + 1
            downloadFile(links[i]['href'], link['fileurl'])

print("Downloaded", k, ".xls(x) files")

# Timestamp end download link scraping
now = datetime.datetime.now()
print ("London datastore download scraper scraper ended:", now.strftime("%Y-%m-%d %H:%M:%S"))            

London datastore download scraper started: 2018-12-04 16:18:54
Downloaded 80 page links
Error occured parsing file download links for href = /dataset/curio-canopy-cover-geodatabase
Error occured parsing file download links for href = /dataset/addressbase-plus-for-contractors
Downloaded page link files
Wrote LDS download links json lds-links.json file to disk.
/dataset/migration-indicators
/download/migration-indicators/0db19902-5013-42af-972d-0e5481d7ac44/Long%20term%20international%20migration.xlsx


PermissionError: [Errno 13] Permission denied: 'download/migration-indicators/0db19902-5013-42af-972d-0e5481d7ac44/Long%20term%20international%20migration.xlsx'

In [8]:
# Sanity check, download based on saved json data
import datetime
# Download London Datastore .xls(x) files
# Timestamp start
now = datetime.datetime.now()
print ("London datastore file download started:", now.strftime("%Y-%m-%d %H:%M:%S"))

# carry on
links = loadJSONLinks()

# iterate through list of dictionaries
k = 0
for i in range (0, len(links)):
    # and check the download links dictionary within dictionary
    for link in links[i]['fileurls']:
        if(checkXls(link['fileurl'])):
            k = k + 1
            downloadFile(links[i]['href'], link['fileurl'])
            print("Download number:", k)
print("Downloaded", k, ".xls(x) files")    

import datetime
# Download London Datastore .xls(x) files
# Timestamp start
now = datetime.datetime.now()
print ("London datastore file download finished:", now.strftime("%Y-%m-%d %H:%M:%S"))

London datastore file download started: 2018-12-05 10:45:48
Downloading https://data.london.gov.uk/download/medium-term-economic-forecast/4d7c2717-e599-4968-9cd5-8e20b87ec001/GLA-london-economic-outlook-2018-11.xls
Saving to directory dataset/medium-term-economic-forecast/GLA-london-economic-outlook-2018-11.xls
Download number: 1
Downloading https://data.london.gov.uk/download/medium-term-economic-forecast/b12edf95-e0fe-4f3f-801f-658a601e97af/GLA-london-economic-outlook-2018-05.xls
Saving to directory dataset/medium-term-economic-forecast/GLA-london-economic-outlook-2018-05.xls
Download number: 2
Downloading https://data.london.gov.uk/download/medium-term-economic-forecast/94836c59-967d-4152-8e06-cd7f21eff23e/GLA-london-economic-outlook-2017-11.xls
Saving to directory dataset/medium-term-economic-forecast/GLA-london-economic-outlook-2017-11.xls
Download number: 3
Downloading https://data.london.gov.uk/download/medium-term-economic-forecast/a45af590-36b1-4651-aa23-835ac5572f1c/GLA-londo

Downloading https://data.london.gov.uk/download/public-transport-journeys-type-transport/a7a69c22-150c-49f3-a1fd-90d4c24d98d4/tfl-journeys-type.xls
Saving to directory dataset/public-transport-journeys-type-transport/tfl-journeys-type.xls
Download number: 67
Downloading https://data.london.gov.uk/download/london-food-strategy-consultation-2018/23e29736-c686-4c8a-8743-0aa23b3f8763/GLA%20consultation%20results%20-%20Unhealthy%20food%20advert%20ban.xls
Saving to directory dataset/london-food-strategy-consultation-2018/GLA%20consultation%20results%20-%20Unhealthy%20food%20advert%20ban.xls
Download number: 68
Downloading https://data.london.gov.uk/download/home-fire-safety-visits-by-london-borough/e2fdc768-a3eb-4984-b491-278c0d24d3da/Home%20Fire%20Safety%20Visits%20by%20London%20Borough.xlsx
Saving to directory dataset/home-fire-safety-visits-by-london-borough/Home%20Fire%20Safety%20Visits%20by%20London%20Borough.xlsx
Download number: 69
Downloading https://data.london.gov.uk/download/londo

Downloading https://data.london.gov.uk/download/employment-rates-by-ethnicity/cf8a5d62-6918-4046-a53b-627ef5dae54e/employment-rate-ethnic-borough.xls
Saving to directory dataset/employment-rates-by-ethnicity/employment-rate-ethnic-borough.xls
Download number: 100
Downloading https://data.london.gov.uk/download/economic-inactivity/aa2655ec-c06b-4f57-ae63-3402413bec42/economic-inactivity.xls
Saving to directory dataset/economic-inactivity/economic-inactivity.xls
Download number: 101
Downloading https://data.london.gov.uk/download/economic-inactivity/5edc4625-64d5-47cd-a3ef-f298d936bd4c/yearly-economic-inactivity-by-gender-reason.xls
Saving to directory dataset/economic-inactivity/yearly-economic-inactivity-by-gender-reason.xls
Download number: 102
Downloading https://data.london.gov.uk/download/unemployment-rate-region/8a29ec0c-9de3-4777-832f-49ef8c2b4d14/unemployment-region.xls
Saving to directory dataset/unemployment-rate-region/unemployment-region.xls
Download number: 103
Downloading 

Downloading https://data.london.gov.uk/download/london-plan-amr14-tables-and-data/e269c2e9-ebf6-43cd-8280-2927c6ba4339/AMR%2014%20Chapter%20Two%20Tables%20.xlsx
Saving to directory dataset/london-plan-amr14-tables-and-data/AMR%2014%20Chapter%20Two%20Tables%20.xlsx
Download number: 189
Downloading https://data.london.gov.uk/download/london-plan-amr14-tables-and-data/1ade71b6-0809-4600-9ff1-08ea40f2259e/LDD%20-%20Housing%20Approvals%20for%20AMR14.xlsx
Saving to directory dataset/london-plan-amr14-tables-and-data/LDD%20-%20Housing%20Approvals%20for%20AMR14.xlsx
Download number: 190
Downloading https://data.london.gov.uk/download/london-plan-amr14-tables-and-data/92842118-4b55-406b-91e2-c3b2e44a6bfb/LDD%20-%20Housing%20Starts%20for%20AMR14.xlsx
Saving to directory dataset/london-plan-amr14-tables-and-data/LDD%20-%20Housing%20Starts%20for%20AMR14.xlsx
Download number: 191
Downloading https://data.london.gov.uk/download/london-plan-amr14-tables-and-data/ad929204-cbe9-4bb2-bed7-1c1d28d210c9/L

Downloading https://data.london.gov.uk/download/senior-lfepa-staff-information-2015/a4c9944a-ad54-41dc-bf1a-ff0913aca302/Senior%20LFB%20staff%20information%20-%20at%201%20April%202018.xlsx
Saving to directory dataset/senior-lfepa-staff-information-2015/Senior%20LFB%20staff%20information%20-%20at%201%20April%202018.xlsx
Download number: 252
Downloading https://data.london.gov.uk/download/senior-lfepa-staff-information-2015/372545a3-290b-4152-9fcf-e640959208a2/Senior%20LFEPA%20staff%20information%20-%20December%202015.xlsx
Saving to directory dataset/senior-lfepa-staff-information-2015/Senior%20LFEPA%20staff%20information%20-%20December%202015.xlsx
Download number: 253
Downloading https://data.london.gov.uk/download/senior-lfepa-staff-information-2015/f3cadd5d-6eea-4149-9d03-b43baabd124a/lfepa-senior-staff-information-2015.xlsx
Saving to directory dataset/senior-lfepa-staff-information-2015/lfepa-senior-staff-information-2015.xlsx
Download number: 254
Downloading https://data.london.gov.

Downloading https://data.london.gov.uk/download/long-term-labour-market-projections/5a5eb3de-f2fa-49ea-90d1-2193084fc33e/London%20all%20scenarios%20no%20links%20for%20datastore%20final.xlsx
Saving to directory dataset/long-term-labour-market-projections/London%20all%20scenarios%20no%20links%20for%20datastore%20final.xlsx
Download number: 293
Downloading https://data.london.gov.uk/download/long-term-labour-market-projections/2f491391-aa88-4352-869c-afa833459058/2017%20London%20and%20sectors%20datastore%20-%20no%20links.xlsx
Saving to directory dataset/long-term-labour-market-projections/2017%20London%20and%20sectors%20datastore%20-%20no%20links.xlsx
Download number: 294
Downloading https://data.london.gov.uk/download/long-term-labour-market-projections/a3e7e2a8-e7ca-4f45-b1af-9921d7a46227/2017%20Borough%20employees%2C%20SE%20and%20employed%20datastore%20-%20no%20links.xlsx
Saving to directory dataset/long-term-labour-market-projections/2017%20Borough%20employees%2C%20SE%20and%20employed

Downloading https://data.london.gov.uk/download/key-stage-2-results-by-borough-old-measures/63e75e0b-b5c8-4b1f-a11a-f56a99e5a8ba/ks2-results-ethnicity-old.xls
Saving to directory dataset/key-stage-2-results-by-borough-old-measures/ks2-results-ethnicity-old.xls
Download number: 330
Downloading https://data.london.gov.uk/download/key-stage-2-results-by-borough-old-measures/e7060602-9122-4c10-8b29-4827935dec0c/ks2-results-language-old.xls
Saving to directory dataset/key-stage-2-results-by-borough-old-measures/ks2-results-language-old.xls
Download number: 331
Downloading https://data.london.gov.uk/download/key-stage-2-results-by-borough-old-measures/1b6fde1a-5d89-43b4-8e3b-78b910d12acb/ks2-results-fsm-old.xls
Saving to directory dataset/key-stage-2-results-by-borough-old-measures/ks2-results-fsm-old.xls
Download number: 332
Downloading https://data.london.gov.uk/download/equalities-diversity-and-inclusion-measures/675a1424-5e81-458d-ba08-3397a5258cb6/EDI%20measures%2024%20May%202018.xlsx
S

Downloading https://data.london.gov.uk/download/key-stage-1-results-by-borough/b7bd7815-11f1-4f6c-b77e-6291996a19cc/ks1-results-fsm.xls
Saving to directory dataset/key-stage-1-results-by-borough/ks1-results-fsm.xls
Download number: 424
Downloading https://data.london.gov.uk/download/economic-inactivity-gender/243c95bd-21b1-4cb0-b5c8-46127c7cb05a/economic-inactivity-by-gender-reason.xls
Saving to directory dataset/economic-inactivity-gender/economic-inactivity-by-gender-reason.xls
Download number: 425
Downloading https://data.london.gov.uk/download/fires-in-london---cause-of-ignition-is-white-goods/a2921da4-b1e4-4452-a54c-86b2b4f341a7/Fires%20in%20white%20goods%20from%202009.xlsx
Saving to directory dataset/fires-in-london---cause-of-ignition-is-white-goods/Fires%20in%20white%20goods%20from%202009.xlsx
Download number: 426
Downloading https://data.london.gov.uk/download/air-quality-focus-areas/4c3b8f5c-af90-41b5-9670-63c8f11fe566/focus-areas.xls
Saving to directory dataset/air-quality-f

Downloading https://data.london.gov.uk/download/registered-social-landlords-average-rents/5b2a5579-3b5f-4c67-b00a-7c1f57aa87a9/social-landlord-rents-borough.xls
Saving to directory dataset/registered-social-landlords-average-rents/social-landlord-rents-borough.xls
Download number: 486
Downloading https://data.london.gov.uk/download/households-local-authority-waiting-list-borough/dac0e8c2-94af-4c5e-b9b9-ef294bfe3915/households-on-local-authority-waiting-list.xls
Saving to directory dataset/households-local-authority-waiting-list-borough/households-on-local-authority-waiting-list.xls
Download number: 487
Downloading https://data.london.gov.uk/download/london-town-centre-health-check-analysis-report/a22fdd74-371a-4a13-acfd-6db32f645bab/2017%20London%20Town%20Centre%20Health%20Check%20Report%20Appendix%204%20-%20technical%20appendix.xlsx
Saving to directory dataset/london-town-centre-health-check-analysis-report/2017%20London%20Town%20Centre%20Health%20Check%20Report%20Appendix%204%20-%20t

Downloading https://data.london.gov.uk/download/gla-poll-results-2016/64a25830-909f-45f7-a682-9d98d2ed3d94/GLA%20Poll%20March%202016.xlsx
Saving to directory dataset/gla-poll-results-2016/GLA%20Poll%20March%202016.xlsx
Download number: 548
Downloading https://data.london.gov.uk/download/gla-poll-results-2016/5b5d5f09-1fb1-4526-9e62-14efa99ecfdf/GLA-28-wtd.xlsx
Saving to directory dataset/gla-poll-results-2016/GLA-28-wtd.xlsx
Download number: 549
Downloading https://data.london.gov.uk/download/police-and-crime-plan-consultation---october-to-november-2016/a063c617-1313-4c55-a931-fcba7d365b54/Police%20and%20Crime%20Plan%20-%20Talk%20London%20%26%20TNS%20crosstabs%20COMBINED.xlsx
Saving to directory dataset/police-and-crime-plan-consultation---october-to-november-2016/Police%20and%20Crime%20Plan%20-%20Talk%20London%20%26%20TNS%20crosstabs%20COMBINED.xlsx
Download number: 550
Downloading https://data.london.gov.uk/download/police-and-crime-plan-consultation---october-to-november-2016/64cd25

Downloading https://data.london.gov.uk/download/hospital-admissions-due-injury-children-borough/24724e77-cd8e-4561-b7e4-42c5895596dc/child-injury-borough.xls
Saving to directory dataset/hospital-admissions-due-injury-children-borough/child-injury-borough.xls
Download number: 646
Downloading https://data.london.gov.uk/download/2011-census-housing/e02652a4-85cb-4a83-821b-a497fff529c0/trends-in-overcrowding-data.xls
Saving to directory dataset/2011-census-housing/trends-in-overcrowding-data.xls
Download number: 647
Downloading https://data.london.gov.uk/download/2011-census-housing/51b968fa-b526-46b3-93d1-5b0e36571eed/housing-characteristics-data.xls
Saving to directory dataset/2011-census-housing/housing-characteristics-data.xls
Download number: 648
Downloading https://data.london.gov.uk/download/2011-census-housing/3a3bf4ec-26af-420a-a82b-3eeb96e0217b/tenure-by-economic-activity-data.xls
Saving to directory dataset/2011-census-housing/tenure-by-economic-activity-data.xls
Download number

Downloading https://data.london.gov.uk/download/london-assembly-research/9d284613-ab6c-43f7-8baf-853daa2eeded/TNS-Internship-Survey-Aug-2014-Data.xlsx
Saving to directory dataset/london-assembly-research/TNS-Internship-Survey-Aug-2014-Data.xlsx
Download number: 731
Downloading https://data.london.gov.uk/download/london-assembly-research/871e4977-5736-493c-82ec-485b5273fb9e/Talk-London-Internship-Survey-Nov-2014-Data.xlsx
Saving to directory dataset/london-assembly-research/Talk-London-Internship-Survey-Nov-2014-Data.xlsx
Download number: 732
Downloading https://data.london.gov.uk/download/borough-council-election-results-2014/8a5db177-9f4b-483a-92a0-7bd5f699f0b5/local-elections-summary-2014.xls
Saving to directory dataset/borough-council-election-results-2014/local-elections-summary-2014.xls
Download number: 733
Downloading https://data.london.gov.uk/download/borough-council-election-results-2014/efbfa0a6-da03-4f7c-b1da-dcd9095bed59/london_council_election_2014_ward.xls
Saving to direc

Downloading https://data.london.gov.uk/download/lsoa-atlas/5f6c40bf-cf32-4594-b2b6-78303ef0b45e/lsoa-data-old-boundaries.xls
Saving to directory dataset/lsoa-atlas/lsoa-data-old-boundaries.xls
Download number: 788
Downloading https://data.london.gov.uk/download/global-city-data/ffcefcba-829c-4220-911f-d4bf17ef75d6/global-city-indicators.xlsx
Saving to directory dataset/global-city-data/global-city-indicators.xlsx
Download number: 789
Downloading https://data.london.gov.uk/download/2011-census-assembly/e6363c97-dc0b-4b91-94e9-f0cf6503d4bb/census-assembly-dashboard.xls
Saving to directory dataset/2011-census-assembly/census-assembly-dashboard.xls
Download number: 790
Downloading https://data.london.gov.uk/download/census-dashboards/bcca8a1a-d754-41d1-aa41-3f8880c5f74e/census-dashboard.xls
Saving to directory dataset/census-dashboards/census-dashboard.xls
Download number: 791
Downloading https://data.london.gov.uk/download/census-dashboards/30e0bb59-d56c-489b-9887-4edab8654b2b/census-dash

Downloading https://data.london.gov.uk/download/languages-spoken-pupils-borough-msoa/3f1da637-7be8-4375-85a4-15a2eb8da7fa/languages-pupils-msoa.xls
Saving to directory dataset/languages-spoken-pupils-borough-msoa/languages-pupils-msoa.xls
Download number: 867
Downloading https://data.london.gov.uk/download/languages-spoken-pupils-borough-msoa/926a2bf8-72ba-4c63-ad74-85d8592dd03a/languages-pupils-borough.xls
Saving to directory dataset/languages-spoken-pupils-borough-msoa/languages-pupils-borough.xls
Download number: 868
Downloading https://data.london.gov.uk/download/public-toilets/7b9b84fb-a65b-43ce-83fc-e761946d715b/public-toilet-borough-grid.xls
Saving to directory dataset/public-toilets/public-toilet-borough-grid.xls
Download number: 869
Downloading https://data.london.gov.uk/download/alternative-vote-referendum-results-borough/830d98f1-e7d5-45fe-8724-3601bbd3d67a/av-ref-results-borough.xls
Saving to directory dataset/alternative-vote-referendum-results-borough/av-ref-results-borou

Downloading https://data.london.gov.uk/download/accessibility-london-underground-stations/2df7a1e4-0d43-432a-b54d-ac7dc4a9b467/tfl-lu-station-accessibility-object-issues.xls
Saving to directory dataset/accessibility-london-underground-stations/tfl-lu-station-accessibility-object-issues.xls
Download number: 900
Downloading https://data.london.gov.uk/download/accessibility-london-underground-stations/bee62c83-2ae5-4163-b6c9-ded57a93a3e7/tfl-lu-station-accessibility-barriers.xls
Saving to directory dataset/accessibility-london-underground-stations/tfl-lu-station-accessibility-barriers.xls
Download number: 901
Downloading https://data.london.gov.uk/download/accessibility-london-underground-stations/3eb9987c-5fba-4774-93d2-a5c4851971e5/tfl-lu-station-accessibility-escalators.xls
Saving to directory dataset/accessibility-london-underground-stations/tfl-lu-station-accessibility-escalators.xls
Download number: 902
Downloading https://data.london.gov.uk/download/accessibility-london-underground

In [192]:
# as an aside, links could be rebuilt from json file
# download excel files
# Validate links rebuilding from json file
# import json
# f = open('lds-links.json', 'r')
# data = json.load(f)
# links = data['links']
# f.close()

# Extras, sanity checks

In [14]:
# this is the path on website where download link is to be found e.g.
# https://data.london.gov.uk/dataset/diversity-london-report-data
path = '/dataset/diversity-london-report-data'
# note we will use the same path locally to store the downloaded file

# this is the file donwload location e.g.
# https://data.london.gov.uk/download/diversity-london-report-data/66ee75f0-3424-4333-8ebf-d227bc74b562/diversity-in-london-data.xlsx
downloadlink = '/download/diversity-london-report-data/66ee75f0-3424-4333-8ebf-d227bc74b562/diversity-in-london-data.xlsx'
# So we check if the file exists locally e.g.
# /dataset/diversity-london-report-data/66ee75f0-3424-4333-8ebf-d227bc74b562/diversity-in-london-data.xlsx'
# downloadlink = downloadlink.split('/')
downloadFile(url, localPath)

NameError: name 'url' is not defined

In [16]:
# another sanity check
localPath = '/dataset/medium-term-economic-forecast'
url = '/download/medium-term-economic-forecast/ec246c96-7e35-4661-820f-da01ab123d08/gla-london-economic-outlook-2015-11.xls'
downloadFile2(url, localPath)

PermissionError: [Errno 13] Permission denied: 'download/medium-term-economic-forecast/ec246c96-7e35-4661-820f-da01ab123d08/gla-london-economic-outlook-2015-11.xls'

In [262]:
path[1:]

'dataset/diversity-london-report-data'

In [235]:
localDowloadPath = path + '/' + downloadlink.split('/')[4]
localDowloadPath[1:]

'dataset/diversity-london-report-data/diversity-in-london-data.xlsx'

In [234]:
downloadlink.split('/')[4]

'diversity-in-london-data.xlsx'