# Download Plant Data from Zenodo

## Improvements
1. Use the file checksums (MD5) from the Zenodo record to see if files need downloading
1. Save the initial request information for later viewing

In [1]:
import requests
import os

In [2]:
def download_file(url, outfile, timeout=60):
    """Stream the content at ``url`` into the local file ``outfile``.

    The body is fetched in 1 MiB chunks so that large archives are never held
    in memory in full. A running chunk counter is printed on a single line
    that is overwritten as the download progresses.

    Parameters
    ----------
    url : str
        Address of the file to download.
    outfile : str
        Path of the local file to write. An existing file is overwritten.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Bounds how long to wait for the server
        to connect or to send the next bytes; it is not a limit on the total
        download time.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. ``outfile`` is not
        created or modified in that case.
    """
    chunk_size = 1024 * 1024

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as get_response:
        # Fail before touching the disk so an error page is never saved as data.
        get_response.raise_for_status()

        chunk_number = 0
        with open(outfile, 'wb') as f:
            for chunk in get_response.iter_content(chunk_size=chunk_size):
                if not chunk:  # filter out keep-alive chunks
                    continue

                f.write(chunk)

                # Count only chunks that were actually written.
                chunk_number += 1
                print(str(chunk_number) + ' MB downloaded', end='\r')

In [3]:
def _md5_matches(path, checksum):
    """Return True when ``path`` exists and its MD5 digest equals ``checksum``.

    ``checksum`` is expected in the ``'md5:<hex digest>'`` form used by the
    Zenodo record's file entries. A missing checksum, a different algorithm
    prefix or a missing file all yield False, so the caller downloads the file.
    """
    import hashlib

    prefix = 'md5:'
    if not checksum or not checksum.startswith(prefix):
        return False
    if not os.path.isfile(path):
        return False

    digest = hashlib.md5()
    with open(path, 'rb') as fh:
        # Hash in 1 MiB blocks so large archives are not loaded into memory.
        for block in iter(lambda: fh.read(1024 * 1024), b''):
            digest.update(block)

    return digest.hexdigest() == checksum[len(prefix):].lower()


def download_asset_data(asset):
    """Download all files of a wind farm's Zenodo record into ``data/<asset>/``.

    The record metadata is requested from the Zenodo API, a short summary
    (title, version, URL, DOI, license) is printed, and each file listed in
    the record is saved under ``data/<asset>/``. A file that is already
    present locally with an MD5 digest matching the record's checksum is not
    downloaded again.

    Parameters
    ----------
    asset : str
        Name of the wind farm: ``'kelmarsh'`` or ``'penmanshiel'``.

    Raises
    ------
    NameError
        If no Zenodo record id is defined for ``asset``.
    requests.HTTPError
        If the metadata request is answered with an error status.
    """
    record_ids = {
        'kelmarsh': '5841834',
        'penmanshiel': '5946808',
    }

    if asset not in record_ids:
        # NameError is kept so callers that already catch it keep working.
        raise NameError('Zenodo record id undefined for: ' + asset)

    url_zenodo = r'https://zenodo.org/api/records/'

    r = requests.get(url_zenodo + record_ids[asset], timeout=60)
    # Stop on an HTTP error instead of trying to parse an error page as JSON.
    r.raise_for_status()

    r_json = r.json()

    print('======')
    print('Title: ' + r_json['metadata']['title'])
    print('Version: ' + r_json['metadata']['version'])
    print('URL: ' + r_json['links']['latest_html'])
    print('Record DOI: ' + r_json['doi'])
    print('License: ' + r_json['metadata']['license']['id'])
    print('======\n')

    outfile_path = 'data/' + asset + '/'

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outfile_path, exist_ok=True)

    for f in r_json['files']:

        url_file = f['links']['self']
        file_name = f['key']
        outfile = outfile_path + file_name

        # Skip files that are already complete on disk.
        if _md5_matches(outfile, f.get('checksum')):
            print('Skipping (checksum matches existing file): ' + outfile + '\n')
            continue

        print('Downloading: ' + file_name)
        print('File size: ' + str(round(f['size']/(1024*1024),2)) + 'MB')

        download_file(url_file, outfile)

        print('Saved to: ' + outfile + '\n')

In [4]:
# Wind farm to fetch; download_asset_data only defines record ids for 'kelmarsh' and 'penmanshiel'.
asset = 'penmanshiel'

In [5]:
# Print the record summary, then save the record's files under data/<asset>/.
download_asset_data(asset)

Title: Penmanshiel Wind Farm Data
Version: 0.0.2
URL: https://zenodo.org/record/5946808
Record DOI: 10.5281/zenodo.5946808
License: CC-BY-4.0

Downloading: Penmanshiel_28.7MW_14xSenvion_MM82.kmz
File size: 0.02MB
Saved to: data/penmanshiel/Penmanshiel_28.7MW_14xSenvion_MM82.kmz

Downloading: Penmanshiel_Grid_3153.zip
File size: 1.23MB
Saved to: data/penmanshiel/Penmanshiel_Grid_3153.zip

Downloading: Penmanshiel_PMU_3152.zip
File size: 52.21MB
Saved to: data/penmanshiel/Penmanshiel_PMU_3152.zip

Downloading: Penmanshiel_SCADA_2016_WT01-10_3107.zip
File size: 122.95MB
Saved to: data/penmanshiel/Penmanshiel_SCADA_2016_WT01-10_3107.zip

Downloading: Penmanshiel_SCADA_2016_WT11-15_3107.zip
File size: 53.33MB
Saved to: data/penmanshiel/Penmanshiel_SCADA_2016_WT11-15_3107.zip

Downloading: Penmanshiel_SCADA_2017_WT01-10_3114.zip
File size: 260.86MB
Saved to: data/penmanshiel/Penmanshiel_SCADA_2017_WT01-10_3114.zip

Downloading: Penmanshiel_SCADA_2017_WT11-15_3115.zip
File size: 145.81MB
Save

In [6]:
# Fetch the Kelmarsh record (id 5841834) directly so the raw API response
# can be inspected in the following cells.
url_zenodo = r'https://zenodo.org/api/records/'

r = requests.get(url_zenodo + '5841834', timeout=60)

# Stop here on an HTTP error rather than inspecting an error page as if it
# were record metadata.
r.raise_for_status()

In [7]:
# Decode the JSON body of the response into Python objects.
r_json = r.json()

In [8]:
# Display the parsed record; each entry under 'files' carries, among others,
# 'checksum', 'key', 'links' and 'size'.
r_json

{'conceptdoi': '10.5281/zenodo.5841833',
 'conceptrecid': '5841833',
 'created': '2022-02-01T13:52:05.289877+00:00',
 'doi': '10.5281/zenodo.5841834',
 'files': [{'bucket': '96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12',
   'checksum': 'md5:9b40d215744e437f96786a4da2d2845d',
   'key': 'Kelmarsh_12.3MW_6xSenvion_MM92.kmz',
   'links': {'self': 'https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12/Kelmarsh_12.3MW_6xSenvion_MM92.kmz'},
   'size': 38358,
   'type': 'kmz'},
  {'bucket': '96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12',
   'checksum': 'md5:6d8f04ad9d25f9b665ebf42efac7ec4b',
   'key': 'Kelmarsh_Grid_3088.zip',
   'links': {'self': 'https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12/Kelmarsh_Grid_3088.zip'},
   'size': 1518627,
   'type': 'zip'},
  {'bucket': '96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12',
   'checksum': 'md5:18e8ea74eb86ab289b4c471a1b0e833a',
   'key': 'Kelmarsh_PMU_3089.zip',
   'links': {'self': 'https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c

In [10]:
# Display the raw, undecoded response body (bytes).
r.content

b'{"conceptdoi":"10.5281/zenodo.5841833","conceptrecid":"5841833","created":"2022-02-01T13:52:05.289877+00:00","doi":"10.5281/zenodo.5841834","files":[{"bucket":"96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12","checksum":"md5:9b40d215744e437f96786a4da2d2845d","key":"Kelmarsh_12.3MW_6xSenvion_MM92.kmz","links":{"self":"https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12/Kelmarsh_12.3MW_6xSenvion_MM92.kmz"},"size":38358,"type":"kmz"},{"bucket":"96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12","checksum":"md5:6d8f04ad9d25f9b665ebf42efac7ec4b","key":"Kelmarsh_Grid_3088.zip","links":{"self":"https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12/Kelmarsh_Grid_3088.zip"},"size":1518627,"type":"zip"},{"bucket":"96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12","checksum":"md5:18e8ea74eb86ab289b4c471a1b0e833a","key":"Kelmarsh_PMU_3089.zip","links":{"self":"https://zenodo.org/api/files/96ea78d8-5046-4fd7-b6bd-c8bc6c17ce12/Kelmarsh_PMU_3089.zip"},"size":68911984,"type":"zip"},{"bucket":"96ea78d8-5046-4f