In [1]:
import os
import shutil

from collections import Counter
from zipfile import ZipFile

In [7]:
import file_transfer.datamover as dm

In [8]:
# Handler bound to the "lw-enram" S3 bucket; reused by the cells below
s3client = dm.S3EnramHandler("lw-enram")

In [9]:
# Show which bucket the handler points to
s3client.bucket_name

'lw-enram'

# Introduction

# Functionalities

4. Parse the file name for metadata, e.g. `dkrom_vp_20170114231500.h5`:

    * **country**: 2 characters `dk`
    * **radar**: 3 characters `rom`
    * ignore `_vp_`
    * **year**: 4 characters `2017`
    * **month**: 2 characters `01`
    * **day**: 2 characters `14`
    * **hour**: 2 characters `23`
    * **minutes**: 2 characters `15`

# Baltrad to S3

In [10]:
from file_transfer.creds import URL, LOGIN, PASSWORD

In [12]:
# Mover object for Baltrad -> S3 transfers, targeting the "lw-enram" bucket.
# NOTE(review): URL/LOGIN/PASSWORD presumably identify the Baltrad server — confirm against BaltradToS3.
btos = dm.BaltradToS3(URL, LOGIN, PASSWORD, "lw-enram")

In [22]:
# Transfer the files whose name contains "_vp_".
# NOTE(review): presumably limit=6 caps the number of files handled and overwrite=False
# leaves files already on S3 untouched — confirm against BaltradToS3.transfer.
btos.transfer(name_match="_vp_", overwrite=False, limit=6)
# Write a report of this transfer.
# NOTE(review): the `logtest` file shown below looks like the report target and
# reset_file=False presumably appends to it — confirm.
btos.report(reset_file=False, transfertype="Baltrad to S3")

In [23]:
# List the working directory
!ls

bash_file_info.py  enram_api.py  file_handling.ipynb  LICENSE	 schema.svg
coverage.csv	   example_data  file_transfer	      logtest	 Untitled.ipynb
database	   example.py	 hdf5_handling	      README.md


In [24]:
# Print the content of the `logtest` file
! cat logtest

-------------------------------------------------------
Data transfer at 2017-02-13 14:38 from Baltrad to S3:
-------------------------------------------------------

Files not transferred:


Files succesfully transferred:
czbrd_vp_20170210233000.h5
czbrd_vp_20170210234500.h5


-------------------------------------------------------
Data transfer at 2017-02-13 17:02 from Baltrad to S3:
-------------------------------------------------------

Files not transferred:
czbrd_vp_20170210233000.h5
czbrd_vp_20170210234500.h5

Files succesfully transferred:
czbrd_vp_20170211014500.h5
czbrd_vp_20170211060000.h5


-------------------------------------------------------
Data transfer at 2017-02-13 17:03 from Baltrad to S3:
-------------------------------------------------------

Files not transferred:
czbrd_vp_20170210233000.h5
czbrd_vp_20170210234500.h5
czbrd_vp_20170211014500.h5
czbrd_vp_20170211060000.h5

Files succesfully transferred:



-----------------

In [25]:
# File names recorded on the mover as transferred (presumably by the last transfer() call — confirm)
btos.transferred

['czbrd_vp_20170211061500.h5', 'czbrd_vp_20170211090000.h5']

# Coverage

In [11]:
# Connect to the S3 client for the "lw-enram" bucket.
# Uses the `dm` alias imported at the top of the notebook: no `ft` alias is
# imported in the visible cells, so `ft.S3EnramHandler` fails on a fresh kernel.
s3client = dm.S3EnramHandler("lw-enram")

In [12]:
# Rerun the file list overview to extract the current coverage,
# requested at level='day' (presumably one count per day — confirm)
coverage_count = s3client.count_enram_coverage(level='day')

In [13]:
# Write the coverage counts to a local coverage.csv file.
# NOTE(review): `ft` is not imported in any visible cell (only `dm` is); this cell
# depends on an import from an earlier kernel session — add it to the import cell.
with open("coverage.csv", 'w') as outfile:
    ft.coverage_to_csv(outfile, coverage_count)    

In [14]:
# List the working directory to check that coverage.csv is present
!ls

bash_file_info.py  enram_api.py  file_handling.ipynb  LICENSE	 schema.svg
coverage.csv	   example_data  file_transfer	      logtest	 Untitled.ipynb
database	   example.py	 hdf5_handling	      README.md


In [15]:
# Upload the local coverage.csv to the bucket under the key "coverage.csv"
# (argument order presumably (filename, object_key), as in the keyword call further down)
s3client.upload_file("coverage.csv", "coverage.csv")

## ZIP provision

* OPTION: redo it for all months OR redo it only for those months in which an update was done
* READ all file keys to local directory => ZIP => upload to S3

## subfunctionality: for a list of filenames -> update those months

In [7]:
# Example key set covering two radars of country code `be` (bewid, bejab).
# Kept under its own name for reference: it was previously assigned to `keyset`
# and overwritten by the next statement, so it never reached create_zip_version.
keyset_two_radars = ['bewid_vp_20161120233000.h5',
                     'bewid_vp_20161120233500.h5',
                     'bewid_vp_20161120234000.h5',
                     'bewid_vp_20161120234500.h5',
                     'bewid_vp_20161120235000.h5',
                     'bewid_vp_20161120235500.h5',
                     'bejab_vp_20161120235000.h5']

# Key set actually used by the following cell
keyset = ["filuo_vp_20170117133000.h5"]

In [8]:
# Rebuild the zip archive(s) for the radar/month combinations of the files in `keyset`
s3client.create_zip_version(keyset)

Saved  filuo201701.zip on S3 bucket.


### OPTION 1: LIST ALL

Get information about which zips to create from the entire coverage...

In [6]:
# Build the zips for every entry of the month-level coverage of the bucket
s3client.create_zip_version(s3client.count_enram_coverage(level="month"))

Saved  selek201701.zip on S3 bucket.
Saved  deemd201611.zip on S3 bucket.
Saved  frgre201701.zip on S3 bucket.
Saved  bewid201611.zip on S3 bucket.
Saved  frmcl201701.zip on S3 bucket.
Saved  frgre201611.zip on S3 bucket.
Saved  skkub201701.zip on S3 bucket.
Saved  frtou201611.zip on S3 bucket.
Saved  esalm201611.zip on S3 bucket.
Saved  dkbor201611.zip on S3 bucket.
Saved  frnim201701.zip on S3 bucket.
Saved  frabb201611.zip on S3 bucket.
Saved  dedrs201611.zip on S3 bucket.
Saved  plpoz201701.zip on S3 bucket.
Saved  frlep201611.zip on S3 bucket.
Saved  fianj201701.zip on S3 bucket.
Saved  frbou201701.zip on S3 bucket.
Saved  essan201611.zip on S3 bucket.
Saved  frniz201611.zip on S3 bucket.
Saved  fikes201611.zip on S3 bucket.
Saved  denhb201701.zip on S3 bucket.
Saved  fivim201611.zip on S3 bucket.
Saved  eslid201611.zip on S3 bucket.
Saved  plrze201701.zip on S3 bucket.
Saved  deoft201611.zip on S3 bucket.
Saved  fropo201611.zip on S3 bucket.
Saved  desna201701.zip on S3 bucket.
S

EndpointConnectionError: Could not connect to the endpoint URL: "https://lw-enram.s3.amazonaws.com/fi/luo/2017/01/17/13/filuo_vp_20170117133000.h5"

EndpointConnectionError: Could not connect to the endpoint URL: "https://lw-enram.s3.amazonaws.com/fi/luo/2017/01/17/13/filuo_vp_20170117133000.h5"

### Always rebuilding all zips would be too much work/loading => be selective

#### BASED ON A specific radar/country

In [22]:
# Country code (2 characters) and radar code (3 characters) selecting the files to handle
country = "be"
radar = "wid"

In [23]:
# Walk over every key stored under <country>/<radar>: show its (folder, name)
# split, remember the file name and fetch the file locally.
radar_prefix = country + "/" + radar
keyset = []
for key in s3client.list_files_path(paths=radar_prefix):
    key_parts = os.path.split(key)
    print(key_parts)
    keyset.append(key_parts[1])
    s3client.download_file(key)
#s3client.create_zip_version(keyset)

('be/wid/2016/11/20/23', 'bewid_vp_20161120233000.h5')
('be/wid/2016/11/20/23', 'bewid_vp_20161120233500.h5')
('be/wid/2016/11/20/23', 'bewid_vp_20161120234000.h5')
('be/wid/2016/11/20/23', 'bewid_vp_20161120234500.h5')
('be/wid/2016/11/20/23', 'bewid_vp_20161120235000.h5')
('be/wid/2016/11/20/23', 'bewid_vp_20161120235500.h5')
('be/wid/2016', 'bewid201611.zip')


In [17]:
# Collect the file name of every key stored under <country>/<radar>, printing
# the (folder, name) split of each, then rebuild the zips for those files.
radar_prefix = country + "/" + radar
keyset = []
for key in s3client.list_files_path(paths=radar_prefix):
    key_parts = os.path.split(key)
    print(key_parts)
    keyset.append(key_parts[1])
s3client.create_zip_version(keyset)

('de/drs/2016/11/20/23', 'dedrs_vp_20161120234500.h5')
('de/drs/2016', 'dedrs201611.zip')
('de/drs/2017/01/15/03', 'dedrs_vp_20170115031500.h5')
('de/drs/2017/01/15/03', 'dedrs_vp_20170115034500.h5')
('de/drs/2017/01/15/05', 'dedrs_vp_20170115053000.h5')
('de/drs/2017/01/16/04', 'dedrs_vp_20170116041500.h5')
('de/drs/2017/01/16/05', 'dedrs_vp_20170116051500.h5')
('de/drs/2017/01/16/15', 'dedrs_vp_20170116154500.h5')
('de/drs/2017/01/16/18', 'dedrs_vp_20170116184500.h5')
('de/drs/2017/01/17/01', 'dedrs_vp_20170117013000.h5')
('de/drs/2017/01/17/07', 'dedrs_vp_20170117070000.h5')
('de/drs/2017', 'dedrs201701.zip')
Saved  dedrs201611.zip on S3 bucket.
Saved  dedrs201701.zip on S3 bucket.


#### BASED ON A LIST OF FILES -> only for those combinations

In [26]:
# File names recorded on the mover as transferred; used as input for the zip update below
btos.transferred

['czbrd_vp_20170211061500.h5', 'czbrd_vp_20170211090000.h5']

In [27]:
# Check that the S3 handler is still available in this session
s3client

<file_transfer.s3enram.S3EnramHandler at 0x7f2919032898>

In [28]:
# Rebuild only the zips affected by the files listed in btos.transferred
s3client.create_zip_version(btos.transferred)

Saved  czbrd201702.zip on S3 bucket.


#### BASED ON A TIMING -> only after a specific date

TODO!

In [75]:
# Build and upload one zip archive per (country, radar, year, month) entry of
# the month-level coverage, then remove the local copies again.
# NOTE(review): parse_coverage_month and parse_filename are not defined or
# imported in the visible cells — confirm where they come from.
file_count = s3client.count_enram_coverage(level='month')
for coverage_key, _count in file_count.items():  # all these months -> create a zip
    # GET INFO
    country, radar, year, month = parse_coverage_month(coverage_key)

    # DOWNLOAD FILES
    month_keys = []
    for key in s3client.list_files_path(paths="/".join([country, radar, year, month])):
        s3client.download_file(key)
        month_keys.append(key)

    # Nothing was downloaded: there is nothing to zip, upload or clean up
    if not month_keys:
        continue

    # CREATE ZIP
    zip_name = "".join([country, radar, year, month, ".zip"])
    with ZipFile(zip_name, 'w') as ziph:
        for key in month_keys:
            fname = os.path.basename(key)
            # Per-file metadata gets its own name so it cannot leak into the
            # upload/cleanup steps below (it used to shadow the loop variable)
            file_meta = parse_filename(fname)
            # Source: local copy under ./country/radar/year/month/day/hour/;
            # inside the archive only day/hour/<file name> is kept
            ziph.write(os.path.join(".", file_meta['country'], file_meta['radar'], file_meta['year'],
                                    file_meta['month'], file_meta['day'], file_meta['hour'], fname),
                       os.path.join(file_meta['day'], file_meta['hour'], fname))

    # SAVE IN S3 (key built from the coverage entry, not from the last file seen)
    key_name = "/".join([country, radar, year, zip_name])
    s3client.upload_file(filename=zip_name, object_key=key_name)

    # REMOVE ZIP
    os.remove(zip_name)

    # REMOVE FOLDERS
    shutil.rmtree(os.path.join(".", country, radar, year, month))
    # Also prunes the radar and country folders when they end up empty
    os.removedirs(os.path.join(".", country, radar, year))

EndpointConnectionError: Could not connect to the endpoint URL: "https://lw-enram.s3.amazonaws.com/se/arl/2017/01/16/21/searl_vp_20170116214500.h5"