In [3]:
import glob
import logging
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import xarray as xr

In [4]:
# Root of the storage area; monthly folders named <year>_<month> are searched below it.
path = '/uio/lagringshotell/geofag/projects/miphclac/hannasv/'
# Directory with the monthly netCDF files (<year>_<month>.nc), located directly under `path`.
save_dir = path + 'fractions_repo/'

# Years 2004..2018 and calendar months 1..12 (the stop value of np.arange is excluded).
years = np.arange(2004, 2018 + 1)
months = np.arange(1, 12 + 1)

In [6]:
def timestamp(filename):
    """
    Returns the timestamp encoded in a raw filename, truncated to the hour.

    The time is read from the sixth '-'-separated field of the file's base
    name (e.g. '20190203040000.000000000Z'). Minutes and seconds in that
    field are ignored, so every file maps to the start of its hour.
    This is a bit hardcoded at the moment ...

    Parameters
    ----------
    filename : str
        File name or full path of a raw file.

    Returns
    -------
    str
        Timestamp formatted like '2019-02-03T04:00:00.000000'.
    """
    # Parse the base name only, so a '-' in a directory name cannot shift the field index.
    fields = os.path.basename(filename).split('-')
    ts = fields[5]
    year, month, day, hr = ts[:4], ts[4:6], ts[6:8], ts[8:10]
    # TODO make sure all filenames have seconds
    return np.datetime64(year + "-" + month + "-" + day + "T" + hr + ":00:00" + ".000000").astype(str)

In [7]:
def make_folder_str(month, year):
    """ Builds the name of a monthly folder.
    month : int
        Written with two digits (zero padded).
    year : int

    Returns : str
        year_month, e.g. '2004_08'
    """
    padded_month = "%2.2d" % month
    return "_".join([str(year), padded_month])

In [8]:
# Month inspected in the cells below. The values are given explicitly so this cell
# runs on a fresh kernel; the recorded output below lists timestamps from 2004-08.
folder = make_folder_str(month=8, year=2004)

In [9]:
def get_missing_vals(folder):
    """Returns the timesteps on which a monthly folder and the expected hourly series disagree.

    Parameters
    ----------
    folder : str
        Folder name on the form '<year>_<month>', e.g. '2004_08'.

    Returns
    -------
    list of str
        First the timestamps parsed from the '*grb' files in the folder that
        are not part of the hourly series for that month, then the hourly
        timestamps of the month for which no file was found.
    """
    year, month = folder.split('_')
    year = int(year)
    month = int(month)

    # The series stops where the next month starts; December rolls over into
    # January of the following year.
    if month < 12:
        end = datetime(year, month + 1, 1)
    else:
        end = datetime(year + 1, 1, 1)

    expected = np.arange(datetime(year, month, 1), end, timedelta(hours=1)).astype(str)
    # Rebuild the folder name so the month part is always zero padded.
    folder = make_folder_str(month, year)
    files_in_folder = glob.glob(os.path.join(path, folder, '*grb'))
    found = [timestamp(fil) for fil in files_in_folder]

    # Sets make the membership tests cheap; iterating the lists keeps the ordering.
    expected_set = set(expected)
    found_set = set(found)
    unexpected = [x for x in found if x not in expected_set]
    missing = [x for x in expected if x not in found_set]
    return unexpected + missing

In [10]:
# Timesteps that differ between the files found in `folder` and the hourly series for that month.
get_missing_vals(folder)

['2004-08-26T02:00:00.000000', '2004-08-26T03:00:00.000000']

In [11]:
# Every '*grb' file of the selected month, and the timestamp string parsed from each name.
files_in_folder = glob.glob(os.path.join(path, folder, '*grb'))
times = list(map(timestamp, files_in_folder))

In [12]:
def timestamp_to_file_search_str(timestamp):
    """Turns a timestamp string into the compact form used to search file names.

    Everything from the first ':' onwards is discarded and the '-' and 'T'
    separators are removed, e.g. '2004-08-01T15' -> '2004080115'.
    """
    up_to_hour = timestamp.partition(':')[0]
    return up_to_hour.replace('-', '').replace('T', '')

In [13]:
# Quick check of the conversion on an hour-resolution timestamp.
timestamp_to_file_search_str('2004-08-01T15')

'2004080115'

In [14]:
# Example timestamp string (same value as the first entry of the get_missing_vals output above).
'2004-08-26T02:00:00.000000'

'2004-08-26T02:00:00.000000'

In [15]:
def removes_duplicates(year, month):
    """Selects one file per timestamp among the '*grb' files of a month.

    Nothing is deleted from disk; the function only decides which files to
    keep. Timestamps are compared as returned by timestamp(), i.e. at hour
    resolution.

    Parameters
    ----------
    year : int
    month : int

    Returns
    -------
    list of str
        Paths of the files to keep: for every timestamp the first file in
        sorted order. Equals the full (sorted) file list when no timestamp
        occurs more than once.
    """
    folder = make_folder_str(month, year)
    # Sorted so that "the first one" is the same file on every run.
    files_in_folder = sorted(glob.glob(os.path.join(path, folder, '*grb')))

    keeping = []
    seen = set()
    for fil in files_in_folder:
        # if timestep is already there don't append
        ts = timestamp(fil)
        if ts in seen:
            continue
        seen.add(ts)
        keeping.append(fil)  # only keep the first one for multiple files of the same data
    return keeping

In [16]:
def find_correct_fraction_file_and_add_regridded():
    """
    Placeholder that is not implemented yet.

    The body is empty, so calling it does nothing and returns None.
    """
    pass

In [21]:
# len(get_missing_vals(make_folder_str(year=y, month=m)

In [23]:
# Monthly files to inspect, listed in the order they are opened below.
files = [
    # even-numbered months
    '2004_04.nc', '2004_06.nc', '2004_08.nc', '2004_10.nc', '2004_12.nc',
    '2005_06.nc', '2005_08.nc', '2005_10.nc', '2005_12.nc',
    '2006_02.nc',
    # odd-numbered months
    '2004_05.nc', '2004_07.nc', '2004_09.nc', '2004_11.nc',
    '2005_05.nc', '2005_07.nc', '2005_09.nc', '2005_11.nc',
    '2006_01.nc',
]


In [25]:
for fil in files:
    # Open each monthly file in a context manager so it is closed again
    # before the next one is opened.
    with xr.open_dataset(os.path.join(save_dir, fil)) as data:
        print(data)
    print(' ')

<xarray.Dataset>
Dimensions:    (latitude: 81, longitude: 161, time: 690)
Coordinates:
  * time       (time) datetime64[ns] 2004-04-02 ... 2004-04-30T23:00:00
  * latitude   (latitude) float64 30.0 30.25 30.5 30.75 ... 49.5 49.75 50.0
  * longitude  (longitude) float64 -15.0 -14.75 -14.5 -14.25 ... 24.5 24.75 25.0
Data variables:
    tcc        (time, latitude, longitude) float64 ...
    nr_nans    (time, latitude, longitude) float64 ...
 
<xarray.Dataset>
Dimensions:    (latitude: 81, longitude: 161, time: 716)
Coordinates:
  * time       (time) datetime64[ns] 2004-06-01 ... 2004-06-30T23:00:00
  * latitude   (latitude) float64 30.0 30.25 30.5 30.75 ... 49.5 49.75 50.0
  * longitude  (longitude) float64 -15.0 -14.75 -14.5 -14.25 ... 24.5 24.75 25.0
Data variables:
    tcc        (time, latitude, longitude) float64 ...
    nr_nans    (time, latitude, longitude) float64 ...
 
<xarray.Dataset>
Dimensions:    (latitude: 81, longitude: 161, time: 741)
Coordinates:
  * time       (time) dat

# Testing: adding a single file to an existing monthly fraction file

In [18]:
# Same storage locations as assigned in the configuration cell near the top of the notebook.
path = '/uio/lagringshotell/geofag/projects/miphclac/hannasv/'
save_dir = path + 'fractions_repo/'

In [19]:
# Years 2004-2018 and calendar months 1-12 (np.arange excludes the stop value).
# NOTE(review): identical to the `years`/`months` assignments near the top of the notebook.
years = np.arange(2004, 2019)
months = np.arange(1, 13)

In [26]:
def add_file_to_existing(filename,
                         lat = np.arange(30.0, 50.25, 0.25),
                         lon = np.arange(-15.0, 25.25, 0.25)):
    """Computes the fields for one raw file and merges them into its monthly dataset.

    Parameters
    ----------
    filename : str
        Raw file to add. The monthly file it belongs to is looked up with
        get_fraction_file.
    lat : array-like
        Values used as the 'latitude' coordinate; also passed to compute.
    lon : array-like
        Values used as the 'longitude' coordinate; also passed to compute.

    Returns
    -------
    xarray.Dataset
        The monthly dataset with the new timestep merged in, or the monthly
        dataset as opened from disk if the merge raised xr.MergeError.
    """
    # Find folder to add it to
    target_fil = get_fraction_file(filename)
    # NOTE(review): the dataset is returned to the caller, so it is not closed here.
    ds = xr.open_dataset(target_fil)

    cloud_fraction, nans = compute(filename, lat, lon)
    new_ds = xr.Dataset({'tcc': (['latitude', 'longitude'], cloud_fraction),
                         'nr_nans': (['latitude', 'longitude'], nans),
                         },
                        coords={'longitude': (['longitude'], lon),
                                'latitude': (['latitude'], lat),
                                })

    # timestamp() returns a string; convert it to datetime64[ns] so the new step
    # has the same dtype as the time axis of the monthly files.
    ts = np.datetime64(timestamp(filename), 'ns')

    # Add time as a coordinate and dimension.
    new_ds = new_ds.assign_coords(time = ts)
    new_ds = new_ds.expand_dims(dim = 'time')

    try:
        ds = ds.merge(new_ds)
    except xr.MergeError:
        # Happens if MS1 and MS2 have taken a image at the same time
        print("Filename not included {}".format(filename))
    else:
        # Only report success when the merge actually went through.
        logging.debug('Finished added {} to {}.'.format(filename, target_fil))
    return ds


In [27]:
# Example raw filename used for the lookup test below.
test_fil = 'MSG4-SEVI-MSGCLMK-0100-0100-20190203040000.000000000Z-20190203041358-1375012.grb'

In [39]:
def get_fraction_file(filename,
                      repo_dir='/uio/lagringshotell/geofag/projects/miphclac/hannasv/fractions_repo/'):
    """Returns the path of the monthly file that a raw file belongs to.

    The year and month are taken from the first six digits of the sixth
    '-'-separated field of the file's base name.

    Parameters
    ----------
    filename : str
        Raw filename, with or without a directory part.
    repo_dir : str, optional
        Directory containing the monthly '<year>_<month>.nc' files.

    Returns
    -------
    str
        repo_dir joined with '<year>_<month>.nc', e.g. '.../2019_02.nc'.
    """
    # Parse the base name only, so a '-' or '.' in a directory name cannot disturb the split.
    date_field = os.path.basename(filename).split('-')[5]
    year_month = date_field.split('.')[0][:6]
    target_fil = year_month[:4] + '_' + year_month[4:] + '.nc'

    return os.path.join(repo_dir, target_fil)


In [38]:
# The lookup function is named get_fraction_file; the longer name used here before
# is not defined in the visible notebook.
get_fraction_file(test_fil)

'/uio/lagringshotell/geofag/projects/miphclac/hannasv/fractions_repo/2019_02.nc'