In [None]:
# https://sentinelhub-py.readthedocs.io/en/latest/configure.html#

%matplotlib inline
%reload_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import rasterio as rs
import seaborn as sns
import glob
import matplotlib.pyplot as plt
import os
from PIL import Image

from geopy.distance import VincentyDistance
from sentinelhub import WebFeatureService, BBox, CRS, DataSource, WmsRequest, WcsRequest, MimeType, get_area_info
from IPython.display import clear_output
from rasterio.plot import reshape_as_image

# Five-colour "coolwarm" palette passed to the seaborn plots below.
palette = sns.color_palette("coolwarm", 5)

# Sentinel Hub instance id handed to every request in this notebook.
# Read from the SENTINELHUB_INSTANCE_ID environment variable (a name chosen for
# this notebook) so the credential does not have to be pasted into, and
# committed with, the notebook. Falls back to the previous empty-string
# placeholder when the variable is not set.
instance_id = os.environ.get('SENTINELHUB_INSTANCE_ID', '')

In [None]:
def grade(row):
    """Return the binary label for one sample.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the key ``'grade'`` (e.g. a DataFrame row).

    Returns
    -------
    int
        1 when ``row['grade']`` is greater than 0, otherwise 0.
    """
    return 1 if row['grade'] > 0 else 0

# Load the sample table; `id` is read as str rather than letting pandas infer
# a numeric type for it.
df = pd.read_csv('../data/fallow_dataset.csv', dtype={'id': str})
print('gathered %s samples' % len(df))

# Binary label: 1 when grade > 0, otherwise 0 (the same rule as grade()).
# Computed column-wise instead of one Python call per row; unlike a row-wise
# apply, this also yields a proper (empty) column when the table has no rows.
df['label'] = (df['grade'] > 0).astype(int)

df.head()

In [None]:
# Number of samples for each value of the `grade` column.
sns.countplot(data=df, x='grade', palette=palette)

In [None]:
# Distribution of `grade_area` within each `grade` value.
sns.boxplot(data=df, x='grade', y='grade_area', palette=palette)

In [None]:
# Number of samples for each value of the binary `label` column.
sns.countplot(data=df, x='label', palette=palette)

In [None]:
# Distribution of `grade_area` within each binary `label` value.
sns.boxplot(data=df, x='label', y='grade_area', palette=palette)

In [None]:
# Keep only the samples whose `grade_area` is at least 0.15.
has_min_area = df['grade_area'] >= 0.15
df = df[has_min_area]
print('after filtering %s samples' % len(df))

In [None]:
 def plot_image(image, factor=1):
     """Show `image` in a new 15x7 inch matplotlib figure.

     Floating-point images are multiplied by `factor` and capped at 1 before
     being drawn; images of any other dtype are drawn unchanged.

     Parameters
     ----------
     image : numpy array
         Pixel data handed to ``plt.imshow``.
     factor : number, optional
         Multiplier applied to floating-point images only (default 1).
     """
     # Opens the figure that the imshow call below draws into.
     plt.subplots(nrows=1, ncols=1, figsize=(15, 7))

     is_float = np.issubdtype(image.dtype, np.floating)
     pixels = np.minimum(image * factor, 1) if is_float else image
     plt.imshow(pixels)

# Preview: request one small WGS84 box and plot a single channel of the result.
coords_visuals_wgs84 = [20.050616, 51.439310, 20.057914, 51.443872]
bbox_visuals = BBox(bbox=coords_visuals_wgs84, crs=CRS.WGS84)

# NOTE: despite the `wms_` prefix, this is a WcsRequest (resolution is given
# through resx/resy).
wms_visuals_request = WcsRequest(
    layer='FALLOWINITIAL',
    bbox=bbox_visuals,
    time='latest',
    resx='10m',
    resy='10m',
    maxcc=0.1,
    image_format=MimeType.TIFF_d32f,
    instance_id=instance_id,
)

wms_visuals = wms_visuals_request.get_data()

# Draw channel index 3 of the last image in the returned list.
# NOTE(review): which band index 3 corresponds to depends on the
# 'FALLOWINITIAL' layer definition - confirm.
last_image = wms_visuals[-1]
plot_image(last_image[:, :, 3])

In [None]:
def update_progress(progress):
    """Redraw a 20-character text progress bar in the cell output.

    Parameters
    ----------
    progress : int or float
        Completed fraction. Ints are converted to float, any other non-float
        value is treated as 0, and the value is limited to the range 0..1.

    The previous cell output is cleared (deferred until new output arrives)
    and a line of the form ``Progress: [#####---------------] 25.0%`` is
    printed.
    """
    bar_length = 20

    # Normalise the input to a number; unsupported types count as "no progress".
    if isinstance(progress, int):
        fraction = float(progress)
    elif isinstance(progress, float):
        fraction = progress
    else:
        fraction = 0

    # Limit to the closed interval [0, 1].
    fraction = min(max(fraction, 0), 1)

    filled = int(round(bar_length * fraction))
    clear_output(wait=True)
    bar = "#" * filled + "-" * (bar_length - filled)
    print("Progress: [{0}] {1:.1f}%".format(bar, fraction * 100))

In [None]:
# One (start, end) window per year, running from 1 June to 30 September.
years = ('2015', '2016', '2017', '2018', '2019')
acquisition_time = [('%s-06-01' % year, '%s-09-30' % year) for year in years]
maxcc = 0.05

# Download folder per sample: ../data/sentinel/<researcher>/<id>
directory_template = '../data/sentinel/%s/%s'

total_samples = len(df)
for i, (index, row) in enumerate(df.iterrows()):
    update_progress(i / total_samples)

    data_folder = directory_template % (row['researcher'], row['id'])

    # A sample is skipped as soon as its folder exists.
    # NOTE(review): this also skips a folder left behind by an interrupted
    # run, so a partially downloaded sample is never completed - confirm that
    # this is acceptable.
    if os.path.exists(data_folder):
        continue

    coords_wgs84 = [row['min_lng'], row['min_lat'], row['max_lng'], row['max_lat']]

    # One request per yearly window, each saved into the sample's folder.
    for period in acquisition_time:
        wms_fallow_request = WcsRequest(
            data_folder=data_folder,
            layer='FALLOWINITIAL',
            bbox=BBox(bbox=coords_wgs84, crs=CRS.WGS84),
            time=period,
            resx='10m',
            resy='10m',
            maxcc=maxcc,
            image_format=MimeType.TIFF_d32f,
            instance_id=instance_id,
        )

        wms_fallow_request.get_data(save_data=True, redownload=False)

update_progress(1)

In [None]:
# Parallel per-file columns; entry k of every list describes the same TIFF.
ids = []
researchers = []
areas = []
files = []
dates = []
grades = []
widths = []
heights = []
bands = []

# Glob pattern for the TIFFs of one sample: ../data/sentinel/<researcher>/<id>/*.tiff
tiff_glob = '../data/sentinel/%s/%s/*.tiff'

for i, (index, row) in enumerate(df.iterrows()):
    update_progress(i / len(df))
    data_folder_glob = tiff_glob % (row['researcher'], row['id'])

    for f in glob.glob(data_folder_glob):
        # The context manager closes the dataset once the pixels are read;
        # a bare rs.open(f).read() leaves one open handle per file.
        with rs.open(f) as dataset:
            raster = dataset.read()
        image = reshape_as_image(raster)
        file_name = os.path.basename(f)

        # After reshape_as_image the axes are indexed as (height, width, bands).
        widths.append(image.shape[1])
        heights.append(image.shape[0])
        bands.append(image.shape[2])

        grades.append(row['grade'])
        ids.append(row['id'])
        areas.append(row['grade_area'])
        researchers.append(row['researcher'])
        files.append(file_name)
        # The date is the 8th '_'-separated token. It is taken from the file
        # name alone so that an underscore in the researcher or id directory
        # cannot shift the token position.
        dates.append(file_name.split('_')[7])


training_data = pd.DataFrame({
    'id': ids,
    'researcher': researchers,
    'area': areas,
    'file': files,
    'date': dates,
    'grade': grades,
    'w': widths,
    'h': heights,
    'b': bands
})
# Binary label: 1 when grade > 0, otherwise 0 (the same rule as grade()).
# Computed column-wise so it also works when no TIFF was found and the table
# is empty, a case in which a row-wise apply does not return a column.
training_data['label'] = (training_data['grade'] > 0).astype(int)
training_data.to_csv('../data/training.csv')

update_progress(1)

In [None]:
# Number of downloaded images for each value of the `grade` column.
sns.countplot(data=training_data, x='grade', palette=palette)

In [None]:
# Number of downloaded images for each value of the binary `label` column.
sns.countplot(data=training_data, x='label', palette=palette)