### Check the occurrences in each class

In [1]:
import os
import re
import sys
import fiona
import logging
import datetime
import numpy as np
import pandas as pd
import geopandas as gpd
import skimage
import skimage.draw
import pyproj
import rasterio
from rasterio.mask import mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
from sklearn.inspection import permutation_importance

Can't load requested DLL: C:\Program Files\GDAL\gdalplugins\gdal_KEA.dll
127: The specified procedure could not be found.

Can't load requested DLL: C:\Program Files\GDAL\gdalplugins\gdal_KEA.dll
127: The specified procedure could not be found.

Can't load requested DLL: C:\Program Files\GDAL\gdalplugins\gdal_KEA.dll
127: The specified procedure could not be found.

Can't load requested DLL: C:\Program Files\GDAL\gdalplugins\gdal_KEA.dll
127: The specified procedure could not be found.



In [2]:
def load_target_shp(path, transform=None, proj_out=None):
    """ Load the polygons of a shapefile as a list of numpy arrays of coordinates.

        Only the first ring (``coordinates[0]``) of every geometry is used, so each feature is
        expected to be a plain Polygon.

        INPUT : path (str) -> the path to the shapefile
                transform (rasterio.Affine) -> the affine transformation of the target raster; when given,
                                               the polygons are additionally returned in pixel coordinates
                proj_out (pyproj.Proj) -> the projection the polygons should be expressed in; None (or a
                                          projection equal to the shapefile's) keeps the coordinates as stored
        OUTPUT : poly (list of np.array) -> list of polygons (one array of (x, y) coordinates per polygon)
                 poly_rc (list of np.array) -> the same polygons as (col, row) pixel indices, or None when no
                                               transform is given
                 class_type (list) -> the 'id' property of every feature, in the same order as poly
    """
    print("Loading target shapefile...")
    with fiona.open(path) as shapefile:
        proj_in = pyproj.Proj(shapefile.crs)
        # read the file once so that class ids and geometries always stay aligned
        records = [(feature['properties']['id'], feature['geometry']) for feature in shapefile]
    class_type = [class_id for class_id, _ in records]
    rings = [geometry['coordinates'][0] for _, geometry in records]

    # re-project polygons if necessary
    if proj_out is None or proj_in == proj_out:
        poly = [np.array([(coord[0], coord[1]) for coord in ring]) for ring in rings]
        print('No re-projection!')
    else:
        # pyproj.transform() is deprecated in favour of Transformer; building the transformer once
        # also avoids re-creating it for every single vertex.
        transformer = pyproj.Transformer.from_proj(proj_in, proj_out)
        poly = [np.array([transformer.transform(coord[0], coord[1]) for coord in ring]) for ring in rings]
        print(f'Re-project from {proj_in} to {proj_out}')

    poly_rc = None
    # transform in row-col if a transform is given
    if transform is not None:
        # rowcol() returns (row, col); it is reversed to (col, row) so the column order mirrors (x, y)
        poly_rc = [np.array([rasterio.transform.rowcol(transform, coord[0], coord[1])[::-1] for coord in p])
                   for p in poly]
    print('Loaded target shape files.')

    return poly, poly_rc, class_type


def compute_mask(polygon_list, meta, val_list):
    """ Rasterise a list of polygons into a single class mask.
        INPUT : polygon_list (list of np.array) -> the polygons in pixel coordinates; column 0 of each array is
                                                   used as the image column and column 1 as the image row
                meta (dict) -> must provide the image 'height' and 'width'
                val_list (list of int) -> the class associated with each polygon (stored as int8)
        OUTPUT : img (np.array 2D, int8) -> the mask in which each pixel value reflects its class (zero being the
                                            absence of class); where polygons overlap, the later one wins
    """
    img = np.zeros((meta['height'], meta['width']), dtype=np.int8)  # skimage : row,col --> h,w
    for polygon, val in zip(polygon_list, val_list):
        # passing img.shape keeps the returned pixel indices inside the image
        rr, cc = skimage.draw.polygon(polygon[:, 1], polygon[:, 0], img.shape)
        img[rr, cc] = val
    print("Added targets' mask.")
    return img

In [3]:
def merge_shapefiles(to_label_path='../data/all-labels/all-labels.shp'):
    """Merge all label shapefiles, clip them to the study area and save the result.

    The two apple surveys, the other-crops layer and the non-crops layer are stacked into one
    GeoDataFrame, rows containing missing values are dropped and multi-polygons are split with
    multipolygons_to_polygons. The labels are then intersected with the study-area shapefile;
    of the id columns produced by the overlay only 'id_1' is kept (renamed to 'id').

    :param to_label_path: path of the shapefile the merged labels are written to
    :return: None (the result is only written to disk)
    """
    # individual label layers
    apples_first_survey = gpd.read_file('../data/apples/survey20210716_polygons20210819_corrected20210831.shp')
    apples_second_survey = gpd.read_file('../data/apples/survey20210825_polygons20210901_revised20210929.shp')
    non_crops = gpd.read_file('../data/non-crops/non-crop.shp')
    other_crops = gpd.read_file('../data/other-crops/other-crops.shp')

    # stack the layers into a single geo dataframe
    stacked = pd.concat([apples_first_survey, apples_second_survey, other_crops, non_crops], axis=0)
    merged = gpd.GeoDataFrame(stacked)
    merged = merged.dropna()  # delete empty polygons
    merged = merged.reset_index(drop=True)
    merged = multipolygons_to_polygons(merged)

    # keep only what lies inside the study area
    study_area = gpd.read_file('../data/study-area/study_area.shp')
    clipped = gpd.overlay(merged, study_area, how='intersection')
    redundant_cols = [name for name in ['id', 'id_2'] if name in clipped.columns]
    clipped = clipped.drop(redundant_cols, axis=1)
    clipped = clipped.rename(columns={'id_1': 'id'})

    # save to folder
    clipped.to_file(to_label_path)
    

def multipolygons_to_polygons(shp_file):
    """Split every MultiPolygon row of a geo dataframe into one row per member polygon.

    Rows whose geometry is a Polygon are kept as they are. For every MultiPolygon row one new row
    per member polygon is appended, carrying over only the 'id' and 'district' values of the
    original row. Rows with any other geometry type are dropped.

    :param shp_file: geo dataframe with the columns 'id', 'district' and 'geometry'
    :return: dataframe holding the original Polygon rows followed by the split polygons
             (the index is not reset)
    """
    # geom_type is the documented accessor for the per-row geometry type
    geom_types = shp_file['geometry'].geom_type
    multi_polygons_df = shp_file[geom_types == 'MultiPolygon']
    polygons_df = shp_file[geom_types == 'Polygon']
    if multi_polygons_df.shape[0] == 0:
        print('No multi-polygons!')
    else:
        new_polygons = []
        num_multi_polygons = multi_polygons_df.shape[0]
        print(f'Converting {num_multi_polygons} multi-polygons to polygons...')
        for i in range(num_multi_polygons):
            multi_polygon_ = multi_polygons_df.iloc[i]
            label, district, multi_polygon = multi_polygon_.id, multi_polygon_.district, multi_polygon_.geometry
            # iterate the parts through .geoms: multi-part geometries are no longer
            # directly iterable in Shapely 2.0
            for polygon in multi_polygon.geoms:
                new_polygons.append([label, district, polygon])
        new_polygons_df = pd.DataFrame(new_polygons, columns=['id', 'district', 'geometry'])
        polygons_df = pd.concat([polygons_df, new_polygons_df], axis=0)
    return polygons_df

In [4]:
def load_geotiff(path, window=None):
    """ Read every band of a geotiff, together with its metadata.
        INPUT : path (str) -> the path to the geotiff
                window (rasterio.windows.Window) -> optional window restricting the part of the image that is read
        OUTPUT : band (list of numpy array) -> one array per band, converted to float with skimage.img_as_float
                 meta (dictionary) -> the metadata of the geotiff; when a window is given, 'height', 'width' and
                                      'transform' describe the window instead of the full image
    """
    with rasterio.open(path) as src:
        meta = src.meta
        band = []
        # rasterio band indices start at 1
        for band_index in range(1, src.count + 1):
            raw = src.read(band_index, window=window)
            band.append(skimage.img_as_float(raw))
        if window is not None:
            meta.update(height=window.height,
                        width=window.width,
                        transform=src.window_transform(window))
    return band, meta

In [12]:
merge_shapefiles()

Converting 1 multi-polygons to polygons...


In [6]:
band, meta = load_geotiff('N:/dataorg-datasets/MLsatellite/sentinel2_images/images_danya/clip/L2A_T43SFR_A026271_20200703T053446.tiff')

In [13]:
train_polygons, train_rc_polygons, train_class_list = \
    load_target_shp('../data/all-labels/all-labels.shp',
                    transform=meta['transform'],
                    proj_out=pyproj.Proj(meta['crs']))
train_mask = compute_mask(train_rc_polygons, meta, train_class_list)

Loading target shapefile...
No re-projection!
Loaded target shape files.
Added targets' mask.


  in_crs_string = _prepare_from_proj_string(in_crs_string)


In [8]:
def count_classes(y):
    """Print, for every distinct label in y, how many elements carry it and its share of len(y) in percent."""
    total = len(y)
    labels = np.unique(y)
    for label in labels:
        members = y[y == label]
        n_members = len(members)
        share = round(n_members / total * 100, 2)
        print(f'  label = {label}, pixel number = {n_members}, percentage = {share}%')


In [9]:
train_mask.shape

(2357, 1892)

In [14]:
count_classes(train_mask.reshape(-1))

  label = -1, pixel number = 50746, percentage = 1.14%
  label = 0, pixel number = 4350799, percentage = 97.56%
  label = 1, pixel number = 6021, percentage = 0.14%
  label = 2, pixel number = 51878, percentage = 1.16%


In [15]:
# Binary view of the labels: work on a flattened copy so train_mask itself is not modified
y = train_mask.reshape(-1).copy()
y = y[y!=0]  # drop the pixels without any class (0 = absence of class, see compute_mask)
y[y==2] = 1  # merge class 2 into class 1
count_classes(y)

  label = -1, pixel number = 50746, percentage = 46.71%
  label = 1, pixel number = 57899, percentage = 53.29%


### Cloud masking

In [31]:
# NOTE(review): images_dir is not defined in any cell shown in this notebook — presumably it was set in a
# cell that has since been removed; define it before running this cell.
path = images_dir + '/safe/S2B_MSIL1C_20201228T054239_N0209_R005_T43SFR_20201228T063652.SAFE/GRANULE/L1C_T43SFR_A019908_20201228T054233/QI_DATA/'

# Validate

- with GFSAD
- with Copernicus (same region)
- with Copernicus (more cropland region)
- with Europe data

In [126]:
def clip_single_raster(shape_crs, shapes, geotiff_filepath, clip_filepath):
    """Clip a raster to the given shapes and save the result as a GeoTIFF.

    If the raster is not in the coordinate system of the shapes, it is first re-projected with
    reproject_single_raster; the re-projected raster is written to clip_filepath, which is then
    overwritten by the clipped result.

    :param shape_crs: coordinate system of the shapes
    :param shapes: geometries passed to rasterio.mask.mask as the clip outline
    :param geotiff_filepath: path of the raster to clip
    :param clip_filepath: path the clipped raster is written to
    :return: None
    """
    # Only the coordinate system is needed here; the context manager closes the dataset again
    # instead of leaving the file handle open.
    with rasterio.open(geotiff_filepath) as raster:
        raster_crs = raster.crs

    # check if two coordinate systems are the same
    if shape_crs != raster_crs:
        reproject_single_raster(shape_crs, geotiff_filepath, clip_filepath)
        source_filepath = clip_filepath
    else:
        source_filepath = geotiff_filepath

    # read imagery file
    with rasterio.open(source_filepath) as src:
        out_image, out_transform = mask(src, shapes, crop=True)
        out_meta = src.meta

    # Save clipped imagery
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

    with rasterio.open(clip_filepath, "w", **out_meta) as dst:
        # out_image.shape (band, height, width)
        dst.write(out_image)


def reproject_single_raster(dst_crs, input_file, transformed_file):
    """
    Re-project every band of a raster into another coordinate system (nearest-neighbour resampling).

    :param dst_crs: output projection system
    :param input_file: path of the raster to re-project
    :param transformed_file: path the re-projected raster is written to
    :return: None
    """
    with rasterio.open(input_file) as src:
        print(f"Reprojecting from {src.crs} to {dst_crs}.")
        # grid (transform and size) of the raster in the target projection
        dst_transform, dst_width, dst_height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)

        profile = src.meta.copy()
        profile.update(crs=dst_crs, transform=dst_transform, width=dst_width, height=dst_height)

        with rasterio.open(transformed_file, 'w', **profile) as dst:
            # rasterio band indices start at 1
            for band_index in range(1, src.count + 1):
                reproject(source=rasterio.band(src, band_index),
                          destination=rasterio.band(dst, band_index),
                          src_transform=src.transform,
                          src_crs=src.crs,
                          dst_transform=dst_transform,
                          dst_crs=dst_crs,
                          resampling=Resampling.nearest)

### load predictions

In [110]:
pred_path = '../preds/1008-183014_rfc.tif'
band_pred, meta_pred = load_geotiff(pred_path)

In [45]:
band_pred

[array([[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]])]

In [46]:
band_pred[0].shape

(2357, 1892)

In [50]:
np.unique(band_pred)*255

array([  1., 255.])

In [48]:
meta_pred

{'driver': 'GTiff',
 'dtype': 'uint8',
 'nodata': None,
 'width': 1892,
 'height': 2357,
 'count': 1,
 'crs': CRS.from_epsg(4326),
 'transform': Affine(9.810891648968448e-05, 0.0, 77.03211244126558,
        0.0, -9.810891648968448e-05, 32.25451163012633)}

In [51]:
# band_pred is a list of 2-D arrays (see load_geotiff): `band_pred*255` would repeat the list 255 times
# instead of rescaling the pixel values, so every band is scaled individually.
band_pred = [single_band * 255 for single_band in band_pred]

### with GFSAD

In [19]:
gfsat_path = 'K:/2021-data-org/4. RESEARCH_n/ML/MLsatellite/Data/layers_india/ancilliary_data/cropland/GFSAD30/GFSAD30SAAFGIRCE_2015_N30E70_001_2017286103800.tif'

In [20]:
band_gfsat, meta_gfsat = load_geotiff(gfsat_path)

In [29]:
band_gfsat

[array([[0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157],
        [0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157],
        [0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157],
        ...,
        [0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157],
        [0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157],
        [0.00392157, 0.00392157, 0.00392157, ..., 0.00392157, 0.00392157,
         0.00392157]])]

In [22]:
len(band_gfsat), band_gfsat[0].shape

(1, (37513, 37116))

In [25]:
meta, meta_gfsat, meta_copernicus

({'driver': 'GTiff',
  'dtype': 'uint16',
  'nodata': None,
  'width': 1892,
  'height': 2357,
  'count': 4,
  'crs': CRS.from_epsg(4326),
  'transform': Affine(9.810891648968448e-05, 0.0, 77.03211244126558,
         0.0, -9.810891648968448e-05, 32.25451163012633)},
 {'driver': 'GTiff',
  'dtype': 'uint8',
  'nodata': None,
  'width': 37116,
  'height': 37513,
  'count': 1,
  'crs': CRS.from_epsg(4326),
  'transform': Affine(0.0002694945852358563, 0.0, 69.99879306374659,
         0.0, -0.0002694945852358565, 40.108609626067285)},
 {'driver': 'GTiff',
  'dtype': 'uint8',
  'nodata': 255.0,
  'width': 20160,
  'height': 20160,
  'count': 1,
  'crs': CRS.from_epsg(4326),
  'transform': Affine(0.0009920634920634888, 0.0, 60.0,
         0.0, -0.0009920634920634888, 40.0)})

In [30]:
np.unique(band_gfsat[0])

array([0.        , 0.00392157, 0.00784314])

In [83]:
# shape file information
study_area_shp ='../data/study-area/study_area.shp'
with fiona.open(study_area_shp, "r") as shapefile:
    study_area_shapes = [feature["geometry"] for feature in shapefile if feature["geometry"] is not None]
study_area_crs = gpd.read_file(study_area_shp).crs
gfsat_clip_path = '../data/gfsat_clipped.tiff'
clip_single_raster(study_area_crs, study_area_shapes, gfsat_path, gfsat_clip_path)

In [131]:
gfsat_clip_path = '../data/gfsat_clipped.tiff'

In [84]:
band_gfsat, meta_gfsat = load_geotiff(gfsat_clip_path)

In [85]:
band_gfsat[0].shape  # as resolution is 30 meters

(858, 689)

In [121]:
band_gfsat = band_gfsat/np.unique(band_gfsat).max()

In [70]:
np.unique(band_gfsat[0])

array([0. , 0.5, 1. ])

In [54]:
((band_pred==1.0) == (band_gfsat==1.0)).sum()

586311

In [56]:
band_pred[0].reshape(-1).shape[0]

4459444

In [59]:
# NOTE(review): this comparison is not meaningful. band_pred is a Python list here (as returned by
# load_geotiff), so `band_pred==1.0` is the single value False and the "equal" count is simply the number
# of band_gfsat pixels that are NOT 1 (858*689 - 4851 = 586311). The two rasters also have different
# shapes at this point; the comparison on the resampled raster further down is the valid one.
print(f'Number of 1 in gfsat: {(band_gfsat==1.0).sum()}')
print(f'Number of equal: {((band_pred==1.0) == (band_gfsat==1.0)).sum()}')
print(f'Percentage: {((band_pred==1.0) == (band_gfsat==1.0)).sum()/(band_gfsat==1.0).sum():.4f}')


Number of 1 in gfsat: 4851
Number of equal: 586311
Percentage: 120.8639


In [69]:
band_gfsat

array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]])

In [78]:
band_pred[0].shape, (band_gfsat[0] == 1.0).shape

((2357, 1892), (858, 689))

In [87]:
2357/858, 1892/689

(2.747086247086247, 2.7460087082728593)

In [97]:
meta_gfsat

{'driver': 'GTiff',
 'dtype': 'uint8',
 'nodata': None,
 'width': 689,
 'height': 858,
 'count': 1,
 'crs': CRS.from_epsg(4326),
 'transform': Affine(0.0002694945852358563, 0.0, 77.03206274923197,
        0.0, -0.0002694945852358565, 32.254459433953485)}

In [101]:
meta_pred

{'driver': 'GTiff',
 'dtype': 'uint8',
 'nodata': None,
 'width': 1892,
 'height': 2357,
 'count': 1,
 'crs': CRS.from_epsg(4326),
 'transform': Affine(9.810891648968448e-05, 0.0, 77.03211244126558,
        0.0, -9.810891648968448e-05, 32.25451163012633)}

In [104]:
study_area_crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [122]:
np.unique(band_gfsat[0])

array([0. , 0.5, 1. ])

In [132]:
reproject_single_raster(
    meta_pred['crs'], gfsat_clip_path, "./gfsat_clipped.tiff")

Reprojecting from EPSG:4326 to EPSG:4326.


In [123]:
# Naive upsampling: every pixel is repeated 3 times along both axes, then a leading band axis is added.
out_image = np.expand_dims(band_gfsat[0].repeat(3, axis=0).repeat(3, axis=1), axis=0)
print(band_gfsat[0].shape, out_image.shape)
# NOTE(review): the metadata (transform, dtype) is copied from the prediction raster and only the size is
# replaced. The upsampled grid is 2574 x 2067 while the prediction is 2357 x 1892, so the copied
# transform does not describe this image exactly; out_image holds floats while meta_pred declares
# 'uint8' — confirm the written file is what is intended.
out_meta = meta_pred.copy()
out_meta.update({
     "height": out_image.shape[1],
     "width": out_image.shape[2]})
with rasterio.open('../data/gfsat_clip_upsample.tiff', "w", **out_meta) as dst:
    # out_image.shape (band, height, width)
    dst.write(out_image)

(858, 689) (1, 2574, 2067)


In [133]:
import os

In [210]:
# Resample the clipped raster onto the grid of the prediction raster (same CRS, size and extent) with gdalwarp.
# NOTE(review): `dataset` is only created in a cell further down (In [150]), so this cell fails on a fresh
# top-to-bottom run. The command also reads and writes '../data/gfsad_*.tiff' while the neighbouring cells
# use '../data/gfsat_*.tiff' — confirm which file names are intended.
x = os.system(f"gdalwarp -overwrite -r average -t_srs {meta_pred['crs']} -ts {meta_pred['width']} {meta_pred['height']} -te {dataset.bounds.left} {dataset.bounds.bottom} {dataset.bounds.right} {dataset.bounds.top} {'../data/gfsad_clipped.tiff'} {'../data/gfsad_resample.tiff'}")
x

0

In [143]:
meta_pred

{'driver': 'GTiff',
 'dtype': 'uint8',
 'nodata': None,
 'width': 1892,
 'height': 2357,
 'count': 1,
 'crs': CRS.from_epsg(4326),
 'transform': Affine(9.810891648968448e-05, 0.0, 77.03211244126558,
        0.0, -9.810891648968448e-05, 32.25451163012633)}

In [150]:
dataset = rasterio.open(pred_path)
dataset.bounds

BoundingBox(left=77.03211244126558, bottom=32.023268913960145, right=77.21773451126407, top=32.25451163012633)

Resampled

In [153]:
gfsat_resample_path = '../data/gfsat_resample.tiff'
band_gfsat_resample, meta_gfsat_resample = load_geotiff(gfsat_resample_path)
band_gfsat_resample[0].shape

(2357, 1892)

In [159]:
band_gfsat_resample = band_gfsat_resample/band_gfsat_resample[0].max()
np.unique(band_gfsat_resample)

array([0. , 0.5, 1. ])

In [165]:
band_pred[0].shape, band_gfsat_resample[0].shape

((2357, 1892), (2357, 1892))

In [173]:
np.unique(band_pred)

array([  1., 255.])

In [172]:
band_pred = band_pred/band_pred[0].min()

In [174]:
tot_1_in_gfsat = (band_gfsat_resample[0]==1.0).sum()
num_1_in_preds = (band_pred[0][band_gfsat_resample[0]==1.0]==1.0).sum()
print(f'Number of 1 in gfsat: {tot_1_in_gfsat}')
print(f'Number of equal: {num_1_in_preds}')
print(f'Percentage: {num_1_in_preds/tot_1_in_gfsat:.4f}')

Number of 1 in gfsat: 36583
Number of equal: 25202
Percentage: 0.6889


In [224]:
def clip_open_datasets_based_on_study_area(input_path, output_path):
    """Clip a raster to the study-area shapefile.

    The non-empty geometries and the coordinate system of '../data/study-area/study_area.shp' are
    read and handed to clip_single_raster.

    :param input_path: path of the raster to clip
    :param output_path: path the clipped raster is written to
    :return: None
    """
    study_area_shp ='../data/study-area/study_area.shp'

    # collect the outline geometries, skipping features without geometry
    study_area_shapes = []
    with fiona.open(study_area_shp, "r") as shapefile:
        for feature in shapefile:
            if feature["geometry"] is not None:
                study_area_shapes.append(feature["geometry"])

    study_area_crs = gpd.read_file(study_area_shp).crs
    clip_single_raster(study_area_crs, study_area_shapes, input_path, output_path)
    
    
def align_raster(pred_path, input_path, output_path):
    """
    Align according to prediction file (with boundary and resolution adjustment).

    Runs the external ``gdalwarp`` tool with average resampling so that the output raster has the
    coordinate system, the size in pixels and the extent of the prediction raster. Prints whether
    the command succeeded.

    :param pred_path: path of the prediction raster that defines the target grid
    :param input_path: path of the raster to align
    :param output_path: path the aligned raster is written to (overwritten if it exists)
    :return: None
    """
    import subprocess  # local import: not part of the notebook's import cell

    # prepare source info; only the header is needed, so no band is read and the file is closed again
    with rasterio.open(pred_path) as reference:
        bounds = reference.bounds
        target_crs = reference.crs
        target_width, target_height = reference.width, reference.height

    # command, passed as an argument list so that paths containing spaces or shell
    # metacharacters reach gdalwarp unchanged
    cmd = ['gdalwarp', '-overwrite', '-r', 'average',
           '-t_srs', str(target_crs),
           '-ts', str(target_width), str(target_height),
           '-te', str(bounds.left), str(bounds.bottom), str(bounds.right), str(bounds.top),
           str(input_path), str(output_path)]
    try:
        returned_val = subprocess.run(cmd).returncode
    except OSError:
        # gdalwarp could not be started at all (e.g. not installed / not on PATH)
        returned_val = -1

    if returned_val == 0:
        print('Successfully align raster!')
    else:
        print('Alignment failed!')
        

def compare_predictions_with_gfsad(pred_path, dataset_path):
    """Print how many of the cropland pixels of the GFSAD raster are also predicted as cropland.

    Both rasters must already share the same grid (see align_raster), because the dataset mask is
    used to index the prediction. In the prediction the smallest pixel value is taken as the
    cropland class, in the dataset the largest pixel value.

    :param pred_path: path of the prediction raster (first band is used)
    :param dataset_path: path of the aligned GFSAD raster (first band is used)
    :return: None (the counts and the percentage are printed)
    """
    # load data (only the first band of each raster is compared)
    band_pred = load_geotiff(pred_path)[0][0]
    band_dataset = load_geotiff(dataset_path)[0][0]

    # rescale to make target value taking 1
    band_pred = band_pred/band_pred.min()
    band_dataset = band_dataset/band_dataset.max()

    # calculate
    num_in_dataset = (band_dataset==1.0).sum()
    num_in_pred = (band_pred[band_dataset==1.0]==1.0).sum()
    print(f'Cropland pixel number in GFSAD: {num_in_dataset}')
    print(f'Cropland pixel number in prediction: {num_in_pred}')
    print(f'Percentage: {num_in_pred/num_in_dataset*100:.2f}%')
                   
                     
def compare_predictions_with_copernicus(pred_path, dataset_path):
    """Print how many of the non-cropland pixels of the Copernicus raster are also predicted as non-cropland.

    Both rasters must already share the same grid (see align_raster), because the dataset mask is
    used to index the prediction. Copernicus pixels with class code 50 or 111 are counted as
    non-cropland; in the prediction the largest pixel value is taken as the non-cropland class.

    :param pred_path: path of the prediction raster (first band is used)
    :param dataset_path: path of the aligned Copernicus land-cover raster (first band is used)
    :return: None (the counts and the percentage are printed)
    """
    non_cropland_codes = (50, 111)

    # load data (only the first band of each raster is compared)
    band_pred = load_geotiff(pred_path)[0][0]
    band_dataset = load_geotiff(dataset_path)[0][0]

    # rescale to make target value taking 1
    band_pred = band_pred/band_pred.max()
    # load_geotiff returns the class codes rescaled to floats; multiplying by 255 (8-bit raster, as
    # before) does not necessarily give back the exact integers, so round before matching the codes
    class_codes = np.rint(band_dataset*255)
    is_non_cropland = np.isin(class_codes, non_cropland_codes)

    # calculate
    num_in_dataset = is_non_cropland.sum()
    num_in_pred = (band_pred[is_non_cropland]==1.0).sum()
    print(f'Non-cropland pixel number in Copernicus: {num_in_dataset}')
    print(f'Non-cropland pixel number in prediction: {num_in_pred}')
    print(f'Percentage: {num_in_pred/num_in_dataset*100:.2f}%')

In [217]:
gfsad_path = 'K:/2021-data-org/4. RESEARCH_n/ML/MLsatellite/Data/layers_india/ancilliary_data/cropland/GFSAD30/GFSAD30SAAFGIRCE_2015_N30E70_001_2017286103800.tif'
gfsad_clip_path = '../data/gfsad_clipped.tiff'
gfsad_align_path = '../data/gfsad_aligned.tiff'
pred_path = '../preds/1008-183014_rfc.tif'

clip_open_datasets_based_on_study_area(gfsad_path, gfsad_clip_path)
align_raster(pred_path, gfsad_clip_path, gfsad_align_path)

compare_predictions_with_gfsad(pred_path, gfsad_align_path)

Successfully align raster!
Cropland pixel number in GFSAD: 36583
Cropland pixel number in prediction: 25202
Percentage: 68.89%


### with Copernicus (same region)

In [23]:
copernicus_path = 'K:/2021-data-org/4. RESEARCH_n/ML/MLsatellite/Data/layers_india/ancilliary_data/landcover/Copernicus_LC100m/INDIA_2019/E060N40_PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif'

In [24]:
band_copernicus, meta_copernicus = load_geotiff(copernicus_path)

In [26]:
band_copernicus

[array([[0.07843137, 0.07843137, 0.11764706, ..., 0.23529412, 0.23529412,
         0.23529412],
        [0.23529412, 0.07843137, 0.11764706, ..., 0.23529412, 0.23529412,
         0.23529412],
        [0.23529412, 0.11764706, 0.11764706, ..., 0.23529412, 0.23529412,
         0.23529412],
        ...,
        [0.78431373, 0.78431373, 0.78431373, ..., 0.15686275, 0.15686275,
         0.15686275],
        [0.78431373, 0.78431373, 0.78431373, ..., 0.15686275, 0.15686275,
         0.15686275],
        [0.78431373, 0.78431373, 0.78431373, ..., 0.15686275, 0.15686275,
         0.15686275]])]

In [27]:
len(band_copernicus), band_copernicus[0].shape

(1, (20160, 20160))

In [218]:
unique_ids = np.unique(band_copernicus[0])

In [220]:
unique_ids

array([0.        , 0.07843137, 0.11764706, 0.15686275, 0.19607843,
       0.23529412, 0.2745098 , 0.31372549, 0.35294118, 0.39215686,
       0.43529412, 0.43921569, 0.44313725, 0.44705882, 0.45098039,
       0.45490196, 0.4745098 , 0.47843137, 0.48235294, 0.48627451,
       0.49019608, 0.49411765, 0.78431373])

In [221]:
unique_ids*255

array([  0.,  20.,  30.,  40.,  50.,  60.,  70.,  80.,  90., 100., 111.,
       112., 113., 114., 115., 116., 121., 122., 123., 124., 125., 126.,
       200.])

In [226]:
copernicus_path = 'K:/2021-data-org/4. RESEARCH_n/ML/MLsatellite/Data/layers_india/ancilliary_data/landcover/Copernicus_LC100m/INDIA_2019/E060N40_PROBAV_LC100_global_v3.0.1_2019-nrt_Discrete-Classification-map_EPSG-4326.tif'
copernicus_clip_path = '../data/copernicus_clipped.tiff'
copernicus_align_path = '../data/copernicus_aligned.tiff'
pred_path = '../preds/1008-183014_rfc.tif'

clip_open_datasets_based_on_study_area(copernicus_path, copernicus_clip_path)
align_raster(pred_path, copernicus_clip_path, copernicus_align_path)

compare_predictions_with_copernicus(pred_path, copernicus_align_path)

Successfully align raster!
Non-cropland pixel number in Copernicus: 665909
Non-cropland pixel number in prediction: 633714
Percentage: 95.17%
