In [None]:
# NOTE: this will take a long time, but it only needs to be run once.
# You can also run this from the command line should you wish.

# Path to store the processed data tiles; can be anywhere:
path = '/home/jovyan/gtc-exposure/cloud_free/tiles/'
# This downloads, lazy loads and splits the tiles, then removes the original tif;
# the results are saved in the directory specified above.
!python3 cloud_free.py {path} 

In [None]:
import os
import pandas as pd
import rasterio
from rasterio.warp import transform_bounds
from shapely.geometry import Polygon, MultiPolygon, mapping

from cloud_free import retrieve_image, split_tiles, polygons, test_areas, informal_settlements

in_path ='/home/jovyan/gtc-exposure/cloud_free/tiles/'


###Generate training data ###
outpath_train = '/home/jovyan/gtc-exposure/cloud_free/train_images/'         
#extract from tiles the relevant area that corresponds to the polygon
retrieve_image(in_path, outpath_train, polygons())
#split images into training size 64*64
split_tiles(outpath_train, 64)
#create jpeg files and normalise images
!python3 pytorch_preprocess.py {outpath_train + 'tif/'}


#Generate test data
outpath_test = '/home/jovyan/gtc-exposure/cloud_free/test_images/'         
retrieve_image(in_path, outpath_test, test_areas())
split_tiles(outpath_train, 64)
label_informal_settlement(outpath_test)
!python3 pytorch_preprocess.py {outpath_test + 'tif/'}

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import gdal
import pyproj
import fiona
import rasterio
from rasterio.mask import mask
from rasterio.warp import transform_bounds
import matplotlib.pyplot as plt
from shapely.geometry import Polygon, MultiPolygon, mapping
from shapely.ops import transform
try:
    import rioxarray as rxr
except ModuleNotFoundError: 
    os.system("pip install rioxarray")
import rioxarray as rxr


#view images from tif files
def view_image(image_dir):
    """Display every ``.tif`` file in ``image_dir`` and return their footprints.

    For each file the raster is opened with rioxarray, the first three entries
    along its leading axis (treated here as red, green and blue) are divided
    by the maximum value of the whole raster, and the result is shown with
    matplotlib. The file's bounds are then reprojected to EPSG:4326 and stored
    as a rectangle together with a label derived from the file name.

    Parameters
    ----------
    image_dir : str
        Directory containing the ``.tif`` files. A trailing path separator is
        optional.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per ``.tif`` file, in sorted file-name order, with columns
        ``Informal`` ('inf' when the file name starts with 'inf', otherwise
        'not inf') and ``geometry`` (bounding rectangle, CRS EPSG:4326).
    """
    labels = []
    footprints = []

    # os.listdir() returns entries in arbitrary order; sorting keeps both the
    # displayed figures and the returned rows reproducible between runs.
    for file in sorted(os.listdir(image_dir)):
        if not file.endswith('.tif'):
            continue
        # os.path.join works whether or not image_dir ends with a separator.
        tif_path = os.path.join(image_dir, file)

        # normalise images to plot images
        s2_cloud_free = rxr.open_rasterio(tif_path, masked=True).squeeze()
        # The maximum is shared by all three channels, so compute it once.
        peak = s2_cloud_free.max()
        red = s2_cloud_free[0] / peak
        green = s2_cloud_free[1] / peak
        blue = s2_cloud_free[2] / peak
        s2_cloud_free_norm = np.dstack((red, green, blue))

        fig = plt.figure(figsize=(20, 10))
        plt.imshow(s2_cloud_free_norm)
        plt.show()
        # Release the figure so that looping over many tiles does not keep
        # every figure alive in memory.
        plt.close(fig)

        # Read the CRS and bounds, making sure the dataset handle is closed.
        with rasterio.open(tif_path) as tif:
            left, bottom, right, top = transform_bounds(
                tif.crs, 'EPSG:4326',
                tif.bounds.left, tif.bounds.bottom,
                tif.bounds.right, tif.bounds.top,
            )

        labels.append('inf' if file.startswith('inf') else 'not inf')
        footprints.append(
            Polygon([[left, top], [right, top], [right, bottom],
                     [left, bottom], [left, top]])
        )

    # Build the GeoDataFrame once instead of growing a DataFrame row by row.
    return gpd.GeoDataFrame(
        {'Informal': labels, 'geometry': footprints},
        geometry='geometry',
        crs=4326,
    )

# Show every test .tif and collect one footprint rectangle (EPSG:4326) per file.
df = view_image('/home/jovyan/gtc-exposure/cloud_free/test_images/tif/')
# Plot the collected footprint geometries.
df.plot()