In [34]:
import glob
import ntpath
import os
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.warp import reproject, Resampling
from shapely.geometry import Point
import pandas as pd
from pyproj import Transformer
from shapely.geometry import shape

In [28]:
!pip install sentinelsat

Collecting sentinelsat
  Using cached sentinelsat-1.2.1-py3-none-any.whl.metadata (10 kB)
Collecting html2text (from sentinelsat)
  Downloading html2text-2024.2.26.tar.gz (56 kB)
     ---------------------------------------- 0.0/56.5 kB ? eta -:--:--
     --------------------------- ---------- 41.0/56.5 kB 960.0 kB/s eta 0:00:01
     -------------------------------------- 56.5/56.5 kB 734.7 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting geojson>=2 (from sentinelsat)
  Downloading geojson-3.2.0-py3-none-any.whl.metadata (16 kB)
Collecting geomet (from sentinelsat)
  Using cached geomet-1.1.0-py3-none-any.whl.metadata (11 kB)
Using cached sentinelsat-1.2.1-py3-none-any.whl (48 kB)
Downloading geojson-3.2.0-py3-none-any.whl (15 kB)
Using cached geomet-1.1.0-py3-none-any.whl (31 kB)
Building wheels for collected packages: html2text
  Building wheel for html2text (setup.py): started
  Building wheel for html2

In [27]:
shapefile_path = "../data/land_cover/cop/CLC18_IE_wgs84/CLC18_IE_wgs84.shp"
ground_truth = "../data/land_cover/selected/area_reference.tiff"
geojson_path = "../config/crookstown.geojson"

In [32]:
from sentinelsat import read_geojson
def get_polygon(path = "config/crookstown.geojson"):
    geojson = read_geojson(path)
    polygon_jsons = geojson["features"]
    polygon_json = polygon_jsons[0]
    geometry_data = polygon_json["geometry"]
    print(f"get_polygon|geometry_data:{geometry_data}")
    polygon = shape(geometry_data)
    return polygon

In [36]:
polygon = get_polygon(geojson_path)
print(f"get_polygon_from_shapefile|polygon:{polygon}")

get_polygon|geometry_data:{"coordinates": [[[-8.754655, 51.785815], [-8.754655, 51.878858], [-9.028876, 51.878858], [-9.028876, 51.785815], [-8.754655, 51.785815]]], "type": "Polygon"}
get_polygon_from_shapefile|polygon:POLYGON ((-8.754655 51.785815, -8.754655 51.878858, -9.028876 51.878858, -9.028876 51.785815, -8.754655 51.785815))


In [4]:
gdf = gpd.read_file(shapefile_path)
gdf

Unnamed: 0,OBJECTID,ID,CODE_18,Class_Desc,Area_Ha,Shape_STAr,Shape_STLe,geometry
0,1,IE_1,111,Continuous urban fabric,3.593744e+01,3.595292e+05,3.399929e+03,"POLYGON ((-8.73762 51.74733, -8.73753 51.74732..."
1,2,IE_2,111,Continuous urban fabric,1.785159e+02,1.785928e+06,9.115961e+03,"POLYGON ((-8.46873 51.90207, -8.4685 51.90206,..."
2,3,IE_3,111,Continuous urban fabric,2.767229e+01,2.768421e+05,3.185437e+03,"POLYGON ((-7.84092 51.94764, -7.84102 51.94763..."
3,4,IE_4,111,Continuous urban fabric,4.389431e+01,4.391323e+05,3.740968e+03,"POLYGON ((-9.50588 52.0631, -9.50612 52.06227,..."
4,5,IE_5,111,Continuous urban fabric,5.020679e+01,5.022843e+05,3.335530e+03,"POLYGON ((-8.64723 52.14024, -8.64715 52.14023..."
...,...,...,...,...,...,...,...,...
18877,18878,IE_18878,522,Estuaries,5.021254e+01,5.023418e+05,5.987252e+03,"POLYGON ((-8.31413 55.09169, -8.31299 55.09124..."
18878,18879,IE_18879,522,Estuaries,1.747832e+02,1.748585e+06,1.643064e+04,"POLYGON ((-7.85675 55.15326, -7.85667 55.15326..."
18879,18880,IE_18880,522,Estuaries,8.743091e+01,8.746859e+05,9.344419e+03,"POLYGON ((-8.12393 55.15333, -8.1239 55.15333,..."
18880,18881,IE_18881,522,Estuaries,2.865799e+02,2.867034e+06,1.934943e+04,"POLYGON ((-7.32624 55.31113, -7.32619 55.31113..."


In [10]:
def get_data_frame(file_path, latlon_crs = 'epsg:4326'):
    print(f"get_data_frame|file_path : {file_path}")
    with rasterio.open(file_path) as f:
        zz = f.read(1)
        x = np.linspace(f.bounds.left, f.bounds.right, f.shape[1])
        y = np.linspace(f.bounds.bottom, f.bounds.top, f.shape[0])
        xx, yy = np.meshgrid(x, y)
        df = pd.DataFrame({
            'x': xx.flatten(),
            'y': yy.flatten(),
            'value': zz.flatten(),
        })
        transformer = Transformer.from_crs(f.crs, latlon_crs, always_xy=False)
        df['lat'], df['lon'] = transformer.transform(xx=df.x, yy=df.y)
        df.drop(columns=['x', 'y'], inplace=True)
        df = df[['lat', 'lon', 'value']]
        return df

In [11]:
df = get_data_frame(ground_truth)
df

get_data_frame|file_path : ../data/land_cover/selected/area_reference.tiff


Unnamed: 0,lat,lon,value
0,-8.989082,51.786323,0
1,-8.988947,51.786323,0
2,-8.988812,51.786323,0
3,-8.988677,51.786323,0
4,-8.988541,51.786323,0
...,...,...,...
961639,-8.783593,51.871628,0
961640,-8.783458,51.871628,0
961641,-8.783323,51.871628,0
961642,-8.783188,51.871628,0


In [12]:
gdf_points = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['lat'], df['lon']), crs="EPSG:4326")
gdf_points

Unnamed: 0,lat,lon,value,geometry
0,-8.989082,51.786323,0,POINT (-8.98908 51.78632)
1,-8.988947,51.786323,0,POINT (-8.98895 51.78632)
2,-8.988812,51.786323,0,POINT (-8.98881 51.78632)
3,-8.988677,51.786323,0,POINT (-8.98868 51.78632)
4,-8.988541,51.786323,0,POINT (-8.98854 51.78632)
...,...,...,...,...
961639,-8.783593,51.871628,0,POINT (-8.78359 51.87163)
961640,-8.783458,51.871628,0,POINT (-8.78346 51.87163)
961641,-8.783323,51.871628,0,POINT (-8.78332 51.87163)
961642,-8.783188,51.871628,0,POINT (-8.78319 51.87163)


In [13]:
joined_df = gpd.sjoin(gdf_points, gdf, how='left', predicate='within')
joined_df

Unnamed: 0,lat,lon,value,geometry,index_right,OBJECTID,ID,CODE_18,Class_Desc,Area_Ha,Shape_STAr,Shape_STLe
0,-8.989082,51.786323,0,POINT (-8.98908 51.78632),13204,13205,IE_13205,324,Transitional woodland-shrub,3.628898e+01,3.630462e+05,4.080039e+03
1,-8.988947,51.786323,0,POINT (-8.98895 51.78632),13204,13205,IE_13205,324,Transitional woodland-shrub,3.628898e+01,3.630462e+05,4.080039e+03
2,-8.988812,51.786323,0,POINT (-8.98881 51.78632),13204,13205,IE_13205,324,Transitional woodland-shrub,3.628898e+01,3.630462e+05,4.080039e+03
3,-8.988677,51.786323,0,POINT (-8.98868 51.78632),13204,13205,IE_13205,324,Transitional woodland-shrub,3.628898e+01,3.630462e+05,4.080039e+03
4,-8.988541,51.786323,0,POINT (-8.98854 51.78632),13204,13205,IE_13205,324,Transitional woodland-shrub,3.628898e+01,3.630462e+05,4.080039e+03
...,...,...,...,...,...,...,...,...,...,...,...,...
961639,-8.783593,51.871628,0,POINT (-8.78359 51.87163),4888,4889,IE_4889,231,Pastures,2.667865e+06,2.669015e+10,3.915652e+07
961640,-8.783458,51.871628,0,POINT (-8.78346 51.87163),4888,4889,IE_4889,231,Pastures,2.667865e+06,2.669015e+10,3.915652e+07
961641,-8.783323,51.871628,0,POINT (-8.78332 51.87163),4888,4889,IE_4889,231,Pastures,2.667865e+06,2.669015e+10,3.915652e+07
961642,-8.783188,51.871628,0,POINT (-8.78319 51.87163),4888,4889,IE_4889,231,Pastures,2.667865e+06,2.669015e+10,3.915652e+07


In [37]:
joined_df.describe()

Unnamed: 0,lat,lon,value,index_right,OBJECTID,Area_Ha,Shape_STAr,Shape_STLe
count,961644.0,961644.0,961644.0,961644.0,961644.0,961644.0,961644.0,961644.0
mean,-8.886067,51.828976,649.472129,5191.423976,5192.423976,2384297.0,23853250000.0,34995350.0
std,0.059515,0.024665,625.055807,1615.416861,1615.416861,822239.2,8225936000.0,12065810.0
min,-8.989082,51.786323,0.0,67.0,68.0,25.1913,252021.5,2029.055
25%,-8.937575,51.807582,0.0,4888.0,4889.0,2667865.0,26690150000.0,39156520.0
50%,-8.886067,51.828976,1101.0,4888.0,4889.0,2667865.0,26690150000.0,39156520.0
75%,-8.83456,51.85037,1225.0,4888.0,4889.0,2667865.0,26690150000.0,39156520.0
max,-8.783052,51.871628,6064.0,18274.0,18275.0,2667865.0,26690150000.0,39156520.0


In [38]:
joined_df.info

<bound method DataFrame.info of              lat        lon  value                   geometry  index_right  \
0      -8.989082  51.786323      0  POINT (-8.98908 51.78632)        13204   
1      -8.988947  51.786323      0  POINT (-8.98895 51.78632)        13204   
2      -8.988812  51.786323      0  POINT (-8.98881 51.78632)        13204   
3      -8.988677  51.786323      0  POINT (-8.98868 51.78632)        13204   
4      -8.988541  51.786323      0  POINT (-8.98854 51.78632)        13204   
...          ...        ...    ...                        ...          ...   
961639 -8.783593  51.871628      0  POINT (-8.78359 51.87163)         4888   
961640 -8.783458  51.871628      0  POINT (-8.78346 51.87163)         4888   
961641 -8.783323  51.871628      0  POINT (-8.78332 51.87163)         4888   
961642 -8.783188  51.871628      0  POINT (-8.78319 51.87163)         4888   
961643 -8.783052  51.871628      0  POINT (-8.78305 51.87163)         4888   

        OBJECTID        ID CODE