## GDM PROJECT 2 

BY: ABDULMALIK AJIBADE

In [1]:
import json
import warnings
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy
from IPython.display import Image, display

## Project Tasks

### 1. Creation of the first SHP (instruments):

- Open the 4 files with pandas.
- Merge them into a new DataFrame.
- Make a GeoDataFrame with all the instruments (convert geometry from EPSG:4978 to EPSG:4326).
- Calculate the site_id and the instrument_id (add new columns).
- Remove useless columns.
- Save the data!

In [2]:
# Read the four per-technique coordinate files (fixed-width text),
# one DataFrame per file, in the order DORIS, GNSS, SLR, VLBI.
inst_1, inst_2, inst_3, inst_4 = map(
    pd.read_fwf,
    (
        'ITRF2020_DORIS_cart.txt',
        'ITRF2020_GNSS_cart.txt',
        'ITRF2020_SLR_cart.txt',
        'ITRF2020_VLBI_cart.txt',
    ),
)

In [3]:
# Stack the four technique tables into one frame.
# ignore_index=True gives every instrument a unique row label; without it each
# file's 0..n labels are repeated, which makes label-based selection ambiguous.
instrument_data = pd.concat([inst_1, inst_2, inst_3, inst_4], ignore_index=True)

In [4]:
# Preview the first rows of the merged instrument table.
instrument_data.head(n=5)

Unnamed: 0,id,name,type,code,x,y,z,dx,dy,dz
0,10002S018,Grasse (OCA),DORIS,GR3B,4581680.0,556166.4818,4389372.0,0.002,0.0025,0.002
1,10002S019,Grasse (OCA),DORIS,GR4B,4581681.0,556166.9141,4389371.0,0.0019,0.0024,0.0017
2,10003S001,Toulouse,DORIS,TLSA,4628047.0,119670.6873,4372788.0,0.0054,0.0062,0.0051
3,10003S003,Toulouse,DORIS,TLHA,4628693.0,119985.077,4372105.0,0.0034,0.0042,0.0032
4,10003S005,Toulouse,DORIS,TLSB,4628694.0,119985.0787,4372105.0,0.0026,0.0039,0.0025


In [5]:
# Build one 3-D point per instrument from the x/y/z columns, declared in
# EPSG:4978 as required by the task statement.
cartesian_points = gpd.points_from_xy(
    instrument_data['x'],
    instrument_data['y'],
    instrument_data['z'],
)

# A single GeoDataFrame constructor is enough (the original wrapped the
# result in a second, redundant gpd.GeoDataFrame call); then reproject the
# points to EPSG:4326.
instrument_gdf = gpd.GeoDataFrame(
    instrument_data,
    geometry=cartesian_points,
    crs="EPSG:4978",
).to_crs(epsg=4326)


In [6]:
# Preview the reprojected instruments (geometry column now in EPSG:4326).
instrument_gdf.head(n=5)

Unnamed: 0,id,name,type,code,x,y,z,dx,dy,dz,geometry
0,10002S018,Grasse (OCA),DORIS,GR3B,4581680.0,556166.4818,4389372.0,0.002,0.0025,0.002,POINT Z (6.92123 43.75483 1323.70087)
1,10002S019,Grasse (OCA),DORIS,GR4B,4581681.0,556166.9141,4389371.0,0.0019,0.0024,0.0017,POINT Z (6.92123 43.75483 1323.8158)
2,10003S001,Toulouse,DORIS,TLSA,4628047.0,119670.6873,4372788.0,0.0054,0.0062,0.0051,POINT Z (1.48121 43.55814 207.69101)
3,10003S003,Toulouse,DORIS,TLHA,4628693.0,119985.077,4372105.0,0.0034,0.0042,0.0032,POINT Z (1.48489 43.54962 210.79597)
4,10003S005,Toulouse,DORIS,TLSB,4628694.0,119985.0787,4372105.0,0.0026,0.0039,0.0025,POINT Z (1.48489 43.54962 211.08413)


In [7]:
# Split the `id` string: its first five characters identify the site,
# the remaining characters identify the instrument within that site.
domes_numbers = instrument_gdf['id']
instrument_gdf['site_id'] = domes_numbers.str.slice(stop=5)
instrument_gdf['instrument_id'] = domes_numbers.str.slice(start=5)

In [8]:
# Remove the dx/dy/dz columns, which are not used later in the notebook.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the columns are already gone.
instrument_gdf = instrument_gdf.drop(columns=['dx', 'dy', 'dz'], errors='ignore')

In [9]:
# Persist the instruments as a GeoPackage layer.
# NOTE(review): 'layer_name' looks like a placeholder layer name - confirm.
instrument_gdf.to_file(
    filename="instruments.gpkg",
    driver="GPKG",
    layer='layer_name',
)

In [10]:
# Preview the saved table: dx/dy/dz removed, site_id / instrument_id added.
instrument_gdf.head(n=5)

Unnamed: 0,id,name,type,code,x,y,z,geometry,site_id,instrument_id
0,10002S018,Grasse (OCA),DORIS,GR3B,4581680.0,556166.4818,4389372.0,POINT Z (6.92123 43.75483 1323.70087),10002,S018
1,10002S019,Grasse (OCA),DORIS,GR4B,4581681.0,556166.9141,4389371.0,POINT Z (6.92123 43.75483 1323.8158),10002,S019
2,10003S001,Toulouse,DORIS,TLSA,4628047.0,119670.6873,4372788.0,POINT Z (1.48121 43.55814 207.69101),10003,S001
3,10003S003,Toulouse,DORIS,TLHA,4628693.0,119985.077,4372105.0,POINT Z (1.48489 43.54962 210.79597),10003,S003
4,10003S005,Toulouse,DORIS,TLSB,4628694.0,119985.0787,4372105.0,POINT Z (1.48489 43.54962 211.08413),10003,S005


### 2. Creation of the second SHP (sites):

- Keep only the instruments that belong to a site (look at the first five digits of the DOMES (id) number) which hosts at least 3 instruments from 2 different measurement techniques (GNSS, DORIS, SLR or VLBI).
- Make a spatial groupby (dissolve) to join all the points from the same site.
- Calculate a polygon from the list of points (you will need the shapely Polygon function and the shapely convex_hull property).
- Save the data!

In [11]:


# Per-site summary: number of instruments and number of distinct techniques.
site_stats = (
    instrument_gdf
    .groupby('site_id')
    .agg(
        instrument_count=pd.NamedAgg(column='instrument_id', aggfunc='size'),
        unique_types=pd.NamedAgg(column='type', aggfunc='nunique'),
    )
    .reset_index()
)

# A site qualifies when it hosts at least 3 instruments from at least
# 2 different measurement techniques.
has_enough_instruments = site_stats['instrument_count'] >= 3
has_enough_techniques = site_stats['unique_types'] >= 2
eligible_sites = site_stats[has_enough_instruments & has_enough_techniques]

# Instruments located on a qualifying site.
on_eligible_site = instrument_gdf['site_id'].isin(eligible_sites['site_id'])
filtered_sites = instrument_gdf[on_eligible_site]



In [12]:
# Number of instruments hosted by each site (kept for inspection).
site_counts = instrument_gdf['site_id'].value_counts()

# Keep only the instruments of sites that satisfy BOTH task criteria:
# at least 3 instruments AND at least 2 measurement techniques.
# The previous filter (`site_counts > 2`) ignored the technique criterion,
# so single-technique sites leaked into the dissolve step. `eligible_sites`
# (computed in the cell above) already encodes both conditions.
# This cell also replaces an identical duplicate cell that followed it.
instrument_gdf_filtered = instrument_gdf[
    instrument_gdf['site_id'].isin(eligible_sites['site_id'])
]

In [23]:
# Merge all instrument points of a site into one multi-point geometry.
# Non-geometry columns keep the first value found for the site.
instruments_dissolved = instrument_gdf_filtered.dissolve(
    by='site_id',
    aggfunc='first',
)

In [24]:
# Preview the per-site multi-point geometries (indexed by site_id).
instruments_dissolved.head(n=5)

Unnamed: 0_level_0,geometry,id,name,type,code,x,y,z,instrument_id
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10002,"MULTIPOINT Z (6.92058 43.75474 1319.30335, 6.9...",10002S018,Grasse (OCA),DORIS,GR3B,4581680.0,556166.5,4389372.0,S018
10003,"MULTIPOINT Z (1.48076 43.56077 207.09635, 1.48...",10003S001,Toulouse,DORIS,TLSA,4628047.0,119670.7,4372788.0,S001
10004,"MULTIPOINT Z (-4.50383 48.40787 104.42082, -4....",10004M004,Brest,GNSS,BRST,4231162.0,-332746.5,4745131.0,M004
10077,"MULTIPOINT Z (8.76246 41.92747 98.24128, 8.762...",10077S002,Ajaccio,DORIS,AJAB,4696990.0,723981.2,4239679.0,S002
10202,"MULTIPOINT Z (-21.99518 64.15098 95.75501, -21...",10202S001,Reykjavik,DORIS,REYA,2585528.0,-1044368.0,5717159.0,S001


In [25]:
# Site footprint: convex hull of the site's instrument points,
# stored next to the multi-point geometry in a new 'Polygon' column.
site_points = instruments_dissolved['geometry']
instruments_dissolved['Polygon'] = site_points.convex_hull

In [26]:
# Preview the sites with their convex-hull 'Polygon' column.
instruments_dissolved.head(n=5)

Unnamed: 0_level_0,geometry,id,name,type,code,x,y,z,instrument_id,Polygon
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
10002,"MULTIPOINT Z (6.92058 43.75474 1319.30335, 6.9...",10002S018,Grasse (OCA),DORIS,GR3B,4581680.0,556166.5,4389372.0,S018,"POLYGON Z ((6.92077 43.75449 1319.85842, 6.920..."
10003,"MULTIPOINT Z (1.48076 43.56077 207.09635, 1.48...",10003S001,Toulouse,DORIS,TLSA,4628047.0,119670.7,4372788.0,S001,"POLYGON Z ((1.48489 43.54962 210.79597, 1.4812..."
10004,"MULTIPOINT Z (-4.50383 48.40787 104.42082, -4....",10004M004,Brest,GNSS,BRST,4231162.0,-332746.5,4745131.0,M004,"POLYGON Z ((-4.49659 48.3805 65.82489, -4.5038..."
10077,"MULTIPOINT Z (8.76246 41.92747 98.24128, 8.762...",10077S002,Ajaccio,DORIS,AJAB,4696990.0,723981.2,4239679.0,S002,"POLYGON Z ((8.7627 41.92739 96.80211, 8.76246 ..."
10202,"MULTIPOINT Z (-21.99518 64.15098 95.75501, -21...",10202S001,Reykjavik,DORIS,REYA,2585528.0,-1044368.0,5717159.0,S001,"POLYGON Z ((-21.95549 64.13879 93.04831, -21.9..."


In [27]:
# Make the convex hull the active geometry BEFORE discarding the multi-point
# column. Dropping the active 'geometry' column first leaves the frame with no
# active geometry, and the following `to_file` call then fails.
# errors='ignore' keeps the cell re-runnable once 'geometry' is already gone.
instruments_dissolved = (
    instruments_dissolved
    .set_geometry('Polygon')
    .drop(columns='geometry', errors='ignore')
)

In [28]:
# Persist the site footprints as a GeoPackage layer.
# NOTE(review): 'layer_name' looks like a placeholder layer name - confirm.
instruments_dissolved.to_file(
    filename="instruments_polygon.gpkg",
    driver="GPKG",
    layer='layer_name',
)

### 3. Creation of the last SHP (images):

- For each site, list the images (between 2022/01/01 and 2022/09/30) that are covering the extent of the site. This is very long!!! Write the information in a <site_id>.json temporary file to be able to restart the script if it fails.
- Merge all information in one GeoDataFrame.
- Save the data!

In [30]:
import requests

def request_images(wkt_geometry, timeout=60):
    """Query the Copernicus Data Space catalogue (Sentinel2 collection) for a geometry.

    The request parameters restrict the search to 2022-01-01 .. 2022-09-30,
    to a cloud cover range of [0, 10], and to the first page of at most
    20 records.

    Parameters
    ----------
    wkt_geometry : str or geometry-like
        Search geometry as WKT text. An object exposing a ``wkt`` attribute
        (for example a shapely geometry) is also accepted and converted.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60).
        Without a timeout a stalled connection blocks the notebook forever.

    Returns
    -------
    list
        The ``features`` entries of the JSON response. An empty list is
        returned (and a warning emitted) when the request fails or the
        HTTP status is not 200.
    """
    # Accept both WKT strings and geometry objects.
    geometry_wkt = getattr(wkt_geometry, "wkt", wkt_geometry)

    try:
        r = requests.get(
            "https://catalogue.dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json",
            params={
                "geometry": geometry_wkt,
                "startDate": "2022-01-01T00:00:00.000Z",
                "completionDate": "2022-09-30T23:59:59.999Z",
                "cloudCover": "[0,10]",
                "maxRecords": 20,
                "page": 1,
            },
            timeout=timeout,
        )
        # Any status other than 200 means the search did not succeed.
        if r.status_code != 200:
            warnings.warn(f"Image search failed with HTTP status {r.status_code}")
            return []
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout or an unreadable JSON body: report it and
        # let the caller carry on with the remaining sites.
        warnings.warn(f"Image search failed: {exc}")
        return []

    items = []  # Features returned by the catalogue
    if 'features' in data:
        items += data['features']
    return items


In [31]:
def load_or_request_images(site_id, site_polygon):
    """Return the image list of one site, using `<site_id>.json` as a cache.

    A non-empty cached list is reused as-is. Otherwise the catalogue is
    queried, and a non-empty answer is written to the cache immediately so an
    interrupted run can be restarted without repeating finished sites.
    Empty lists are never cached because `request_images` also returns an
    empty list when a request fails; such sites are simply retried.
    """
    cache_path = Path(f"{site_id}.json")
    if cache_path.exists():
        with cache_path.open() as cache_file:
            cached_images = json.load(cache_file)
        if cached_images:
            return cached_images

    site_images = request_images(site_polygon.wkt)
    if site_images:
        with cache_path.open("w") as cache_file:
            json.dump(site_images, cache_file)
    return site_images


# Fetch site by site (this is slow) instead of one all-or-nothing `apply`,
# so every completed site is already on disk if the run fails midway.
images_by_site = {
    site_id: load_or_request_images(site_id, site_polygon)
    for site_id, site_polygon in instruments_dissolved['Polygon'].items()
}
instruments_dissolved['images'] = pd.Series(images_by_site, dtype=object)

In [44]:
import json


# Dump the whole per-site table (image lists included) as CSV.
instruments_dissolved.to_csv("instruments_dissolved.csv")

# Write one `<site_id>.json` file per site holding that site's image list.
for site_key, site_images in instruments_dissolved['images'].items():
    with open(f"{site_key}.json", "w") as json_file:
        json_file.write(json.dumps(site_images))

#### Visualise some images 

In [41]:
# Display images for only 5 sites.
# `Image` and `display` come from IPython.display (imported at the top of the
# notebook); previously `Image` was never imported, so this cell failed on a
# fresh kernel.
required_image_keys = ('type', 'id', 'geometry', 'properties')

for site_id, images in instruments_dissolved['images'].head(5).items():
    if not isinstance(images, list):
        print(f"Site {site_id} does not have a list of images.")
        continue

    for image in images:
        if not isinstance(image, dict):
            print(f"Site {site_id} has an image that is not a dictionary.")
            continue
        if any(key not in image for key in required_image_keys):
            print(f"Site {site_id} has an image with missing keys.")
            continue

        print(f"Site {site_id} has a valid image with id: {image['id']}")
        # Only render a thumbnail when a non-empty URL is present.
        thumbnail_url = image['properties'].get('thumbnail')
        if thumbnail_url:
            display(Image(url=thumbnail_url))


Site 10002 has a valid image with id: 3aaee0d9-5dc8-50f4-ae6a-5627e984ba2a


Site 10002 has a valid image with id: 00951cef-b763-5d0f-8fe8-b7212c7588c5


Site 10002 has a valid image with id: 0eab919b-be5b-575a-a764-c1a9d17f1bbf


Site 10002 has a valid image with id: 4743ad8e-ffc4-5b5f-a5dc-3dc933ddf292


Site 10002 has a valid image with id: 87612d35-a8cd-51cd-85d9-715ebd00f756


Site 10002 has a valid image with id: 7c89e164-3e1b-5f3e-9a4d-b9876c1c1546


Site 10002 has a valid image with id: f877e68e-3d6c-55fb-b90a-9b178039eccf


Site 10002 has a valid image with id: fa462c23-b2ee-5fee-8b81-d8ee4f3e4bb9


Site 10002 has a valid image with id: e80ac437-4085-549b-a8ef-7b5eda47b2e7


Site 10002 has a valid image with id: 99a822bd-6e77-5271-9e82-f10919f53432


Site 10002 has a valid image with id: 29f7f967-326d-50bf-9e51-3dcc08e010d6


Site 10002 has a valid image with id: c692f4a4-7a30-5ef1-b810-6b55b77e52c6


Site 10002 has a valid image with id: 62b51844-10bf-5e68-91a7-2580c09eadc8


Site 10002 has a valid image with id: 107344c3-7170-5252-bb5f-aefa6ebc76a5


Site 10002 has a valid image with id: 658abd1a-fee3-5f62-b188-f14d2c937a98


Site 10002 has a valid image with id: 1e529bea-7515-5ae8-8270-159e7153232c


Site 10002 has a valid image with id: 84a351a4-c6e2-5b28-9313-f3533da15e1c


Site 10002 has a valid image with id: 93b4cd19-9a92-5dac-bcf6-ea4d530edb19


Site 10002 has a valid image with id: bd84c0c4-51d2-5680-953c-fc16a2708553


Site 10002 has a valid image with id: 36ecc716-a2a4-566c-877a-fae6af497742


Site 10003 has a valid image with id: ec295205-630e-551e-8930-136ec9bc0014


Site 10003 has a valid image with id: bf37c4c0-1876-5d47-a205-204b9f1da5e7


Site 10003 has a valid image with id: afe07aa6-2814-5d36-9d29-67277371a24b


Site 10003 has a valid image with id: f27d6cdc-94c8-592b-a94e-c7722dfa8dae


Site 10003 has a valid image with id: 5420e51f-131d-5ea9-9189-d34d35261436


Site 10003 has a valid image with id: 69f49a1b-43ef-5d5d-b646-9db4749b4efd


Site 10003 has a valid image with id: 88fdb08b-583c-5b74-8a1f-d5f66c829e1f


Site 10003 has a valid image with id: 1ad7099f-e4ef-5699-adab-7bda9a08424d


Site 10003 has a valid image with id: 16c98f8e-3a35-5c97-b7fd-95a7c20a9166


Site 10003 has a valid image with id: 0fe544ce-2f9f-59f8-8c0d-b069c48c042d


Site 10003 has a valid image with id: 43e26710-89d6-5b59-8728-808e1c7b38ef


Site 10003 has a valid image with id: 84d1bede-4592-5cd5-b553-4fc830935cf5


Site 10003 has a valid image with id: b6f4a3e9-49b7-53ce-bbc7-ad95a3f1cc25


Site 10003 has a valid image with id: eef5be3a-d580-5514-96bd-f2662bd771ca


Site 10003 has a valid image with id: 81ebf064-9a6a-5025-bec0-c8c6de17520e


Site 10003 has a valid image with id: a6a9e62c-a6bf-5690-999f-93afaf0e0107


Site 10003 has a valid image with id: a14cf2e2-1006-5b81-8942-de98440698bb


Site 10003 has a valid image with id: d544e2dd-8730-5029-91d4-4e19ff33bdae


Site 10003 has a valid image with id: 4bdb01e7-e4a1-5279-b170-9536f36913db


Site 10003 has a valid image with id: 8d2cb599-666f-56f0-a625-6a2f712e8dff


Site 10004 has a valid image with id: 2f680482-732f-5913-b4ce-62798d78fd3b


Site 10004 has a valid image with id: 24f87785-959d-5ff3-b3b3-ef76930b0c17


Site 10004 has a valid image with id: 63140013-61bc-5b74-bdc3-26320f378149


Site 10004 has a valid image with id: c98cef7e-bd0d-5fb5-a076-b5cac635c1a9


Site 10004 has a valid image with id: c6b9f1a2-51fd-53bf-ac1c-d37b8713305b


Site 10004 has a valid image with id: f67edc8b-b0c8-5b06-8c2b-b3908b265ed2


Site 10004 has a valid image with id: 5cc7e7ae-e16d-57be-83c0-40e24b76660b


Site 10004 has a valid image with id: 0eedf788-506b-50a2-8ac6-01e7f22b21ae


Site 10004 has a valid image with id: bf74b458-5152-5772-b1af-8cde931eff6c


Site 10004 has a valid image with id: eeb84549-1732-53ef-80dd-5153ab7ff0e7


Site 10004 has a valid image with id: 86684709-c75a-56c0-b48e-32c92080711a


Site 10004 has a valid image with id: f83947ed-9759-5eaa-b272-230c46c25070


Site 10004 has a valid image with id: 59fdc9ee-d8a0-5f0f-9540-e02d87267709


Site 10004 has a valid image with id: ac71fb64-ce4f-5c8a-80ef-b206f5d27e2e


Site 10004 has a valid image with id: 7d5342b7-7902-5a4f-ac76-d6a31e173b3d


Site 10004 has a valid image with id: 45f7f344-3a43-57d9-a674-101a2b7ea5d7


Site 10004 has a valid image with id: 4f9603f0-a470-598b-84e1-c0a72b61cc6e


Site 10004 has a valid image with id: 44997ebc-feb2-5ef7-9e6e-71e3d73c2081


Site 10004 has a valid image with id: 01ef8e8e-d614-5d12-bb9b-4836f3bcbd4d


Site 10004 has a valid image with id: 47a6f6c9-3c70-5ebe-9a07-4ccb7720806d


Site 10077 has a valid image with id: 8a61d12b-36ac-5527-b1cc-835b2799cdfb


Site 10077 has a valid image with id: 431eedfa-4729-58cd-bad2-cbc0328dd0ce


Site 10077 has a valid image with id: 3f4d811c-50dc-5c07-8ee2-3e2c9a0fed09


Site 10077 has a valid image with id: cf68010f-32af-5550-807f-d391ebd158a2


Site 10077 has a valid image with id: dadef606-926b-5d7c-a24c-2a6c10a1cd07


Site 10077 has a valid image with id: abc71ddf-200c-546f-95f3-4d218b06d281


Site 10077 has a valid image with id: 1636765a-a59d-5108-a20d-5946964426f2


Site 10077 has a valid image with id: 2f3c0e8b-3484-55be-bb27-9ceaf9b3835d


Site 10077 has a valid image with id: 3bc2fcac-3997-564b-9d1e-b1e3cd23c5f0


Site 10077 has a valid image with id: b706c37c-d584-5caf-a11e-10a66223d42b


Site 10077 has a valid image with id: ba693bc5-52c6-50c4-a2fb-f54cba221a85


Site 10077 has a valid image with id: 1ee1b546-22e2-5a08-ab97-218c9e900f28


Site 10077 has a valid image with id: 26ecd5f8-76ca-5f16-a689-730a90e2dabd


Site 10077 has a valid image with id: 83eb2e77-7e1f-5947-bb82-0aab51773ba3


Site 10077 has a valid image with id: 113c6634-9ff7-59c6-b125-ec5ca106e6a7


Site 10077 has a valid image with id: 038ceff1-ee5b-5bce-b349-f32f5c12796b


Site 10077 has a valid image with id: 4f4e8ec8-6a88-5b40-a298-03bbdeecf3cb


Site 10077 has a valid image with id: f38e5985-6edb-5564-8c42-1425c399cfce


Site 10077 has a valid image with id: 262c95df-774c-55bd-9d28-5d5c26798b08


Site 10077 has a valid image with id: 5d1a0251-9dda-5ffc-9fdf-6e229038b788


Site 10202 has a valid image with id: ffcb5df4-d22c-545a-99e0-5998f012de85


Site 10202 has a valid image with id: e259bada-8c84-5f55-aeca-e4376eecf5d5


Site 10202 has a valid image with id: a9ef7679-f2b5-5eaa-8de2-2c42e403495a


In [22]:
# Reuse the per-site image lists already stored in the 'images' column.
# The previous version re-sent every catalogue request (very slow) and passed
# geometry objects where `request_images` expects WKT text.
images = instruments_dissolved['images']