In [1]:
import os
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
data_folder = f"../data"

sample_filename = "muestreos_parcelas_utm.parquet"

df_samples = pd.read_parquet(f"{data_folder}/{sample_filename}")
#df_samples.head(20).to_csv("sample_parcelas.txt", sep="\t")

df_samples["fecha"] = pd.to_datetime(df_samples["fecha"])
df_samples.sort_values(by="fecha", inplace=True)

df_samples['año'] = df_samples['fecha'].dt.year

df_samples.info()

<class 'pandas.core.frame.DataFrame'>
Index: 581793 entries, 119217 to 541761
Data columns (total 63 columns):
 #   Column                                              Non-Null Count   Dtype         
---  ------                                              --------------   -----         
 0   generated_muestreos                                 581793 non-null  datetime64[us]
 1   codparcela                                          581793 non-null  category      
 2   provincia                                           581793 non-null  string        
 3   municipio                                           581793 non-null  object        
 4   fecha                                               581793 non-null  datetime64[us]
 5   campaña                                             581793 non-null  uint16        
 6   poligono                                            581731 non-null  UInt16        
 7   parcela                                             581763 non-null  UInt16        

In [3]:
# Adding the number of days till next visit for each row
df_samples["next_date"] = df_samples.groupby("codparcela", observed=True)["fecha"].shift(-1)
df_samples["days_until_next_visit"] = (df_samples["next_date"] - df_samples["fecha"]).dt.days


# # Removing the parcels with only one entry and the last entry for every parcel (we would need to also add the y value of the next entry)

df_samples = df_samples.dropna(subset=["days_until_next_visit"])  # 5150 entries removed
df_samples["days_until_next_visit"] = df_samples["days_until_next_visit"].astype("int32")


In [7]:
# Realizar el groupby y las agregaciones
group_dates = df_samples.groupby('codparcela').agg({'fecha': ['min', 'max', 'count']}).reset_index()

# Renombrar las columnas resultantes
group_dates.columns = ['codparcela', 'fecha_primera_muestra', 'fecha_ultima_muestra', 'n_muestras']

# Mostrar los resultados
group_dates = group_dates.sort_values(by="n_muestras", ascending=False)

  group_dates = df_samples.groupby('codparcela').agg({'fecha': ['min', 'max', 'count']}).reset_index()


In [8]:
#group_dates = df_samples.groupby(["codparcela"])["fecha"].agg([('min_fecha', 'min'), ('max_fecha', 'max')]).reset_index()
#group_max_diff_days = df_samples.groupby(["codparcela", "año", "days_until_next_visit"])["days_until_next_visit"].agg([('max_days_until_next_visit', 'max')]).reset_index()

#group_coords = df_samples.groupby("codparcela")[["municipio", "102_coordenada_x_(utm)", "103_coordenada_y_(utm)", "huso_etrs89_regcan95"]].apply("first").reset_index()
group_coords = df_samples.groupby('codparcela').agg({'municipio': 'first', 
                                      '102_coordenada_x_(utm)': 'first', 
                                      '103_coordenada_y_(utm)': 'first', 
                                      'huso_etrs89_regcan95': 'first'}).reset_index()


  group_coords = df_samples.groupby('codparcela').agg({'municipio': 'first',


In [5]:
group_coords.head()

Unnamed: 0,codparcela,municipio,102_coordenada_x_(utm),103_coordenada_y_(utm),huso_etrs89_regcan95
0,000-00000-00,algodonales,276171.0,4082574.0,30
1,000-00000-00-0,villamartin,263498.79,4080502.92,30
2,000-00900-00-U0,alfacar,449521.0,4124050.0,30
3,000-00999-00-00,nivar,449772.0,4123208.0,30
4,000-00999-00-FC,nivar,449772.0,4123208.0,30


In [22]:
df_parcelas  = pd.merge(group_dates, group_coords, on='codparcela', how='inner')
df_parcelas.head()

Unnamed: 0,codparcela,fecha_primera_muestra,fecha_ultima_muestra,n_muestras,municipio,102_coordenada_x_(utm),103_coordenada_y_(utm),huso_etrs89_regcan95
0,015-00002-00-00,2006-03-08,2021-03-16,927,torredonjimeno,413322.0,4184311.0,30
1,043-00008-00-00,2006-04-10,2017-11-13,814,olvera,295019.0,4091226.0,30
2,017-00204-00-00,2006-05-10,2019-09-03,757,sabiote,472587.0,4211647.0,30
3,010-00016-00-00,2006-03-06,2017-07-18,725,canena,455318.0,4213752.0,30
4,007-00018-01-01,2010-03-08,2021-03-24,706,adamuz,371625.0,4211704.0,30


In [23]:
import utm


def utm_to_latlon(row):
    try:
        lat, lon = utm.to_latlon(row["102_coordenada_x_(utm)"], row["103_coordenada_y_(utm)"], 30, 'S')
    except:
        lat, lon = -9999999, -9999999

    return pd.Series([lat, lon], index=['lat', 'lon'])


df_parcelas[['lat', 'lon']] = df_parcelas.apply(utm_to_latlon, axis=1)

df_parcelas[(df_parcelas["lat"] < 40) &
               (df_parcelas["lat"] > 35) &
               (df_parcelas["lat"] > -10) &
               (df_parcelas["lat"] < 0)]

df_parcelas["n_muestras_cumsum"] = df_parcelas["n_muestras"].cumsum()

df_parcelas.head()

Unnamed: 0,codparcela,fecha_primera_muestra,fecha_ultima_muestra,n_muestras,municipio,102_coordenada_x_(utm),103_coordenada_y_(utm),huso_etrs89_regcan95,lat,lon,n_muestras_cumsum
0,015-00002-00-00,2006-03-08,2021-03-16,927,torredonjimeno,413322.0,4184311.0,30,37.802069,-3.984598,927
1,043-00008-00-00,2006-04-10,2017-11-13,814,olvera,295019.0,4091226.0,30,36.944813,-5.301941,1741
2,017-00204-00-00,2006-05-10,2019-09-03,757,sabiote,472587.0,4211647.0,30,38.052148,-3.312451,2498
3,010-00016-00-00,2006-03-06,2017-07-18,725,canena,455318.0,4213752.0,30,38.070431,-3.509408,3223
4,007-00018-01-01,2010-03-08,2021-03-24,706,adamuz,371625.0,4211704.0,30,38.043972,-4.46301,3929


In [26]:
df_parcelas.to_csv("parcelas_download.txt", sep="\t", index=None)

In [93]:
df_parcelas = df_parcelas[df_parcelas["n_muestras_cumsum"]>578377]

In [94]:
df_parcelas.info()

<class 'pandas.core.frame.DataFrame'>
Index: 578 entries, 4661 to 5238
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   codparcela              578 non-null    category      
 1   fecha_primera_muestra   578 non-null    datetime64[us]
 2   fecha_ultima_muestra    578 non-null    datetime64[us]
 3   n_muestras              578 non-null    int64         
 4   municipio               578 non-null    object        
 5   102_coordenada_x_(utm)  436 non-null    float64       
 6   103_coordenada_y_(utm)  436 non-null    float64       
 7   huso_etrs89_regcan95    578 non-null    int64         
 8   lat                     578 non-null    float64       
 9   lon                     578 non-null    float64       
 10  n_muestras_cumsum       578 non-null    int64         
dtypes: category(1), datetime64[us](2), float64(4), int64(3), object(1)
memory usage: 220.8+ KB


In [95]:
df_parcelas.head()

Unnamed: 0,codparcela,fecha_primera_muestra,fecha_ultima_muestra,n_muestras,municipio,102_coordenada_x_(utm),103_coordenada_y_(utm),huso_etrs89_regcan95,lat,lon,n_muestras_cumsum
4661,010-00073-00-00,2006-07-24,2008-11-13,15,arenas del rey,419387.0,4094145.0,30,36.989987,-3.905878,578392
4662,024-00005-01-0,2020-07-07,2020-10-19,15,pegalajar,444574.0,4178006.0,30,37.747676,-3.629142,578407
4663,006-00057-02-01,2020-04-08,2020-11-17,15,colomera,440132.72,4138854.93,30,37.39453,-3.676353,578422
4664,011-00036-49-00,2015-03-18,2015-06-24,15,montoro,381186.0,4209486.0,30,38.025293,-4.35371,578437
4665,024-00079-00-00,2006-03-22,2006-06-28,15,torres,453672.0,4187896.0,30,37.837319,-3.526507,578452


In [96]:
from datetime import datetime, date
import requests
import matplotlib.pyplot as plt
import numpy as np
import time
import logging

logger = logging.getLogger(__name__)
logger.level = logging.INFO

def request_with_cooloff(api_url: str, payload: str, num_attempts: int):
    cooloff = 1

    for call_count in range(cooloff):
        try:
            response = requests.get(api_url, params=payload)
            response.raise_for_status()
            break

        except requests.exceptions.ConnectionError as e:
            logger.info("API refused the connection")
            logger.warning(e)
            if call_count != (num_attempts - 1):
                time.sleep(cooloff)
                cooloff *= 2
                call_count += 1
                continue
            else:
                raise

        except requests.exceptions.HTTPError as e:
            logger.warning(e)
            if response.status_code == 404:
                raise

            logger.info(f"API return code {response.status_code} cooloff at {call_count}")
            if call_count != (num_attempts - 1):
                time.sleep(cooloff)
                cooloff *= 2
                call_count += 1
                continue
            else:
                raise

    # We got through the loop without error so we've received a valid response
    return response

def getTimeseriesForPoint(covId, tsvBaseURL, start, end, lat, lon, printURL=False):
    tsURL = tsvBaseURL + covId + '/point'
    payload = {
        'lon': str(lon),
        'lat': str(lat),
        'startDate': start.strftime('%Y-%m-%d'),
        'endDate': end.strftime('%Y-%m-%d')
    }

    if printURL:
        print(tsURL, payload)

    # Introduce a cooldown of 1 second between API calls
    response = request_with_cooloff(api_url=tsURL, payload=payload, num_attempts=15)
    timeseries = response.json()['results']
    
    return timeseries

In [97]:
for i, row in df_parcelas.iterrows():

    id_parcela = row["codparcela"]
    start_time = datetime.strptime(row["fecha_primera_muestra"].strftime("%d-%m-%Y"), "%d-%m-%Y")
    end_time =datetime.strptime(row["fecha_ultima_muestra"].strftime("%d-%m-%Y"), "%d-%m-%Y")

    start_time_document = start_time.strftime("%Y_%m_%d")
    end_time_document = end_time.strftime("%Y_%m_%d")

    lat_pd = row["lat"]
    lon_pd = row["lon"]

    cumsum = row["n_muestras_cumsum"]
    n_samples = row["n_muestras"] 
    


    TSlayers = ["BIOPAR_FAPAR_V2_GLOBAL",
        "BIOPAR_NDVI_V2_GLOBAL"]

    for l in TSlayers:
        ts = getTimeseriesForPoint(covId=l,
                            tsvBaseURL='https://services.terrascope.be/timeseries/v1.0/ts/',
                                start=start_time,
                                end=end_time,
                                lat=lat_pd,

                                
                                lon=lon_pd,
                                printURL=False)

        df = pd.DataFrame()
        n_points = len(ts)
        dates = [ts[i]["date"] for i in range(n_points)]
        values = [ts[i]["result"]["average"] for i in range(n_points)]

        df["date"] = dates
        df["value"] = values

        filename = f"{id_parcela}_{l}_{start_time_document}_{end_time_document}.txt".replace("/", "")

        df.to_csv(f"{data_folder}/datos_parcela/{filename}", sep="\t")

    print(f"Parcela:{id_parcela}, cumsum samples={cumsum}")


Parcela:010-00073-00-00, cumsum samples=578392
[25/Nov/2023 23:03:27] INFO - API return code 500 cooloff at 0


KeyError: 'results'

In [81]:
print(f"{data_folder}/datos_parcela/{id_parcela}_{l}_{start_time_document}_{end_time_document}.txt")

../data/datos_parcela/115-00087-00-/2_BIOPAR_FAPAR_V2_GLOBAL_2006_04_11_2007_11_26.txt


In [87]:
df_parcelas["codparcela"][df_parcelas["codparcela"].str.contains("/")]

3142    115-00087-00-/2
Name: codparcela, dtype: category
Categories (5239, object): ['000-00000-00', '000-00000-00-0', '000-00900-00-U0', '000-00999-00-00', ..., 'RAIF-2', 'RAIF-3', 'RAIF-4', 'RAIF-5']

In [34]:
response

NameError: name 'response' is not defined

Unnamed: 0,date,value
0,2006-03-11,0.384
1,2006-03-21,0.448
2,2006-04-01,0.396
3,2006-04-11,0.384
4,2006-04-21,0.292
...,...,...
529,2020-11-21,0.38
530,2020-12-01,0.416
531,2020-12-11,0.356
532,2020-12-21,0.432


In [37]:
tss[0][1]["date"]

'2006-03-20'

In [103]:
import requests
import matplotlib.pyplot as plt
import numpy as np
tsvBaseURL='https://services.terrascope.be/timeseries/v1.0/ts/'

response = requests.get(tsvBaseURL) #this returns the layers that are available

if response.status_code == 200:
    layerlist = response.json()['layers']
else:
    raise IOError(response.text)

Unnamed: 0,codparcela,min_fecha,max_fecha,municipio,102_coordenada_x_(utm),103_coordenada_y_(utm),huso_etrs89_regcan95,days,lat,lon
4,000-00999-00-FC,2006-05-02,2006-10-25,nivar,449772.0,4123208.0,30.0,176.0,37.254069,-3.566399
5,001-00001-00-06,2012-03-27,2012-04-23,montejicar,455827.26,4161193.04,30.0,27.0,37.596752,-3.500392
18,001-00004-00-10,2006-04-28,2006-11-08,guadalcazar,326630.0,4183572.0,30.0,194.0,37.78307,-4.968778
22,001-00006,2020-04-07,2020-10-13,ubeda,464989.0,4221345.0,30.0,189.0,38.139289,-3.399527
23,001-00006-00-00,2006-03-22,2006-11-14,monturque,361200.0,4152500.0,30.0,237.0,37.509016,-4.57045
