# EDA - Anàlisi de Dades Agrícoles i Climàtiques

Aquest notebook realitza una exploració inicial de tots els fitxers disponibles a la carpeta `data/` amb l'objectiu d'aconseguir unificar tota la informació disponible en data frames.


In [1]:
import os

import country_converter as coco
import numpy as np
import pandas as pd


In [2]:
# Configuració de pandas per mostrar tots els prints complets
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)
pd.set_option('display.precision', 6)

In [3]:
# Definir la ruta base dels fitxers de dades
DATA_DIR = "../data"
CLIMATE_DATA_DIR = os.path.join(DATA_DIR, "climate_data")
FAOSTAT_DIR = os.path.join(DATA_DIR, "faostat")

In [4]:
# Inicialitzar el diccionari principal
data_dict = {
    "climate_data": {},
    "faostat": {}
}

In [5]:
# Carregar fitxers de climate_data
print("Carregant fitxers de climate_data...")

# CSV: Climate_Indicators_Annual_Mean_Global_Surface_Temperature.csv
climate_temp_file = os.path.join(CLIMATE_DATA_DIR, "Climate_Indicators_Annual_Mean_Global_Surface_Temperature.csv")
data_dict["climate_data"]["Climate_Indicators_Annual_Mean_Global_Surface_Temperature"] = pd.read_csv(climate_temp_file)
print(f"Carregat: Climate_Indicators_Annual_Mean_Global_Surface_Temperature.csv")

# CSV: World_Development_Indicators.csv
wdi_file = os.path.join(CLIMATE_DATA_DIR, "World_Development_Indicators.csv")
data_dict["climate_data"]["World_Development_Indicators"] = pd.read_csv(wdi_file)
print(f"Carregat: World_Development_Indicators.csv")

# Excel: tas_pr_annual_mean_historical.xlsx
excel_file = os.path.join(CLIMATE_DATA_DIR, "tas_pr_annual_mean_historical.xlsx")
try:
    # Llegir totes les fulls de l'Excel
    excel_data = pd.ExcelFile(excel_file)
    for sheet_name in excel_data.sheet_names:
        df_name = f"annual_mean_historical_{sheet_name}"
        data_dict["climate_data"][df_name] = pd.read_excel(excel_file, sheet_name=sheet_name)
        print(f"Carregat: tas_pr_annual_mean_historical.xlsx (full: {sheet_name})")
except Exception as e:
    print(f"Error carregant Excel: {e}")

print(f"\nTotal fitxers climate_data carregats: {len(data_dict['climate_data'])}")


Carregant fitxers de climate_data...
Carregat: Climate_Indicators_Annual_Mean_Global_Surface_Temperature.csv
Carregat: World_Development_Indicators.csv
Carregat: tas_pr_annual_mean_historical.xlsx (full: pr)
Carregat: tas_pr_annual_mean_historical.xlsx (full: tas)

Total fitxers climate_data carregats: 4


In [6]:
# Carregar fitxers de faostat
print("Carregant fitxers de faostat...")

# Llistar totes les carpetes de faostat
faostat_folders = [f for f in os.listdir(FAOSTAT_DIR) 
                   if os.path.isdir(os.path.join(FAOSTAT_DIR, f))]

for folder in sorted(faostat_folders):
    folder_path = os.path.join(FAOSTAT_DIR, folder)
    # Eliminar el sufix '_E_All_Data_(Normalized)' del nom de la carpeta (ja que totes ho tenen)
    folder_key = folder.replace('_E_All_Data_(Normalized)', '')
    
    # Inicialitzar el diccionari per aquesta carpeta
    data_dict["faostat"][folder_key] = {}
    
    # Llistar tots els fitxers CSV de la carpeta
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    
    for csv_file in sorted(csv_files):
        csv_path = os.path.join(folder_path, csv_file)
        # Utilitzar el nom del fitxer (sense extensió) com a clau
        file_key = csv_file.replace('.csv', '')
        
        try:
            data_dict["faostat"][folder_key][file_key] = pd.read_csv(csv_path)
            print(f"Carregat: {folder}/{csv_file}")
        except Exception as e:
            print(f"Error carregant {folder}/{csv_file}: {e}")

print(f"\nTotal carpetes faostat processades: {len(data_dict['faostat'])}")


Carregant fitxers de faostat...
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_All_Data_(Normalized).csv
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_AreaCodes.csv
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_Elements.csv
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_Flags.csv
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_ItemCodes.csv
Carregat: Emissions_Totals_E_All_Data_(Normalized)/Emissions_Totals_E_Sources.csv
Carregat: Employment_Indicators_Agriculture_E_All_Data_(Normalized)/Employment_Indicators_Agriculture_E_All_Data_(Normalized).csv
Carregat: Employment_Indicators_Agriculture_E_All_Data_(Normalized)/Employment_Indicators_Agriculture_E_AreaCodes.csv
Carregat: Employment_Indicators_Agriculture_E_All_Data_(Normalized)/Employment_Indicators_Agriculture_E_Elements.csv
Carregat: Employment_Indicators_Agriculture_E_All_Data_(Normalized)/Employment_Indicators_Agri

  data_dict["faostat"][folder_key][file_key] = pd.read_csv(csv_path)


Carregat: Inputs_FertilizersNutrient_E_All_Data_(Normalized)/Inputs_FertilizersNutrient_E_All_Data_(Normalized).csv
Carregat: Inputs_FertilizersNutrient_E_All_Data_(Normalized)/Inputs_FertilizersNutrient_E_AreaCodes.csv
Carregat: Inputs_FertilizersNutrient_E_All_Data_(Normalized)/Inputs_FertilizersNutrient_E_Elements.csv
Carregat: Inputs_FertilizersNutrient_E_All_Data_(Normalized)/Inputs_FertilizersNutrient_E_Flags.csv
Carregat: Inputs_FertilizersNutrient_E_All_Data_(Normalized)/Inputs_FertilizersNutrient_E_ItemCodes.csv
Carregat: Inputs_LandUse_E_All_Data_(Normalized)/Inputs_LandUse_E_All_Data_(Normalized).csv
Carregat: Inputs_LandUse_E_All_Data_(Normalized)/Inputs_LandUse_E_AreaCodes.csv
Carregat: Inputs_LandUse_E_All_Data_(Normalized)/Inputs_LandUse_E_Elements.csv
Carregat: Inputs_LandUse_E_All_Data_(Normalized)/Inputs_LandUse_E_Flags.csv
Carregat: Inputs_LandUse_E_All_Data_(Normalized)/Inputs_LandUse_E_ItemCodes.csv
Carregat: Inputs_Pesticides_Use_E_All_Data_(Normalized)/Inputs_Pes

  data_dict["faostat"][folder_key][file_key] = pd.read_csv(csv_path)


Carregat: Production_Crops_Livestock_E_All_Data_(Normalized)/Production_Crops_Livestock_E_All_Data_(Normalized).csv
Carregat: Production_Crops_Livestock_E_All_Data_(Normalized)/Production_Crops_Livestock_E_AreaCodes.csv
Carregat: Production_Crops_Livestock_E_All_Data_(Normalized)/Production_Crops_Livestock_E_Elements.csv
Carregat: Production_Crops_Livestock_E_All_Data_(Normalized)/Production_Crops_Livestock_E_Flags.csv
Carregat: Production_Crops_Livestock_E_All_Data_(Normalized)/Production_Crops_Livestock_E_ItemCodes.csv

Total carpetes faostat processades: 9


In [7]:
# Resum de tots els dataframes carregats
print("Resum del diccionari data_dict")

print(f"\n CLIMATE_DATA ({len(data_dict['climate_data'])} fitxers):")
for key in sorted(data_dict['climate_data'].keys()):
    shape = data_dict['climate_data'][key].shape
    print(f"  - {key}: {shape[0]} files x {shape[1]} columnes")

print(f"\n FAOSTAT ({len(data_dict['faostat'])} carpetes):")
for folder_key in sorted(data_dict['faostat'].keys()):
    num_files = len(data_dict['faostat'][folder_key])
    print(f"  - {folder_key}: {num_files} fitxers")
    for file_key in sorted(k for k in data_dict['faostat'][folder_key].keys() if 'All_Data' in k):
            shape = data_dict['faostat'][folder_key][file_key].shape
            print(f"    • Main File:{file_key}: {shape[0]} files x {shape[1]} columnes")

print(f"Tots els fitxers han estat carregats correctament al diccionari data_dict")


Resum del diccionari data_dict

 CLIMATE_DATA (4 fitxers):
  - Climate_Indicators_Annual_Mean_Global_Surface_Temperature: 231 files x 74 columnes
  - World_Development_Indicators: 1875 files x 69 columnes
  - annual_mean_historical_pr: 246 files x 126 columnes
  - annual_mean_historical_tas: 246 files x 126 columnes

 FAOSTAT (9 carpetes):
  - Emissions_Totals: 6 fitxers
    • Main File:Emissions_Totals_E_All_Data_(Normalized): 2500184 files x 15 columnes
  - Employment_Indicators_Agriculture: 7 fitxers
    • Main File:Employment_Indicators_Agriculture_E_All_Data_(Normalized): 188070 files x 17 columnes
  - Environment_LandCover: 5 fitxers
    • Main File:Environment_LandCover_E_All_Data_(Normalized): 210986 files x 12 columnes
  - Inputs_FertilizersNutrient: 5 fitxers
    • Main File:Inputs_FertilizersNutrient_E_All_Data_(Normalized): 241859 files x 13 columnes
  - Inputs_LandUse: 5 fitxers
    • Main File:Inputs_LandUse_E_All_Data_(Normalized): 413211 files x 13 columnes
  - Inputs_P

In [8]:
# Verificació: mostrar les claus del diccionari
print("Estructura del diccionari data_dict:")
print(f"\nClaus principals: {list(data_dict.keys())}")
print(f"\nClaus de climate_data: {list(data_dict['climate_data'].keys())}")
print(f"\nClaus de faostat: {list(data_dict['faostat'].keys())}")


Estructura del diccionari data_dict:

Claus principals: ['climate_data', 'faostat']

Claus de climate_data: ['Climate_Indicators_Annual_Mean_Global_Surface_Temperature', 'World_Development_Indicators', 'annual_mean_historical_pr', 'annual_mean_historical_tas']

Claus de faostat: ['Emissions_Totals', 'Employment_Indicators_Agriculture', 'Environment_LandCover', 'Inputs_FertilizersNutrient', 'Inputs_LandUse', 'Inputs_Pesticides_Use', 'Population', 'Prices', 'Production_Crops_Livestock']


## Construcció dels DataFrame Final (`agri_product_df` i `agri_country_year_df`)

Els dataframes final seran `agri_product_df`, que contrindrà informació per cada Area (país) x ramaderia/cultiu x any, i `agri_country_year_df`, que contindrà indicadors externs útils per comparar amb l'anterior a nivell Area (país) x any

### Definició de Funcionn generals utilitzades a lo llarg del codi

Les funcions han sigut definides a posteriori de veure patrons que podien generalitzar-se

In [9]:
# Funció per analitzar qualsevol dataset de faostat
def analitzar_dataset_faostat(key: str) -> None:
    """
    Funció per analitzar un dataset de faostat.
    
    Mostra:
    - Informació del dataframe principal (All_Data): dimensions, columnes, info(), head()
    - Head() de tots els altres dataframes d'aquesta carpeta (els que no són All_Data)
    - Elements únics de les columnes 'Item' i 'Element' del main_df
    Paràmetres:
    -----------
    key : str
        Clau del dataset a analitzar (ex: 'Production_Crops_Livestock', 'Inputs_FertilizersNutrient')
    """
    print(f"Anàlisi del dataset: {key}")
    
    # Trobar el dataframe principal (All_Data)
    all_data_key = None
    for k in data_dict["faostat"][key].keys():
        if 'All_Data' in k:
            all_data_key = k
            break
    
    # Obtenir el dataframe principal
    main_df = data_dict["faostat"][key][all_data_key]
    
    # Mostrar informació del dataframe principal
    print(f"\nDataframe principal ({all_data_key}):")
    print(f"Dimensions: {main_df.shape[0]:,} files x {main_df.shape[1]} columnes")
    print(f"\n Columnes ({len(main_df.columns)}):")
    for i, col in enumerate(main_df.columns, 1):
        print(f"  {i:2d}. {col}")
    
    print(f"\n Primeres files del dataframe principal:")
    display(main_df.head())
    
    for col in ["Item", "Element", "Indicator", "Source"]:
        if col in main_df.columns:
            print(f"\n Valors únics de la columna '{col}':")
            unique_values = main_df[col].dropna().unique()
            print(f"Total únics: {len(unique_values)} (mostrant com a màxim 50)")
            display(pd.DataFrame(unique_values[:50], columns=[col]))

    # Mostrar head() dels altres dataframes
    print(f" Altres dataframes de {key}:")
    
    for k in sorted(data_dict["faostat"][key].keys()):
        if 'All_Data' not in k:
            print(f"\n Contingut de {k}:")
            display(data_dict["faostat"][key][k].head())
            print("\n")


# Després de veure que el patró de la fusió de dataframes sempre serà el mateix, però amb diferents dataframes.
def merge_agri_dataframes(
    df_initial: pd.DataFrame,
    df_to_merge: pd.DataFrame,
    df_name_initial: str = "df_initial",
    df_name_to_merge: str = "df_to_merge",
    merge_cols: list[str] = ["Area Code", "Area Code (M49)", "Area", "Year Code", "Year"]
) -> pd.DataFrame:
    """
    Funció per fer merge de dos dataframes agrícoles.
    
    Paràmetres:
    -----------
    df_initial : DataFrame
        DataFrame inicial al qual s'afegirà la informació
    df_to_merge : DataFrame
        DataFrame que es fusionarà amb l'inicial
    df_name_initial : str
        Nom del dataframe inicial (per als prints)
    df_name_to_merge : str
        Nom del dataframe a mergear (per als prints)
    
    Retorna:
    --------
    DataFrame
        DataFrame resultat del merge
    """
    print(f"Nombre de files de {df_name_initial}: {df_initial.shape[0]}")
    print(f"Nombre de columnes de {df_name_initial}: {df_initial.shape[1]}")
    
    print(f"Nombre de files de {df_name_to_merge}: {df_to_merge.shape[0]}")
    print(f"Nombre de columnes de {df_name_to_merge}: {df_to_merge.shape[1]}")
    
    # Fem el merge/join
    df_result = df_initial.merge(
        df_to_merge,
        on=merge_cols,
        how="left"
    )
    
    print(f"Nombre final de files del resultat: {df_result.shape[0]}")
    print(f"Nombre final de columnes del resultat: {df_result.shape[1]}")

    display(df_result[(df_result["Area"] == "Spain") & (df_result["Year"] == 2020)].head())
    
    return df_result

# També podem generalitzar el procés de creació de dataframes filtrats.
def crear_df_faostat_filtrat(
    key: str,
    df_name: str,
    column_mapping: dict,
    values_to_keep: dict,
    data_dict: dict = data_dict,
    delete_note_flag_columns: bool = True
) -> pd.DataFrame:
    """
    Generalitza el processament d'un dataset FAOSTAT:
    - key: clau principal dins de data_dict['faostat']
    - df_name: nom variable a assignar (string; només per print, retorna el df)
    - column_mapping: dict de mapping per renombrar columnes
    - values_to_keep: dict amb claus=noms de columnes (post-mapping) i valors=llista de valors a mantenir
                      Exemple: {'Item': ['x', 'y'], 'Element': ['z'], 'Source': ['FAO TIER 1']}
    """
    # Trobar el dataframe principal (All_Data)
    all_data_key = None
    for k in data_dict["faostat"][key].keys():
        if "All_Data" in k:
            all_data_key = k
            break

    if all_data_key is None:
        raise ValueError(f"No s'ha trobat cap arxiu 'All_Data' per '{key}'")

    # Còpia del dataframe principal
    df = data_dict["faostat"][key][all_data_key].copy()

    print(f"Creació de {df_name}")

    # Eliminar les columnes 'Note' i 'Flag'
    if delete_note_flag_columns:
        df = df.drop(columns=["Note", "Flag"], errors="ignore")

    # Remapejar les columnes
    df = df.rename(columns=column_mapping)

    # Filtrar amb les condicions especificades a values_to_keep
    for col_name, values_list in values_to_keep.items():
        if col_name in df.columns:
            df = df[df[col_name].isin(values_list)].copy()
        else:
            print(f"Advertència: La columna '{col_name}' no existeix al dataframe")

    # Mostrem els valors diferents de 'Unit' per veure si hi ha valors únics
    print("\nValors diferents de 'Unit':")
    groupby_cols = [col for col in values_to_keep.keys() if col in df.columns]
    if groupby_cols and "Unit" in df.columns:
        print(df.groupby(groupby_cols)["Unit"].unique())

    print(f"\nDimensions finals: {df.shape[0]:,} files x {df.shape[1]} columnes")
    print(f"\nColumnes finals ({len(df.columns)}):")
    for i, col in enumerate(df.columns, 1):
        print(f"  {i:2d}. {col}")

    print(f"\nPrimeres files de {df_name}:")
    display(df.head())

    print(f"\nResum de valors únics:")
    for col_name in values_to_keep.keys():
        if col_name in df.columns:
            print(f"  - {col_name} únics: {df[col_name].nunique()} a {sorted(df[col_name].unique())}")

    return df


# Fem una funció ja que l'estructura de les columnes és la mateixa per a tots els dataframes.
def pivot_faostat_df(
    df_to_pivot: pd.DataFrame,
    index_cols: list[str] = ["Area Code", "Area Code (M49)", "Area", "Year Code", "Year"]
) -> pd.DataFrame:
    """
    Pivot landcover DataFrame so that each landcover category+metric becomes a column with values in ha.
    Parameters
    ----------
    df_to_pivot : pd.DataFrame
        The landcover dataframe to be pivoted, with a 'pivot_col' and 'Value' columns.
    index_cols: list[str]
        The columns to be used as index in the pivot table.
    Returns
    -------
    pivot_df : pd.DataFrame
        Pivoted DataFrame with wide columns for each landcover category+metric.
    """
    pivot_df = df_to_pivot.pivot_table(
        index=index_cols,
        columns="pivot_col",
        values="Value",
        aggfunc="first"
    ).reset_index()
    pivot_df.columns.name = None

    display(pivot_df[pivot_df['Area']=='Spain'].head())
    return pivot_df


### Pas 1: Creació de `agri_df` (Base de dades agrícoles)

La base per a la construcció del dataframe final serà el fitxer **`Production_Crops_Livestock_E_All_Data_(Normalized).csv`** ja que és el que conté la major informació sobre producció agrícola i ramadera.

**Què conté aquest dataset:**
- Producció
- Àrea cosechada
- Rendiment
- Cap de bestiar
- etc.

Per país x cultiu/ramaderia x any

In [10]:
# Pas 1: Assignar agri_df amb el dataframe de Production_Crops_Livestock
agri_df = data_dict["faostat"]["Production_Crops_Livestock"]["Production_Crops_Livestock_E_All_Data_(Normalized)"].copy()

print("Agricultural dataframe (agri_df)")
print(f"\nDimensions: {agri_df.shape[0]:,} files x {agri_df.shape[1]} columnes")
print(f"\nColumnes ({len(agri_df.columns)}):")
for i, col in enumerate(agri_df.columns, 1):
    print(f"  {i:2d}. {col}")


print(f"\nPrimeres files:")
agri_df.head()


Agricultural dataframe (agri_df)

Dimensions: 4,116,252 files x 14 columnes

Columnes (14):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item Code (CPC)
   6. Item
   7. Element Code
   8. Element
   9. Year Code
  10. Year
  11. Unit
  12. Value
  13. Flag
  14. Note

Primeres files:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1961,1961,ha,0.0,A,
1,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1962,1962,ha,0.0,A,
2,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1963,1963,ha,0.0,A,
3,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1964,1964,ha,0.0,A,
4,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1965,1965,ha,0.0,A,


In [11]:
# Observem el contingut de la resta de dataframes de Production_Crops_Livestock per veure si realment els necessitem o 
# tota la informació del dataframe anterior és suficient, ja que sembla que ho conté tot.

for key in data_dict["faostat"]["Production_Crops_Livestock"].keys(): 
    if 'All_Data' not in key:
        print(f"Contingut de {key}:")
        display(data_dict["faostat"]["Production_Crops_Livestock"][key].head())
        print("\n")

Contingut de Production_Crops_Livestock_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5200,'019,Americas




Contingut de Production_Crops_Livestock_E_Elements:


Unnamed: 0,Element Code,Element
0,5312,Area harvested
1,5423,Extraction Rate
2,5313,Laying
3,5318,Milk Animals
4,5319,Prod Popultn




Contingut de Production_Crops_Livestock_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,E,Estimated value
2,I,Value imputed by a receiving agency
3,M,Missing value; data cannot exist
4,X,Figure from external organization




Contingut de Production_Crops_Livestock_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item
0,101,'01195,Canary seed
1,1016,'02123,Goats
2,1017,'21116,Meat of goat; fresh or chilled
3,1018,'21156,Edible offal of goat; fresh; chilled or frozen
4,1019,'21515,Goat fat; unrendered






In [12]:
# Veiem els valors únics de Element
agri_df["Element"].unique()

array(['Area harvested', 'Yield', 'Production', 'Stocks',
       'Producing Animals/Slaughtered', 'Yield/Carcass Weight', 'Laying',
       'Milk Animals'], dtype=object)

En aquest cas, no té sentit fer el pivot de les taules ja que les informacions dels elements són molt diferents.
No tindria sentit tenir una columna de animals per cap cultiu o superficie explotada per animals.

In [13]:
# Lo únic que sembla que podem utilitzar és la descripció de les Flags, això que ho afegin al dataframe principal
# i passem a l'anàlisi del següent dataset.

# Carreguem el dataset de Production_Crops_Livestock_E_Flags
flags_df = data_dict["faostat"]["Production_Crops_Livestock"]["Production_Crops_Livestock_E_Flags"].copy()

# Elimina espais inicials i finals dels noms de les columnes de flags_df
# Hem hagut de debuggear el nom de la columna 'Flag' perquè no coincidia amb el nom de la columna del dataframe principal
# i el motiu era l'espai que hi havia al nom de la columna.
flags_df.columns = flags_df.columns.str.strip()


# Afegim la descripció de les flags al dataframe principal
# Renombrem 'Description' a 'Flag_Description' per afegir-la al dataframe principal 
# i eliminem la columna 'Note' que no utilitzarem
flags_df = flags_df.rename(columns={'Description': 'Flag_Description'})
agri_df = pd.merge(agri_df, flags_df, on='Flag', how='left')
agri_df = agri_df.drop(columns=['Note'])

# Fem el display amb Spain per veure alguna dada en concret.
display(
    agri_df[
        (agri_df['Area'] == 'Spain') &
        (agri_df['Item'] == 'Apples') &
        (agri_df['Value'] > 0)
    ].head()
)

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Flag_Description
2562065,203,'724,Spain,515,'01341,Apples,5312,Area harvested,1961,1961,ha,16800.0,A,Official figure
2562066,203,'724,Spain,515,'01341,Apples,5312,Area harvested,1962,1962,ha,17400.0,A,Official figure
2562067,203,'724,Spain,515,'01341,Apples,5312,Area harvested,1963,1963,ha,18100.0,A,Official figure
2562068,203,'724,Spain,515,'01341,Apples,5312,Area harvested,1964,1964,ha,22200.0,A,Official figure
2562069,203,'724,Spain,515,'01341,Apples,5312,Area harvested,1965,1965,ha,26400.0,A,Official figure


### Pas 2: Anàlisi d'altres datasets per enriquir `agri_df`

Ara l'objectiu és tenir més informació per afegir a `agri_df` i construir el dataframe final `agri_product_df`. Per això, analitzarem els altres dataframes disponibles per veure quina informació poden aportar.

### Anàlisi del dataset `Prices`

Analitzem el dataset de Prices per veure quina informació conté i com podem integrar-la al dataframe final.


In [14]:
# Analitzar el dataset de Prices
analitzar_dataset_faostat('Prices')

Anàlisi del dataset: Prices

Dataframe principal (Prices_E_All_Data_(Normalized)):
Dimensions: 1,664,300 files x 15 columnes

 Columnes (15):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item Code (CPC)
   6. Item
   7. Element Code
   8. Element
   9. Year Code
  10. Year
  11. Months Code
  12. Months
  13. Unit
  14. Value
  15. Flag

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Months Code,Months,Unit,Value,Flag
0,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1993,1993,7021,Annual value,LCU,46000.0,A
1,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1994,1994,7021,Annual value,LCU,50000.0,A
2,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1995,1995,7021,Annual value,LCU,62000.0,A
3,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1996,1996,7021,Annual value,LCU,50000.0,A
4,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1997,1997,7021,Annual value,LCU,41000.0,A



 Valors únics de la columna 'Item':
Total únics: 235 (mostrant com a màxim 50)


Unnamed: 0,Item
0,"Almonds, in shell"
1,"Anise, badian, coriander, cumin, caraway, fennel and juniper berries, raw"
2,Apples
3,Apricots
4,Barley
5,Cantaloupes and other melons
6,"Cotton lint, ginned"
7,Cotton seed
8,Figs
9,"Game meat, fresh, chilled or frozen"



 Valors únics de la columna 'Element':
Total únics: 4 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Producer Price (LCU/tonne)
1,Producer Price (SLC/tonne)
2,Producer Price Index (2014-2016 = 100)
3,Producer Price (USD/tonne)


 Altres dataframes de Prices:

 Contingut de Prices_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5200,'019,Americas





 Contingut de Prices_E_Elements:


Unnamed: 0,Element Code,Element
0,5530,Producer Price (LCU/tonne)
1,5531,Producer Price (SLC/tonne)
2,5532,Producer Price (USD/tonne)
3,5539,Producer Price Index (2014-2016 = 100)





 Contingut de Prices_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,F,Forecast value
2,I,Value imputed by a receiving agency
3,X,Figure from external organization





 Contingut de Prices_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item
0,101,'01195,Canary seed
1,1013,'21115b,Meat of sheep; fresh or chilled (biological)
2,1017,'21116,Meat of goat; fresh or chilled
3,1020,'02292,Raw milk of goats
4,103,'01199.02,Mixed grain






### Creació de `prices_df`

Creem el dataframe de Prices basat en el dataset principal de Prices.

Del domini Prices hem seleccionat els tres tipus de preus de productor disponibles (LCU/tonne, SLC/tonne i USD/tonne) per permetre comparacions locals i internacionals. Aquests indicadors reflecteixen el valor econòmic dels productes agrícoles i permeten analitzar la rendibilitat i l'evolució dels mercats. Filtrem per 'Annual value' per tenir valors agregats anuals consistents amb la resta de datasets.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes: `Element` a `price_unit`, `Element Code` a `price_unit_code`
- Filtrar per elements: Producer Price (LCU/tonne), Producer Price (SLC/tonne), Producer Price (USD/tonne)
- Filtrar per Months: 'Annual value'


In [15]:
# Crear prices_df utilitzant la funció generalitzada
key = 'Prices'
df_name = 'prices_df'
column_mapping = {
    'Element Code': 'price_unit_code',
    'Element': 'price_unit'
}
values_to_keep = {
    'price_unit': [
        'Producer Price (LCU/tonne)',
        'Producer Price (SLC/tonne)',
        'Producer Price (USD/tonne)'
    ],
    'Months': ['Annual value']
}

prices_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

Creació de prices_df

Valors diferents de 'Unit':
price_unit                  Months      
Producer Price (LCU/tonne)  Annual value    [LCU]
Producer Price (SLC/tonne)  Annual value    [SLC]
Producer Price (USD/tonne)  Annual value    [USD]
Name: Unit, dtype: object

Dimensions finals: 884,623 files x 14 columnes

Columnes finals (14):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item Code (CPC)
   6. Item
   7. price_unit_code
   8. price_unit
   9. Year Code
  10. Year
  11. Months Code
  12. Months
  13. Unit
  14. Value

Primeres files de prices_df:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,price_unit_code,price_unit,Year Code,Year,Months Code,Months,Unit,Value
0,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1993,1993,7021,Annual value,LCU,46000.0
1,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1994,1994,7021,Annual value,LCU,50000.0
2,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1995,1995,7021,Annual value,LCU,62000.0
3,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1996,1996,7021,Annual value,LCU,50000.0
4,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5530,Producer Price (LCU/tonne),1997,1997,7021,Annual value,LCU,41000.0



Resum de valors únics:
  - price_unit únics: 3 a ['Producer Price (LCU/tonne)', 'Producer Price (SLC/tonne)', 'Producer Price (USD/tonne)']
  - Months únics: 1 a ['Annual value']


### Pivot de `prices_df`

Realitzem el pivot per tenir una columna per cada combinació de producte agrícola i unitat de preu.


In [16]:
# Crear la columna pivot_col amb el format simplificat
prices_df["pivot_col"] = (
    "Value_"
    + prices_df["price_unit"].str.replace('Producer Price (', '').str.replace(')', '')
)

prices_pivot = pivot_faostat_df(
    df_to_pivot = prices_df,
    index_cols = ["Area Code", "Area Code (M49)", "Area", "Item Code", "Item Code (CPC)", "Item", "Year Code", "Year"]
)

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Year Code,Year,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne
248929,203,'724,Spain,15,'0111,Wheat,1991,1991,27653.0,166.2,266.1
248930,203,'724,Spain,15,'0111,Wheat,1992,1992,26755.0,160.8,261.3
248931,203,'724,Spain,15,'0111,Wheat,1993,1993,26772.0,160.9,210.4
248932,203,'724,Spain,15,'0111,Wheat,1994,1994,26838.0,161.3,200.3
248933,203,'724,Spain,15,'0111,Wheat,1995,1995,28252.0,169.8,226.6


### Merge de `prices_df` amb el dataframe principal

Un cop creat el pivot, farem el merge amb el dataframe principal utilitzant la funció `merge_agri_dataframes`.


In [17]:
# Fer el merge utilitzant la funció generalitzada
agri_prices_df = merge_agri_dataframes(
    df_initial=agri_df,
    df_to_merge=prices_pivot,
    df_name_initial="agri_df",
    df_name_to_merge="prices_pivot",
    merge_cols=["Area Code", "Area Code (M49)", "Area", "Item Code", "Item Code (CPC)", "Item", "Year Code", "Year"]
)


Nombre de files de agri_df: 4116252
Nombre de columnes de agri_df: 14
Nombre de files de prices_pivot: 305036
Nombre de columnes de prices_pivot: 11
Nombre final de files del resultat: 4116252
Nombre final de columnes del resultat: 17


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Flag_Description,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne
2561788,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2020,2020,ha,718540.0,A,Official figure,1054.5,1054.5,1204.4
2561851,203,'724,Spain,221,'01371,"Almonds, in shell",5412,Yield,2020,2020,kg/ha,580.3,A,Official figure,1054.5,1054.5,1204.4
2561914,203,'724,Spain,221,'01371,"Almonds, in shell",5510,Production,2020,2020,t,416950.0,A,Official figure,1054.5,1054.5,1204.4
2562124,203,'724,Spain,515,'01341,Apples,5312,Area harvested,2020,2020,ha,29490.0,A,Official figure,499.0,499.0,570.0
2562187,203,'724,Spain,515,'01341,Apples,5412,Yield,2020,2020,kg/ha,17704.3,A,Official figure,499.0,499.0,570.0


### Construcció de `agri_country_year_df`

Comencem a analitzar el datasets que no dependran del producte, si no únicament del país i l'any

In [18]:
# Començarem per analitzar el dataset de FertilizersNutrient per veure quina informació conté i com podem integrar-la.

analitzar_dataset_faostat('Inputs_FertilizersNutrient')

Anàlisi del dataset: Inputs_FertilizersNutrient

Dataframe principal (Inputs_FertilizersNutrient_E_All_Data_(Normalized)):
Dimensions: 241,859 files x 13 columnes

 Columnes (13):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Unit
  11. Value
  12. Flag
  13. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5510,Production,1974,1974,t,19117.0,A,Official data from questionnaire
1,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5510,Production,1975,1975,t,14774.0,A,Official data from questionnaire
2,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5510,Production,1976,1976,t,26203.0,A,Official data from questionnaire
3,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5510,Production,1977,1977,t,37689.0,A,Official data from questionnaire
4,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5510,Production,1978,1978,t,48300.0,A,Official data from questionnaire



 Valors únics de la columna 'Item':
Total únics: 3 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Nutrient nitrogen N (total)
1,Nutrient phosphate P2O5 (total)
2,Nutrient potash K2O (total)



 Valors únics de la columna 'Element':
Total únics: 7 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Production
1,Import quantity
2,Export quantity
3,Agricultural Use
4,Use per area of cropland
5,Use per capita
6,Use per value of agricultural production


 Altres dataframes de Inputs_FertilizersNutrient:

 Contingut de Inputs_FertilizersNutrient_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,51000,'002.03,Africa (excluding intra-trade)
3,3,'008,Albania
4,4,'012,Algeria





 Contingut de Inputs_FertilizersNutrient_E_Elements:


Unnamed: 0,Element Code,Element
0,5157,Agricultural Use
1,5910,Export quantity
2,5610,Import quantity
3,5510,Production
4,5159,Use per area of cropland





 Contingut de Inputs_FertilizersNutrient_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,B,Time series break
2,E,Estimated value
3,I,Value imputed by a receiving agency
4,X,Figure from external organization





 Contingut de Inputs_FertilizersNutrient_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item
0,3102,'F3102,Nutrient nitrogen N (total)






### Processament de `fertilizers_df`

Creem el dataframe de fertilitzants basat en el dataset principal de Inputs_FertilizersNutrient. 

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat
- Fem mapping dels nutrients per simplificar els noms
- Filtrar els elements per quedar-nos només amb els indicadors rellevants per a l'anàlisi agrícola
- L'objectiu és tenir un registre per cada Area x Year


In [19]:
# Crear fertilizers_df utilitzant la funció generalitzada
key = 'Inputs_FertilizersNutrient'
df_name = 'fertilizers_df'
column_mapping = {
    'Item Code': 'nutrient_code',
    'Item': 'nutrient',
    'Element Code': 'fertilizer_metric_code',
    'Element': 'fertilizer_metric'
}
values_to_keep = {
    'fertilizer_metric': [
        'Agricultural Use',
        'Use per area of cropland',
        'Use per capita',
        'Use per value of agricultural production'
    ]
}

fertilizers_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

# Mapping dels nutrients
nutrient_mapping = {
    'Nutrient nitrogen N (total)': 'N',
    'Nutrient phosphate P2O5 (total)': 'P2O5',
    'Nutrient potash K2O (total)': 'K2O'
}
fertilizers_df['nutrient'] = fertilizers_df['nutrient'].map(nutrient_mapping).fillna(fertilizers_df['nutrient'])
print(f"\nMapping de nutrients aplicat")
display(fertilizers_df.head())


Creació de fertilizers_df

Valors diferents de 'Unit':
fertilizer_metric
Agricultural Use                                 [t]
Use per area of cropland                     [kg/ha]
Use per capita                              [kg/cap]
Use per value of agricultural production    [g/Int$]
Name: Unit, dtype: object

Dimensions finals: 147,437 files x 11 columnes

Columnes finals (11):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. nutrient_code
   5. nutrient
   6. fertilizer_metric_code
   7. fertilizer_metric
   8. Year Code
   9. Year
  10. Unit
  11. Value

Primeres files de fertilizers_df:


Unnamed: 0,Area Code,Area Code (M49),Area,nutrient_code,nutrient,fertilizer_metric_code,fertilizer_metric,Year Code,Year,Unit,Value
161,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1961,1961,t,1000.0
162,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1962,1962,t,1000.0
163,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1963,1963,t,1000.0
164,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1964,1964,t,1000.0
165,2,'004,Afghanistan,3102,Nutrient nitrogen N (total),5157,Agricultural Use,1965,1965,t,1000.0



Resum de valors únics:
  - fertilizer_metric únics: 4 a ['Agricultural Use', 'Use per area of cropland', 'Use per capita', 'Use per value of agricultural production']

Mapping de nutrients aplicat


Unnamed: 0,Area Code,Area Code (M49),Area,nutrient_code,nutrient,fertilizer_metric_code,fertilizer_metric,Year Code,Year,Unit,Value
161,2,'004,Afghanistan,3102,N,5157,Agricultural Use,1961,1961,t,1000.0
162,2,'004,Afghanistan,3102,N,5157,Agricultural Use,1962,1962,t,1000.0
163,2,'004,Afghanistan,3102,N,5157,Agricultural Use,1963,1963,t,1000.0
164,2,'004,Afghanistan,3102,N,5157,Agricultural Use,1964,1964,t,1000.0
165,2,'004,Afghanistan,3102,N,5157,Agricultural Use,1965,1965,t,1000.0


Amb l'objectiu de tenir un registre per cada Area x Year, fem un pivot de fertilizers_df per la metrica.

Per fer-ho, em de vist que les unitats són úniques per cada fertilizer metric.

In [20]:
# Ja que són úniques, podem fer el pivot.
# Creem una columna con el nom final de columna a formar després del pivot
fertilizers_df["pivot_col"] = (
    "Value_nutrient_"
    + fertilizers_df["nutrient_code"].astype(str)
    + "_"
    + fertilizers_df["nutrient"].astype(str)
    + "_"
    + fertilizers_df["fertilizer_metric"].str.replace(" ", "_")
    + "_"
    + fertilizers_df["Unit"].astype(str)
)

# Realitzem el pivot
fertilizers_pivot = pivot_faostat_df(
    df_to_pivot = fertilizers_df,
)

Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$
8638,203,'724,Spain,1961,1961,327178.0,15.78,10.66,17.77,307978.0,14.86,10.03,16.72,94700.0,4.57,3.08,5.14
8639,203,'724,Spain,1962,1962,345497.0,16.58,11.13,18.98,311111.0,14.93,10.02,17.09,95578.0,4.59,3.08,5.25
8640,203,'724,Spain,1963,1963,333031.0,15.98,10.61,15.59,313685.0,15.05,10.0,14.69,99654.0,4.78,3.18,4.67
8641,203,'724,Spain,1964,1964,364054.0,17.71,11.47,18.58,311973.0,15.18,9.83,15.92,91132.0,4.43,2.87,4.65
8642,203,'724,Spain,1965,1965,384720.0,18.68,11.98,19.53,305522.0,14.84,9.51,15.51,92117.0,4.47,2.87,4.68


In [21]:
# Associem el fertilizers_pivot a un df per anar actualitzant que serà el final.
agri_fertilizers_df = fertilizers_pivot.copy()

print(f"Nombre final de files d'agri_fertilizers_df: {agri_fertilizers_df.shape[0]}")
print(f"Nombre final de columnes d'agri_fertilizers_df: {agri_fertilizers_df.shape[1]}")

display(agri_fertilizers_df[agri_fertilizers_df["Area"] == "Spain"].head())

Nombre final de files d'agri_fertilizers_df: 12729
Nombre final de columnes d'agri_fertilizers_df: 17


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$
8638,203,'724,Spain,1961,1961,327178.0,15.78,10.66,17.77,307978.0,14.86,10.03,16.72,94700.0,4.57,3.08,5.14
8639,203,'724,Spain,1962,1962,345497.0,16.58,11.13,18.98,311111.0,14.93,10.02,17.09,95578.0,4.59,3.08,5.25
8640,203,'724,Spain,1963,1963,333031.0,15.98,10.61,15.59,313685.0,15.05,10.0,14.69,99654.0,4.78,3.18,4.67
8641,203,'724,Spain,1964,1964,364054.0,17.71,11.47,18.58,311973.0,15.18,9.83,15.92,91132.0,4.43,2.87,4.65
8642,203,'724,Spain,1965,1965,384720.0,18.68,11.98,19.53,305522.0,14.84,9.51,15.51,92117.0,4.47,2.87,4.68


### Processament del dataset LandUse

Analitzem el dataset de LandUse per veure quina informació conté i com podem integrar-la al dataframe final.


In [22]:
# Analitzar el dataset de LandUse
analitzar_dataset_faostat('Inputs_LandUse')


Anàlisi del dataset: Inputs_LandUse

Dataframe principal (Inputs_LandUse_E_All_Data_(Normalized)):
Dimensions: 413,211 files x 13 columnes

 Columnes (13):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Unit
  11. Value
  12. Flag
  13. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,6600,Country area,5110,Area,1961,1961,1000 ha,65286.0,A,
1,2,'004,Afghanistan,6600,Country area,5110,Area,1962,1962,1000 ha,65286.0,A,
2,2,'004,Afghanistan,6600,Country area,5110,Area,1963,1963,1000 ha,65286.0,A,
3,2,'004,Afghanistan,6600,Country area,5110,Area,1964,1964,1000 ha,65286.0,A,
4,2,'004,Afghanistan,6600,Country area,5110,Area,1965,1965,1000 ha,65286.0,A,



 Valors únics de la columna 'Item':
Total únics: 45 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Country area
1,Land area
2,Agriculture
3,Agricultural land
4,Cropland
5,Arable land
6,Temporary crops
7,Temporary meadows and pastures
8,Temporary fallow
9,Permanent crops



 Valors únics de la columna 'Element':
Total únics: 8 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Area
1,Share in Land area
2,Value of agricultural production (Int. $) per Area
3,Share in Agricultural land
4,Area per capita
5,Carbon stock in living biomass
6,Share in Forest land
7,Share in Cropland


 Altres dataframes de Inputs_LandUse:

 Contingut de Inputs_LandUse_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5,'016,American Samoa





 Contingut de Inputs_LandUse_E_Elements:


Unnamed: 0,Element Code,Element
0,5110,Area
1,7277,Area per capita
2,72151,Carbon stock in living biomass
3,72000,Indicators
4,7208,Share in Agricultural land





 Contingut de Inputs_LandUse_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,B,Time series break
2,E,Estimated value
3,I,Value imputed by a receiving agency
4,X,Figure from external organization





 Contingut de Inputs_LandUse_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item
0,6610,,Agricultural land
1,66710,,Agricultural practices
2,6602,,Agriculture
3,66020,,Agriculture
4,6611,,Agriculture area actually irrigated






### Creació de `landuse_df`

Creem el dataframe de LandUse basat en el dataset principal de Inputs_LandUse.

Hem seleccionat únicament els ítems que descriuen l'estructura fonamental del sòl agrícola d'un país (terra agrícola, terres cultivades, terres arables, cultius permanents i pastures). Aquests indicadors són estables, no redundants i permeten analitzar l'evolució del sector agrari i la seva relació amb el clima, la producció i l'ús d'insums. La resta d'items aporten un nivell de detall massa específic o redundant que incrementaria innecessàriament la dimensionalitat del conjunt de dades i dificultaria la visualització final. Per tal d'evitar duplicitats, també mantenim només l'element "Area", ja que els "Share" i "per capita" poden derivar-se fàcilment durant l'anàlisi.

Per tant:
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat: `Item` a `land_category`, `Element` a `land_metric`
- Filtrar els items i elements per quedar-nos només amb els indicadors rellevants


In [23]:
# Crear landuse_df utilitzant la funció generalitzada
key = 'Inputs_LandUse'
df_name = 'landuse_df'
column_mapping = {
    'Item Code': 'land_category_code',
    'Item': 'land_category',
    'Element Code': 'land_metric_code',
    'Element': 'land_metric'
}
values_to_keep = {
    'land_category': [
        'Land area',
        'Agricultural land',
        'Cropland',
        'Arable land',
        'Permanent crops',
        'Permanent meadows and pastures'
    ],
    'land_metric': ['Area']
}

landuse_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

Creació de landuse_df

Valors diferents de 'Unit':
land_category                   land_metric
Agricultural land               Area           [1000 ha]
Arable land                     Area           [1000 ha]
Cropland                        Area           [1000 ha]
Land area                       Area           [1000 ha]
Permanent crops                 Area           [1000 ha]
Permanent meadows and pastures  Area           [1000 ha]
Name: Unit, dtype: object

Dimensions finals: 92,300 files x 11 columnes

Columnes finals (11):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. land_category_code
   5. land_category
   6. land_metric_code
   7. land_metric
   8. Year Code
   9. Year
  10. Unit
  11. Value

Primeres files de landuse_df:


Unnamed: 0,Area Code,Area Code (M49),Area,land_category_code,land_category,land_metric_code,land_metric,Year Code,Year,Unit,Value
63,2,'004,Afghanistan,6601,Land area,5110,Area,1961,1961,1000 ha,65223.0
64,2,'004,Afghanistan,6601,Land area,5110,Area,1962,1962,1000 ha,65223.0
65,2,'004,Afghanistan,6601,Land area,5110,Area,1963,1963,1000 ha,65223.0
66,2,'004,Afghanistan,6601,Land area,5110,Area,1964,1964,1000 ha,65223.0
67,2,'004,Afghanistan,6601,Land area,5110,Area,1965,1965,1000 ha,65223.0



Resum de valors únics:
  - land_category únics: 6 a ['Agricultural land', 'Arable land', 'Cropland', 'Land area', 'Permanent crops', 'Permanent meadows and pastures']
  - land_metric únics: 1 a ['Area']


In [24]:
# Convertim Value de 1000 ha a ha
landuse_df['Value'] = landuse_df['Value'] * 1000

# Crear la columna pivot_col segons l'estructura
landuse_df['pivot_col'] = (
    'Value_'
    + landuse_df['land_category_code'].astype(str)
    + '_'
    + landuse_df['land_category'].str.lower().str.replace(' ', '_')
    + '_ha'
)

# Pivotem el dataframe utilitzant la funció generalitzada
landuse_pivot = pivot_faostat_df(landuse_df)

Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha
10982,203,'724,Spain,1961,1961,49978000.0,33230000.0,20730000.0,16246000.0,4484000.0,12500000.0
10983,203,'724,Spain,1962,1962,49978000.0,33232000.0,20832000.0,16296000.0,4536000.0,12400000.0
10984,203,'724,Spain,1963,1963,49978000.0,33137000.0,20837000.0,16205000.0,4632000.0,12300000.0
10985,203,'724,Spain,1964,1964,49978000.0,32753000.0,20553000.0,15919000.0,4634000.0,12200000.0
10986,203,'724,Spain,1965,1965,49978000.0,32694000.0,20594000.0,15966000.0,4628000.0,12100000.0


In [25]:
# Fer el merge utilitzant la funció generalitzada
agri_landuse_df = merge_agri_dataframes(
    df_initial=agri_fertilizers_df,
    df_to_merge=landuse_pivot,
    df_name_initial="agri_fertilizers_df",
    df_name_to_merge="landuse_pivot"
)

Nombre de files de agri_fertilizers_df: 12729
Nombre de columnes de agri_fertilizers_df: 17
Nombre de files de landuse_pivot: 16228
Nombre de columnes de landuse_pivot: 11
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 23


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0


### Creació de `pesticides_df`

Creem el dataframe de pesticides basat en el dataset principal de Inputs_Pesticides_Use.

Per al domini de pesticides hem seleccionat únicament l'ítem "Pesticides (total)", ja que és l'indicador agregat que resumeix la pressió química exercida sobre els sistemes agrícoles d'un país. La resta d'ítems (35 categories d'insecticides, herbicides o fungicides) aporten un nivell de detall excessiu, sovint inconsistent entre països i anys, i no són útils per a una anàlisi macro a escala global. Pel que fa als elements, mantenim "Agricultural Use" com a indicador principal perquè reflecteix l'ús real de pesticides en agricultura. Aquesta selecció minimitza la complexitat del conjunt de dades i facilita la interpretació i visualització dels resultats.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat: `Item` a `pesticide_category`, `Element` a `pesticide_metric`
- Filtrar els items i elements: només "Pesticides (total)" i "Agricultural Use"


### Anàlisi del dataset Pesticides Use

Analitzem el dataset de Pesticides Use per veure quina informació conté i com podem integrar-la al dataframe final.


In [26]:
# Analitzar el dataset de Pesticides Use
analitzar_dataset_faostat('Inputs_Pesticides_Use')


Anàlisi del dataset: Inputs_Pesticides_Use

Dataframe principal (Inputs_Pesticides_Use_E_All_Data_(Normalized)):
Dimensions: 103,622 files x 13 columnes

 Columnes (13):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Unit
  11. Value
  12. Flag
  13. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1990,1990,t,121.0,I,Imputed value
1,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1991,1991,t,121.0,I,Imputed value
2,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1992,1992,t,121.0,I,Imputed value
3,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1993,1993,t,121.0,A,Official figure
4,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1994,1994,t,201.0,A,Official figure



 Valors únics de la columna 'Item':
Total únics: 36 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Pesticides (total)
1,Insecticides
2,Insecticides – Chlorinated Hydrocarbons
3,Insecticides – Organo-phosphates
4,Insecticides – Carbamates
5,Insecticides – Pyrethroids
6,Insecticides - nes
7,Herbicides
8,Herbicides – Phenoxy hormone products
9,Herbicides – Triazines



 Valors únics de la columna 'Element':
Total únics: 4 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Agricultural Use
1,Use per area of cropland
2,Use per capita
3,Use per value of agricultural production


 Altres dataframes de Inputs_Pesticides_Use:

 Contingut de Inputs_Pesticides_Use_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,5100,'002,Africa
1,3,'008,Albania
2,4,'012,Algeria
3,5200,'019,Americas
4,6,'020,Andorra





 Contingut de Inputs_Pesticides_Use_E_Elements:


Unnamed: 0,Element Code,Element
0,5157,Agricultural Use
1,5159,Use per area of cropland
2,5172,Use per capita
3,5173,Use per value of agricultural production





 Contingut de Inputs_Pesticides_Use_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,E,Estimated value
2,I,Value imputed by a receiving agency
3,X,Figure from external organization





 Contingut de Inputs_Pesticides_Use_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item






### Creació de `pesticides_df`

Creem el dataframe de pesticides basat en el dataset principal de Inputs_Pesticides_Use.

Per al domini de pesticides hem seleccionat únicament l'ítem "Pesticides (total)", ja que és l'indicador agregat que resumeix la pressió química exercida sobre els sistemes agrícoles d'un país. La resta d'ítems (35 categories d'insecticides, herbicides o fungicides) aporten un nivell de detall excessiu, sovint inconsistent entre països i anys, i no són útils per a una anàlisi macro a escala global. Pel que fa als elements, mantenim "Agricultural Use" com a indicador principal perquè reflecteix l'ús real de pesticides en agricultura. Aquesta selecció minimitza la complexitat del conjunt de dades i facilita la interpretació i visualització dels resultats.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat: `Item` a `pesticide_category`, `Element` a `pesticide_metric`
- Filtrar els items i elements: només "Pesticides (total)" i "Agricultural Use"


In [27]:
# Creem el dataframe de pesticides
key = 'Inputs_Pesticides_Use'
df_name = 'pesticides_df'
column_mapping = {
    'Item Code': 'pesticide_category_code',
    'Item': 'pesticide_category',
    'Element Code': 'pesticide_metric_code',
    'Element': 'pesticide_metric'
}
values_to_keep = {
    'pesticide_category': ['Pesticides (total)'],
    'pesticide_metric': ['Agricultural Use']
}

pesticides_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

Creació de pesticides_df

Valors diferents de 'Unit':
pesticide_category  pesticide_metric
Pesticides (total)  Agricultural Use    [t]
Name: Unit, dtype: object

Dimensions finals: 8,226 files x 11 columnes

Columnes finals (11):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. pesticide_category_code
   5. pesticide_category
   6. pesticide_metric_code
   7. pesticide_metric
   8. Year Code
   9. Year
  10. Unit
  11. Value

Primeres files de pesticides_df:


Unnamed: 0,Area Code,Area Code (M49),Area,pesticide_category_code,pesticide_category,pesticide_metric_code,pesticide_metric,Year Code,Year,Unit,Value
0,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1990,1990,t,121.0
1,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1991,1991,t,121.0
2,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1992,1992,t,121.0
3,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1993,1993,t,121.0
4,3,'008,Albania,1357,Pesticides (total),5157,Agricultural Use,1994,1994,t,201.0



Resum de valors únics:
  - pesticide_category únics: 1 a ['Pesticides (total)']
  - pesticide_metric únics: 1 a ['Agricultural Use']


In [28]:
# Ens quedem només amb les columnes necessàries i renomem el valor per claredat.
# No fem pivotatge perquè els valors d''Item' i 'Element' ja són únics, això és,
# només hem seleccionat l'ítem "Pesticides (total)" i l'element "Agricultural Use",
# per tant cada fila ja representa un únic valor anual per país.

pesticides_pivot_df = pesticides_df[['Area Code', 'Area Code (M49)', 'Area', 'Year Code', 'Year', 'Value']].copy()
pesticides_pivot_df = pesticides_pivot_df.rename(columns={
    'Value': 'Value_pesticide_agricultural_use_t'
})

# Mostrem les primeres files per comprovació
pesticides_pivot_df.head()


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_pesticide_agricultural_use_t
0,3,'008,Albania,1990,1990,121.0
1,3,'008,Albania,1991,1991,121.0
2,3,'008,Albania,1992,1992,121.0
3,3,'008,Albania,1993,1993,121.0
4,3,'008,Albania,1994,1994,201.0


In [29]:
# Ara podem fer el merge

# Utilitzem la funció definida prèviament per fer merges successius amb altres dataframes,

agri_pesticides_df = merge_agri_dataframes(
    df_initial=agri_landuse_df,
    df_to_merge=pesticides_pivot_df,
    df_name_initial="agri_landuse_df",
    df_name_to_merge="pesticides_pivot_df"
)

Nombre de files de agri_landuse_df: 12729
Nombre de columnes de agri_landuse_df: 23
Nombre de files de pesticides_pivot_df: 8226
Nombre de columnes de pesticides_pivot_df: 6
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 24


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0,75775.0


### Anàlisi del dataset LandCover

Analitzem el dataset de LandCover per veure quina informació conté i com podem integrar-la al dataframe final.


In [30]:
# Analitzar el dataset de LandCover
analitzar_dataset_faostat('Environment_LandCover')


Anàlisi del dataset: Environment_LandCover

Dataframe principal (Environment_LandCover_E_All_Data_(Normalized)):
Dimensions: 210,986 files x 12 columnes

 Columnes (12):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Unit
  11. Value
  12. Flag

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,6970,Artificial surfaces (including urban and associated areas),5006,Area from CGLS,2015,2015,1000 ha,102.77,E
1,2,'004,Afghanistan,6970,Artificial surfaces (including urban and associated areas),5006,Area from CGLS,2016,2016,1000 ha,103.3,E
2,2,'004,Afghanistan,6970,Artificial surfaces (including urban and associated areas),5006,Area from CGLS,2017,2017,1000 ha,105.41,E
3,2,'004,Afghanistan,6970,Artificial surfaces (including urban and associated areas),5006,Area from CGLS,2018,2018,1000 ha,107.46,E
4,2,'004,Afghanistan,6970,Artificial surfaces (including urban and associated areas),5006,Area from CGLS,2019,2019,1000 ha,108.34,E



 Valors únics de la columna 'Item':
Total únics: 14 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Artificial surfaces (including urban and associated areas)
1,Herbaceous crops
2,Woody crops
3,Multiple or layered crops
4,Grassland
5,Tree-covered areas
6,Mangroves
7,Shrub-covered areas
8,"Shrubs and/or herbaceous vegetation, aquatic or regularly flooded"
9,Sparsely natural vegetated areas



 Valors únics de la columna 'Element':
Total únics: 4 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Area from CGLS
1,Area from MODIS
2,Area from CCI_LC
3,Area from WorldCover


 Altres dataframes de Environment_LandCover:

 Contingut de Environment_LandCover_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5,'016,American Samoa





 Contingut de Environment_LandCover_E_Elements:


Unnamed: 0,Element Code,Element
0,5008,Area from CCI_LC
1,5006,Area from CGLS
2,5007,Area from MODIS
3,5013,Area from WorldCover





 Contingut de Environment_LandCover_E_Flags:


Unnamed: 0,Flag,Description
0,E,Estimated value
1,O,Missing value





 Contingut de Environment_LandCover_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item






### Creació de `landcover_df`

Creem el dataframe de LandCover basat en el dataset principal de Environment_LandCover.

Del domini Land Cover hem seleccionat únicament els ítems “Grassland”, “Tree-covered areas” i “Herbaceous crops”, ja que permeten caracteritzar els principals usos del sòl relacionats amb agricultura, ramaderia i ocupació forestal. La resta d’ítems aporten un nivell de detall massa específic, poc estable al llarg del temps o no relacionat directament amb l’activitat agrària. Igualment, mantenim només l’element “Area from CCI_LC”, ja que és el producte de coberta terrestre amb major consistència metodològica i aplicabilitat global (https://www.esa-landcover-cci.org/), i evita la duplicació d’informació que generaria utilitzar MODIS, CGLS o WorldCover. Aquesta selecció garanteix un conjunt de dades lleuger, coherent i útil per a la visualització final.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat: `Item` a `landcover_category`, `Element` a `landcover_metric`
# Filtrar els items i elements per a LandCover:
landcover_items_to_keep = [
    "Grassland",
    "Tree-covered areas",
    "Herbaceous crops"
]
landcover_elements_to_keep = ["Area from CCI_LC"]


In [31]:
# Crear landcover_df utilitzant la funció generalitzada
key = 'Environment_LandCover'
df_name = 'landcover_df'
column_mapping = {
    'Item Code': 'landcover_category_code',
    'Item': 'landcover_category',
    'Element Code': 'landcover_metric_code',
    'Element': 'landcover_metric'
}
values_to_keep = {
    'landcover_category': [
        "Grassland",
        "Tree-covered areas",
        "Herbaceous crops"
    ],
    'landcover_metric': ["Area from CCI_LC"]
}

landcover_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

Creació de landcover_df

Valors diferents de 'Unit':
landcover_category  landcover_metric
Grassland           Area from CCI_LC    [1000 ha]
Herbaceous crops    Area from CCI_LC    [1000 ha]
Tree-covered areas  Area from CCI_LC    [1000 ha]
Name: Unit, dtype: object

Dimensions finals: 25,164 files x 11 columnes

Columnes finals (11):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. landcover_category_code
   5. landcover_category
   6. landcover_metric_code
   7. landcover_metric
   8. Year Code
   9. Year
  10. Unit
  11. Value

Primeres files de landcover_df:


Unnamed: 0,Area Code,Area Code (M49),Area,landcover_category_code,landcover_category,landcover_metric_code,landcover_metric,Year Code,Year,Unit,Value
89,2,'004,Afghanistan,6971,Herbaceous crops,5008,Area from CCI_LC,1992,1992,1000 ha,5759.76
90,2,'004,Afghanistan,6971,Herbaceous crops,5008,Area from CCI_LC,1993,1993,1000 ha,5765.63
91,2,'004,Afghanistan,6971,Herbaceous crops,5008,Area from CCI_LC,1994,1994,1000 ha,5763.95
92,2,'004,Afghanistan,6971,Herbaceous crops,5008,Area from CCI_LC,1995,1995,1000 ha,5768.02
93,2,'004,Afghanistan,6971,Herbaceous crops,5008,Area from CCI_LC,1996,1996,1000 ha,5781.61



Resum de valors únics:
  - landcover_category únics: 3 a ['Grassland', 'Herbaceous crops', 'Tree-covered areas']
  - landcover_metric únics: 1 a ['Area from CCI_LC']


### Pivot de `landcover_df`

In [32]:
# Creem la columna pivot_col segons l'estructura:
landcover_df["pivot_col"] = (
    "Value_"
    + landcover_df["landcover_category_code"].astype(str)
    + "_"
    + landcover_df["landcover_category"].str.lower().str.replace(' ', '_')
    + "_ha"
)

# Convertim Value de 1000 ha a ha
landcover_df["Value"] = landcover_df["Value"] * 1000

# Pivotem el dataframe
landcover_pivot = pivot_faostat_df(landcover_df)

Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha
5756,203,'724,Spain,1992,1992,16689280.0,13019730.0,7184060.0
5757,203,'724,Spain,1993,1993,16688080.0,13020160.0,7183810.0
5758,203,'724,Spain,1994,1994,16624710.0,13020610.0,7183600.0
5759,203,'724,Spain,1995,1995,16455090.0,12949110.0,7192770.0
5760,203,'724,Spain,1996,1996,16614490.0,12937310.0,7193760.0


### Merge de `landcover_df` amb el dataframe principal

Un cop creat el pivot, farem el merge amb el dataframe principal utilitzant la funció `merge_agri_dataframes`.


In [33]:
# Fem el merge utilitzant la funció generalitzada
agri_landcover_df = merge_agri_dataframes(
    df_initial=agri_pesticides_df,
    df_to_merge=landcover_pivot,
    df_name_initial="agri_pesticides_df",
    df_name_to_merge="landcover_pivot"
)

Nombre de files de agri_pesticides_df: 12729
Nombre de columnes de agri_pesticides_df: 24
Nombre de files de landcover_pivot: 8388
Nombre de columnes de landcover_pivot: 8
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 27


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0,75775.0,15384230.0,12553120.0,7383290.0


### Anàlisi del dataset Emissions

Analitzem el dataset de Emissions per veure quina informació conté i com podem integrar-la al dataframe final.


In [34]:
# Analitzar el dataset de Emissions
analitzar_dataset_faostat('Emissions_Totals')


Anàlisi del dataset: Emissions_Totals

Dataframe principal (Emissions_Totals_E_All_Data_(Normalized)):
Dimensions: 2,500,184 files x 15 columnes

 Columnes (15):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Source Code
  11. Source
  12. Unit
  13. Value
  14. Flag
  15. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Source Code,Source,Unit,Value,Flag,Note
0,2,'004,Afghanistan,5064,Crop Residues,7234,Direct emissions (N2O),1961,1961,3050,FAO TIER 1,kt,0.8762,E,
1,2,'004,Afghanistan,5064,Crop Residues,7234,Direct emissions (N2O),1962,1962,3050,FAO TIER 1,kt,0.8829,E,
2,2,'004,Afghanistan,5064,Crop Residues,7234,Direct emissions (N2O),1963,1963,3050,FAO TIER 1,kt,0.8236,E,
3,2,'004,Afghanistan,5064,Crop Residues,7234,Direct emissions (N2O),1964,1964,3050,FAO TIER 1,kt,0.8882,E,
4,2,'004,Afghanistan,5064,Crop Residues,7234,Direct emissions (N2O),1965,1965,3050,FAO TIER 1,kt,0.8978,E,



 Valors únics de la columna 'Item':
Total únics: 44 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Crop Residues
1,Rice Cultivation
2,Burning - Crop residues
3,Enteric Fermentation
4,Manure Management
5,Manure left on Pasture
6,Manure applied to Soils
7,Synthetic Fertilizers
8,Drained organic soils
9,Drained organic soils (CO2)



 Valors únics de la columna 'Element':
Total únics: 9 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Direct emissions (N2O)
1,Indirect emissions (N2O)
2,Emissions (N2O)
3,Emissions (CO2eq) from N2O (AR5)
4,Emissions (CO2eq) (AR5)
5,Emissions (CH4)
6,Emissions (CO2eq) from CH4 (AR5)
7,Emissions (CO2)
8,Emissions (CO2eq) from F-gases (AR5)



 Valors únics de la columna 'Source':
Total únics: 2 (mostrant com a màxim 50)


Unnamed: 0,Source
0,FAO TIER 1
1,UNFCCC


 Altres dataframes de Emissions_Totals:

 Contingut de Emissions_Totals_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5,'016,American Samoa





 Contingut de Emissions_Totals_E_Elements:


Unnamed: 0,Element Code,Element
0,7234,Direct emissions (N2O)
1,7225,Emissions (CH4)
2,7273,Emissions (CO2)
3,723113,Emissions (CO2eq) (AR5)
4,724413,Emissions (CO2eq) from CH4 (AR5)





 Contingut de Emissions_Totals_E_Flags:


Unnamed: 0,Flag,Description
0,A,Official figure
1,E,Estimated value
2,F,Forecast value





 Contingut de Emissions_Totals_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item





 Contingut de Emissions_Totals_E_Sources:


Unnamed: 0,Source Code,Source
0,3050,FAO TIER 1
1,3051,UNFCCC






### Creació de `emissions_df`

Creem el dataframe de Emissions basat en el dataset principal de Emissions_Totals.

#### Justificació de la selecció d’Items, Elements i Font del domini *Emissions Totals*

En el domini d’Emissions Totals, el volum d’informació disponible és molt ampli i inclou emissions agrícoles, forestals, energètiques, industrials i d’altres sectors dels sistemes alimentaris.

##### 1. Selecció d’Items (categories d’emissions)

S’han seleccionat els ítems directament atribuïbles a l’activitat agrícola segons la metodologia del **IPCC** i les pràctiques estàndard de la FAO:

- **Enteric Fermentation**  
  Emissions de CH₄ derivades de la digestió de rumiants.
- **Manure Management**  
  Emissions de CH₄ i N₂O derivades del tractament i emmagatzematge del fem.
- **Synthetic Fertilizers**  
  Emissions de N₂O provinents de l’aplicació d’adobs sintètics.
- **Agricultural Soils**  
  Emissions de N₂O associades a l’activitat biogeoquímica del sòl agrícola.
- **Rice Cultivation**  
  Emissions de CH₄ en arrossars inundats.
- **Crop Residues**  
  Emissions derivades de la descomposició de residus vegetals.
- **Emissions from livestock**  
  Indicador agregat per representar l’impacte total del sector ramader.

Aquests set ítems cobreixen de forma completa les fonts d’emissions pròpiament agrícoles en el sentit IPCC. S’han exclòs categories com *Energy*, *Waste*, *Land-use change*, *Forest fires*, *AFOLU*, *IPCC Agriculture*, o emissions industrials i logístiques (*Food processing*, *Transport*, *Packaging*), ja que no formen part del nucli de l’activitat agrícola estricta, barregen múltiples sectors o generen duplicacions d’informació.

Source: https://www.ipcc.ch/languages-2/spanish/

##### 2. Selecció d’Elements (tipus d’emissió)

S’ha seleccionat únicament:

- **Emissions (CO₂eq) (AR5)**

Aquesta elecció es justifica perquè:

1. **CO₂eq** integra CH₄ i N₂O en una única mètrica comparable utilitzant els factors de potencial d’escalfament global del **IPCC AR5**.  
2. Evita la necessitat de gestionar columnes múltiples per contaminant, reduint la dimensionalitat del conjunt de dades.  
3. És l’estàndard més utilitzat en informes internacionals de política climàtica i estudis de sostenibilitat agrícola.

Elements com *Direct emissions (N₂O)*, *Emissions (CH₄)* o *Emissions (CO₂)* s’han descartat per ser redundants o massa específics.

Source: https://www.ipcc.ch/assessment-report/ar5/

##### 3. Selecció de Font (Source)

Entre les dues fonts disponibles:

- **FAO TIER 1**  
- **UNFCCC**

S’ha seleccionat **FAO TIER 1** per oferir una metodologia harmonitzada i consistent derivada de les guies del IPCC.  

**Transformacions realitzades:**
- S'han eliminat les columnes `Note` i `Flag` (informació no rellevant per a l'anàlisi)
- S'han renombrat les columnes per claredat: `Item` → `emission_category`, `Element` → `emission_metric`
- S'han filtrat els items per incloure només les categories d’emissió agrícoles següents:
  - Enteric Fermentation
  - Manure Management
  - Synthetic Fertilizers
  - Agricultural Soils
  - Rice Cultivation
  - Crop Residues
  - Emissions from livestock
- S'ha filtrat l'element per incloure només: `Emissions (CO2eq) (AR5)`
- S'ha filtrat per quedar-se amb la font FAO TIER 1.

In [35]:
key = 'Emissions_Totals'
df_name = 'emissions_df'
column_mapping = {
    'Item Code': 'emission_category_code',
    'Item': 'emission_category',
    'Element Code': 'emission_metric_code',
    'Element': 'emission_metric'
}

# Definir les categories d’item i elements a conservar segons els criteris exposats
values_to_keep = {
    'emission_category': [
        "Enteric Fermentation",
        "Manure Management",
        "Synthetic Fertilizers",
        "Agricultural Soils",
        "Rice Cultivation",
        "Crop Residues",
        "Emissions from livestock"
    ],
    'emission_metric': [
        "Emissions (CO2eq) (AR5)"
    ],
    'Source': [
        "FAO TIER 1"
    ]
}


# Crear el dataframe filtrat
emissions_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

Creació de emissions_df

Valors diferents de 'Unit':
emission_category         emission_metric          Source    
Agricultural Soils        Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Crop Residues             Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Emissions from livestock  Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Enteric Fermentation      Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Manure Management         Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Rice Cultivation          Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Synthetic Fertilizers     Emissions (CO2eq) (AR5)  FAO TIER 1    [kt]
Name: Unit, dtype: object

Dimensions finals: 97,415 files x 13 columnes

Columnes finals (13):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. emission_category_code
   5. emission_category
   6. emission_metric_code
   7. emission_metric
   8. Year Code
   9. Year
  10. Source Code
  11. Source
  12. Unit
  13. Value

Primeres files de emissions_df:


Unnamed: 0,Area Code,Area Code (M49),Area,emission_category_code,emission_category,emission_metric_code,emission_metric,Year Code,Year,Source Code,Source,Unit,Value
260,2,'004,Afghanistan,5064,Crop Residues,723113,Emissions (CO2eq) (AR5),1961,1961,3050,FAO TIER 1,kt,284.451
261,2,'004,Afghanistan,5064,Crop Residues,723113,Emissions (CO2eq) (AR5),1962,1962,3050,FAO TIER 1,kt,286.624
262,2,'004,Afghanistan,5064,Crop Residues,723113,Emissions (CO2eq) (AR5),1963,1963,3050,FAO TIER 1,kt,267.3585
263,2,'004,Afghanistan,5064,Crop Residues,723113,Emissions (CO2eq) (AR5),1964,1964,3050,FAO TIER 1,kt,288.32
264,2,'004,Afghanistan,5064,Crop Residues,723113,Emissions (CO2eq) (AR5),1965,1965,3050,FAO TIER 1,kt,291.447



Resum de valors únics:
  - emission_category únics: 7 a ['Agricultural Soils', 'Crop Residues', 'Emissions from livestock', 'Enteric Fermentation', 'Manure Management', 'Rice Cultivation', 'Synthetic Fertilizers']
  - emission_metric únics: 1 a ['Emissions (CO2eq) (AR5)']
  - Source únics: 1 a ['FAO TIER 1']


### Pivot de `emissions_df`

In [36]:
# Crear la columna pivot_col segons les instruccions
emissions_df["pivot_col"] = (
    "Value_"
    + emissions_df["emission_category_code"].astype(str)
    + "_"
    + emissions_df["emission_category"].str.lower().str.replace(" ", "_")
    + "_CO2eq_AR5_kt_FAO_TIER_1"
)

emissions_pivot = pivot_faostat_df(emissions_df)


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1
10788,203,'724,Spain,1961,1961,6961.444,14814.5032,4186.0065,873.5328,1805.2595,646.3615,23510.3355
10789,203,'724,Spain,1962,1962,7032.3315,14564.312,4227.3978,889.6216,1906.3305,752.9975,23164.7133
10790,203,'724,Spain,1963,1963,6851.416,14277.284,4273.5225,884.8224,1837.563,761.61,22803.0495
10791,203,'724,Spain,1964,1964,6870.867,14158.4716,4257.4815,904.4112,2008.7265,682.2955,22595.7981
10792,203,'724,Spain,1965,1965,6907.914,14010.2284,3734.426,832.58,2122.756,718.4415,21811.3709


### Merge de `emissions_df` amb el dataframe principal

Un cop creat el pivot, farem el merge amb el dataframe principal utilitzant la funció `merge_agri_dataframes`.


In [37]:
# Un cop creat emissions_pivot, fem el merge utilitzant la funció generalitzada
agri_emissions_df = merge_agri_dataframes(
    df_initial=agri_landcover_df,
    df_to_merge=emissions_pivot,
    df_name_initial="agri_landcover_df",
    df_name_to_merge="emissions_pivot"
)

Nombre de files de agri_landcover_df: 12729
Nombre de columnes de agri_landcover_df: 27
Nombre de files de emissions_pivot: 15919
Nombre de columnes de emissions_pivot: 12
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 34


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0,75775.0,15384230.0,12553120.0,7383290.0,13578.229,17162.2304,11289.9885,1440.2696,5844.8665,1548.395,34564.6029


### Anàlisi del dataset Employment Indicators

Analitzem el dataset de Employment Indicators per veure quina informació conté i com podem integrar-la al dataframe final.


In [38]:
# Analitzar el dataset de Employment Indicators
analitzar_dataset_faostat('Employment_Indicators_Agriculture')


Anàlisi del dataset: Employment_Indicators_Agriculture

Dataframe principal (Employment_Indicators_Agriculture_E_All_Data_(Normalized)):
Dimensions: 188,070 files x 17 columnes

 Columnes (17):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Source Code
   5. Source
   6. Indicator Code
   7. Indicator
   8. Sex Code
   9. Sex
  10. Element Code
  11. Element
  12. Year Code
  13. Year
  14. Unit
  15. Value
  16. Flag
  17. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Source Code,Source,Indicator Code,Indicator,Sex Code,Sex,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,3044,FAO Model,21160,Total employment in agrifood systems (AFS),1,Total,6199,Value,2000,2000,1000 No,3367.93,E,Modelled FAO
1,2,'004,Afghanistan,3044,FAO Model,21160,Total employment in agrifood systems (AFS),1,Total,6199,Value,2001,2001,1000 No,3333.57,E,Modelled FAO
2,2,'004,Afghanistan,3044,FAO Model,21160,Total employment in agrifood systems (AFS),1,Total,6199,Value,2002,2002,1000 No,3508.97,E,Modelled FAO
3,2,'004,Afghanistan,3044,FAO Model,21160,Total employment in agrifood systems (AFS),1,Total,6199,Value,2003,2003,1000 No,3721.4,E,Modelled FAO
4,2,'004,Afghanistan,3044,FAO Model,21160,Total employment in agrifood systems (AFS),1,Total,6199,Value,2004,2004,1000 No,3803.91,E,Modelled FAO



 Valors únics de la columna 'Element':
Total únics: 1 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Value



 Valors únics de la columna 'Indicator':
Total únics: 30 (mostrant com a màxim 50)


Unnamed: 0,Indicator
0,Total employment in agrifood systems (AFS)
1,Total non-agricultural AFS employment
2,Share of AFS employment in total employment
3,Share of agricultural employment in total AFS employment
4,Share of non-agricultural AFS employment in total AFS employment
5,Share of non-agricultural AFS employment in total employment
6,"Employment in agriculture by age, total (15+)"
7,"Employment in agriculture by age, 15 to 24"
8,"Employment in agriculture by age, 25 to 54"
9,"Employment in agriculture by age, 55 to 64"



 Valors únics de la columna 'Source':
Total únics: 12 (mostrant com a màxim 50)


Unnamed: 0,Source
0,FAO Model
1,Household income and expenditure survey
2,ILO - ILO Modelled Estimates
3,ILO - Modelled Estimates; FAOSTAT - Value added (2015 US$)
4,Labour force survey
5,Employment surveys
6,Official estimates
7,Population census
8,Administrative records
9,Establishment census


 Altres dataframes de Employment_Indicators_Agriculture:

 Contingut de Employment_Indicators_Agriculture_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5,'016,American Samoa





 Contingut de Employment_Indicators_Agriculture_E_Elements:


Unnamed: 0,Element Code,Element
0,6121,Value
1,6173,Value
2,6199,Value
3,6228,Value





 Contingut de Employment_Indicators_Agriculture_E_Flags:


Unnamed: 0,Flag,Description
0,E,Estimated value
1,X,Figure from external organization





 Contingut de Employment_Indicators_Agriculture_E_Indicators:


Unnamed: 0,Indicator Code,Indicator
0,21085,Agriculture value added per worker (constant 2015 US$)
1,21144,Employment in agriculture - ILO modelled estimates
2,21145,Employment in agriculture by age
3,21088,Employment in agriculture by age; 15 to 24
4,21089,Employment in agriculture by age; 25 to 54





 Contingut de Employment_Indicators_Agriculture_E_Sexs:


Unnamed: 0,Sex Code,Sex
0,3,Female
1,2,Male
2,1,Total





 Contingut de Employment_Indicators_Agriculture_E_Sources:


Unnamed: 0,Source Code,Source
0,3012,Administrative insurance records
1,3014,Administrative population register
2,3015,Administrative records
3,3018,Employment surveys
4,3019,Establishment census






### Creació de `employment_df`

Creem el dataframe de Employment Indicators basat en el dataset principal de Employment_Indicators_Agriculture.

El conjunt de dades d'ocupació agrícola presenta una gran varietat d’indicadors provinents de fonts diferents (ILO, FAO, enquestes nacionals, censos, etc.), amb metodologies i cobertures molt desiguals. Per tal d’obtenir un conjunt de dades coherent, interpretable i útil per analitzar la relació entre activitat agrícola, productivitat, ús del sòl i clima, és imprescindible reduir dràsticament el nombre d’indicadors i seleccionar únicament aquells que siguin robusts, comparables i rellevants a escala global.

La selecció final consta de cinc indicadors, cadascun aportant informació complementària i essencial per entendre les dinàmiques laborals del sector agrícola.

1. **Employment in agriculture – ILO modelled estimates**  
Aquest és l’indicador central per a un estudi d’agricultura. Les estimacions modelitzades de l’ILO proporcionen dades harmonitzades, consistents i comparables entre països i al llarg del temps, evitant els problemes derivats de les fortes diferències metodològiques dels censos i enquestes nacionals. És la mesura estàndard utilitzada en estudis internacionals per quantificar la mà d’obra agrícola activa. 

Source: https://ilostat.ilo.org/methods/concepts-and-definitions/ilo-modelled-estimates/

2. **Share of employment in agriculture in total employment – ILO modelled estimates**  
La proporció de treballadors ocupats en el sector agrícola, també harmonitzada per l’ILO, permet analitzar el pes del sector en l’economia de cada país. Aquest indicador és fonamental per estudiar processos de transformació estructural, desenvolupament econòmic i especialització productiva. És especialment útil per comparar països amb dimensions de població molt diferents.

3. **Agriculture value added per worker (constant 2015 US$)**  
Aquest indicador mesura la productivitat laboral agrícola i és clau per entendre la relació entre rendiment econòmic, inputs agrícoles (com fertilitzants i pesticides) i condicions climàtiques. Aporta una dimensió econòmica que enriqueix enormement l'anàlisi, permetent contextualitzar els nivells de desenvolupament i eficiència del sector agrícola de cada país.

4. **Employment in crop and animal production, hunting and related service activities**  
Aquest indicador desglossa l’ocupació estrictament agrícola de forma més precisa, excloent sectors paral·lels com la pesca o la silvicultura. Aporta granularitat sense perdre coherència i permet distingir el nucli central de l’activitat agrícola dins del conjunt més ampli dels sistemes alimentaris.


La selecció proposada prioritza coherència, comparabilitat internacional i rellevància directa per a l’estudi de la relació entre agricultura, clima i productivitat i redueix la complexitat del domini, originalment amb 30 indicadors.

Aquest enfocament garanteix un conjunt de dades final molt més sòlid, interpretable i alineat amb els objectius de la pràctica.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat 
- Filtrar valors.


In [39]:
# Crear employment_df utilitzant la funció generalitzada
key = 'Employment_Indicators_Agriculture'
df_name = 'employment_df'
column_mapping = {
    'Indicator Code': 'employment_indicator_code',
    'Indicator': 'employment_indicator',
    'Element Code': 'employment_metric_code',
    'Element': 'employment_metric',
    'Source Code': 'employment_source_code',
    'Source': 'employment_source',
    'Sex Code': 'employment_sex_code',
    'Sex': 'employment_sex'
}
values_to_keep = {
    'employment_indicator': [
        'Employment in agriculture - ILO modelled estimates',
        'Share of employment in agriculture in total employment - ILO Modelled Estimates',
        'Agriculture value added per worker (constant 2015 US$)',
        'Employment in crop and animal production, hunting and related service activities'
    ]
}

employment_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)


Creació de employment_df

Valors diferents de 'Unit':
employment_indicator
Agriculture value added per worker (constant 2015 US$)                                  [USD]
Employment in agriculture - ILO modelled estimates                                  [1000 No]
Employment in crop and animal production, hunting and related service activities    [1000 No]
Share of employment in agriculture in total employment - ILO Modelled Estimates           [%]
Name: Unit, dtype: object

Dimensions finals: 55,800 files x 15 columnes

Columnes finals (15):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. employment_source_code
   5. employment_source
   6. employment_indicator_code
   7. employment_indicator
   8. employment_sex_code
   9. employment_sex
  10. employment_metric_code
  11. employment_metric
  12. Year Code
  13. Year
  14. Unit
  15. Value

Primeres files de employment_df:


Unnamed: 0,Area Code,Area Code (M49),Area,employment_source_code,employment_source,employment_indicator_code,employment_indicator,employment_sex_code,employment_sex,employment_metric_code,employment_metric,Year Code,Year,Unit,Value
234,2,'004,Afghanistan,3021,Household income and expenditure survey,21111,"Employment in crop and animal production, hunting and related service activities",1,Total,6199,Value,2014,2014,1000 No,2824.35
235,2,'004,Afghanistan,3021,Household income and expenditure survey,21111,"Employment in crop and animal production, hunting and related service activities",1,Total,6199,Value,2017,2017,1000 No,2737.94
236,2,'004,Afghanistan,3021,Household income and expenditure survey,21111,"Employment in crop and animal production, hunting and related service activities",2,Male,6199,Value,2014,2014,1000 No,1870.42
237,2,'004,Afghanistan,3021,Household income and expenditure survey,21111,"Employment in crop and animal production, hunting and related service activities",2,Male,6199,Value,2017,2017,1000 No,1834.38
238,2,'004,Afghanistan,3021,Household income and expenditure survey,21111,"Employment in crop and animal production, hunting and related service activities",3,Female,6199,Value,2014,2014,1000 No,953.93



Resum de valors únics:
  - employment_indicator únics: 4 a ['Agriculture value added per worker (constant 2015 US$)', 'Employment in agriculture - ILO modelled estimates', 'Employment in crop and animal production, hunting and related service activities', 'Share of employment in agriculture in total employment - ILO Modelled Estimates']


In [40]:
employment_df.groupby("employment_indicator")["employment_source"].unique()

employment_indicator
Agriculture value added per worker (constant 2015 US$)                                                                     [ILO - Modelled Estimates; FAOSTAT - Value added (2015 US$)]
Employment in agriculture - ILO modelled estimates                                                                                                       [ILO - ILO Modelled Estimates]
Employment in crop and animal production, hunting and related service activities    [Household income and expenditure survey, Labour force survey, Population census, Household survey]
Share of employment in agriculture in total employment - ILO Modelled Estimates                                                                          [ILO - ILO Modelled Estimates]
Name: employment_source, dtype: object

Com que les fonts són úniques per cada indicador, no cal afegir-les a la columna.

### Pivot de `employment_df`


In [41]:
# Convertir "1000 No" a "No" multiplicant el valor per 1000
mask_1000no = employment_df['Unit'] == '1000 No'
employment_df['Value'] = np.where(mask_1000no, employment_df['Value'] * 1000, employment_df['Value'])
employment_df['Unit'] = np.where(mask_1000no, 'No', employment_df['Unit'])

print("\nUnitats després de la conversió:")
print(employment_df.groupby("employment_indicator")["Unit"].unique())

# Crear la columna pivot_col amb el format especificat
employment_df["pivot_col"] = (
    "Value_"
    + employment_df["employment_indicator_code"].astype(str)
    + "_"
    + employment_df["employment_indicator"].str.lower().str.replace(' ', '_').str.replace(',', '').str.replace('–', '').str.replace('(', '').str.replace(')', '').str.replace('$', 'usd')
    + "_"
    + employment_df["employment_sex"].str.lower()
    + "_"
    + employment_df["Unit"]
)

# Pivotar el dataframe
employment_pivot = pivot_faostat_df(employment_df)



Unitats després de la conversió:
employment_indicator
Agriculture value added per worker (constant 2015 US$)                              [USD]
Employment in agriculture - ILO modelled estimates                                   [No]
Employment in crop and animal production, hunting and related service activities     [No]
Share of employment in agriculture in total employment - ILO Modelled Estimates       [%]
Name: Unit, dtype: object


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_21085_agriculture_value_added_per_worker_constant_2015_ususd_total_USD,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_female_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_male_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_total_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_female_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_male_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_total_No,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_female_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_male_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_total_%
4889,203,'724,Spain,1991,1991,15467.34,,,,372790.0,1042140.0,1414920.0,8.9,11.5,10.7
4890,203,'724,Spain,1992,1992,17262.18,,,,365660.0,914910.0,1280570.0,8.6,10.3,9.7
4891,203,'724,Spain,1993,1993,18362.87,321660.0,758830.0,1080300.0,337150.0,900650.0,1237800.0,8.1,10.6,9.8
4892,203,'724,Spain,1994,1994,18133.86,299220.0,740890.0,1039940.0,312650.0,881460.0,1194100.0,7.4,10.5,9.5
4893,203,'724,Spain,1995,1995,17478.29,296710.0,721190.0,1018100.0,311670.0,851310.0,1162980.0,7.1,9.9,9.0


### Merge de `employment_df` amb el dataframe principal

Un cop creat el pivot, farem el merge amb el dataframe principal utilitzant la funció `merge_agri_dataframes`.


In [42]:
# Fer el merge utilitzant la funció generalitzada
agri_employment_df = merge_agri_dataframes(
    df_initial=agri_emissions_df,
    df_to_merge=employment_pivot,
    df_name_initial="agri_emissions_df",
    df_name_to_merge="employment_pivot"
)

Nombre de files de agri_emissions_df: 12729
Nombre de columnes de agri_emissions_df: 34
Nombre de files de employment_pivot: 7431
Nombre de columnes de employment_pivot: 15
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 44


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1,Value_21085_agriculture_value_added_per_worker_constant_2015_ususd_total_USD,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_female_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_male_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_total_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_female_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_male_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_total_No,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_female_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_male_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_total_%
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0,75775.0,15384230.0,12553120.0,7383290.0,13578.229,17162.2304,11289.9885,1440.2696,5844.8665,1548.395,34564.6029,43474.11,165150.0,532600.0,697820.0,171200.0,605510.0,776710.0,1.9,5.7,4.0


### Anàlisi del dataset Population

Analitzem el dataset de Population per veure quina informació conté i com podem integrar-la al dataframe final.


In [43]:
# Analitzar el dataset de Population
analitzar_dataset_faostat('Population')


Anàlisi del dataset: Population

Dataframe principal (Population_E_All_Data_(Normalized)):
Dimensions: 168,404 files x 13 columnes

 Columnes (13):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. Element Code
   7. Element
   8. Year Code
   9. Year
  10. Unit
  11. Value
  12. Flag
  13. Note

 Primeres files del dataframe principal:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Note
0,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1950,1950,1000 No,7776.176,X,
1,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1951,1951,1000 No,7879.339,X,
2,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1952,1952,1000 No,7987.783,X,
3,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1953,1953,1000 No,8096.698,X,
4,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1954,1954,1000 No,8207.95,X,



 Valors únics de la columna 'Item':
Total únics: 1 (mostrant com a màxim 50)


Unnamed: 0,Item
0,Population - Est. & Proj.



 Valors únics de la columna 'Element':
Total únics: 5 (mostrant com a màxim 50)


Unnamed: 0,Element
0,Total Population - Both sexes
1,Total Population - Male
2,Total Population - Female
3,Rural population
4,Urban population


 Altres dataframes de Population:

 Contingut de Population_E_AreaCodes:


Unnamed: 0,Area Code,M49 Code,Area
0,2,'004,Afghanistan
1,5100,'002,Africa
2,3,'008,Albania
3,4,'012,Algeria
4,5,'016,American Samoa





 Contingut de Population_E_Elements:


Unnamed: 0,Element Code,Element
0,551,Rural population
1,511,Total Population - Both sexes
2,513,Total Population - Female
3,512,Total Population - Male
4,561,Urban population





 Contingut de Population_E_Flags:


Unnamed: 0,Flag,Description
0,E,Estimated value
1,X,Figure from external organization





 Contingut de Population_E_ItemCodes:


Unnamed: 0,Item Code,CPC Code,Item






### Creació de `population_df`

Creem el dataframe de Population basat en el dataset principal de Population.

Del domini Population hem seleccionat els indicadors de població total i la seva desagregació per sexe (masculí/femení) i ubicació (rural/urbà). Aquests indicadors són essencials per contextualitzar la producció agrícola i l'ús de recursos en relació amb la pressió demogràfica de cada país. La informació demogràfica permet analitzar la seguretat alimentària, l'eficiència productiva per capita i les dinàmiques de desenvolupament rural-urbà.

**Transformacions realitzades:**
- Eliminem les columnes `Note` i `Flag` (informació no utilitzable per a l'anàlisi)
- Renombrem columnes per a millor claredat: `Element` a `population_type`
- Filtrar per elements: Total Population - Both sexes, Total Population - Female, Total Population - Male, Rural Population, Urban Population
- Convertir de 1000 No a No (multiplicar per 1000)
- Calcular percentatges sobre la població total


In [44]:
# Crear population_df utilitzant la funció generalitzada
key = 'Population'
df_name = 'population_df'
column_mapping = {
    'Element Code': 'population_type_code',
    'Element': 'population_type'
}
values_to_keep = {
    'population_type': [
        'Total Population - Both sexes',
        'Total Population - Female',
        'Total Population - Male',
        'Rural population',
        'Urban population'
    ]
}

population_df = crear_df_faostat_filtrat(
    key=key,
    df_name=df_name,
    column_mapping=column_mapping,
    values_to_keep=values_to_keep
)

# Convertir de 1000 No a No
population_df['Value'] = population_df['Value'] * 1000
population_df['Unit'] = 'No'
print("\nValors convertits de 1000 No a No (multiplicats per 1000)")

# Em vist a l'anàlisi que no necessitem l'Item Code ni Item, per tant el descartem.
population_df = population_df.drop(columns=['Item Code', 'Item'])

display(population_df.head())


Creació de population_df



Valors diferents de 'Unit':
population_type
Rural population                 [1000 No]
Total Population - Both sexes    [1000 No]
Total Population - Female        [1000 No]
Total Population - Male          [1000 No]
Urban population                 [1000 No]
Name: Unit, dtype: object

Dimensions finals: 168,404 files x 11 columnes

Columnes finals (11):
   1. Area Code
   2. Area Code (M49)
   3. Area
   4. Item Code
   5. Item
   6. population_type_code
   7. population_type
   8. Year Code
   9. Year
  10. Unit
  11. Value

Primeres files de population_df:


Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,population_type_code,population_type,Year Code,Year,Unit,Value
0,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1950,1950,1000 No,7776.176
1,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1951,1951,1000 No,7879.339
2,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1952,1952,1000 No,7987.783
3,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1953,1953,1000 No,8096.698
4,2,'004,Afghanistan,3010,Population - Est. & Proj.,511,Total Population - Both sexes,1954,1954,1000 No,8207.95



Resum de valors únics:
  - population_type únics: 5 a ['Rural population', 'Total Population - Both sexes', 'Total Population - Female', 'Total Population - Male', 'Urban population']

Valors convertits de 1000 No a No (multiplicats per 1000)


Unnamed: 0,Area Code,Area Code (M49),Area,population_type_code,population_type,Year Code,Year,Unit,Value
0,2,'004,Afghanistan,511,Total Population - Both sexes,1950,1950,No,7776176.0
1,2,'004,Afghanistan,511,Total Population - Both sexes,1951,1951,No,7879339.0
2,2,'004,Afghanistan,511,Total Population - Both sexes,1952,1952,No,7987783.0
3,2,'004,Afghanistan,511,Total Population - Both sexes,1953,1953,No,8096698.0
4,2,'004,Afghanistan,511,Total Population - Both sexes,1954,1954,No,8207950.0


### Pivot de `population_df`

Realitzem el pivot per tenir una columna per cada tipus de població i calculem els percentatges.


In [45]:
# Crear la columna pivot_col
population_df['pivot_col'] = (
    population_df['population_type']
    .str.replace('Total Population - ', '')
    .str.replace('Both sexes', 'Total')
    .str.replace(' ', '_')
    .str.lower()
)

# Pivotem el dataframe utilitzant la funció generalitzada
population_pivot = pivot_faostat_df(population_df)

# Calcular percentatges sobre la població total
population_pivot['female_population_%'] = (population_pivot['female'] / population_pivot['total']) * 100
population_pivot['male_population_%'] = (population_pivot['male'] / population_pivot['total']) * 100
population_pivot['rural_population_%'] = (population_pivot['rural_population'] / population_pivot['total']) * 100
population_pivot['urban_population_%'] = (population_pivot['urban_population'] / population_pivot['total']) * 100

# Renombrar la columna total i eliminar les columnes absolutes que no necessitem
population_pivot = population_pivot.rename(columns={'total': 'total_population_No'})
population_pivot = population_pivot.drop(columns=['female', 'male', 'rural_population', 'urban_population'])

print("\nColumnes finals de population_pivot:")
print(population_pivot.columns.tolist())
print("\nPrimeres files:")
display(population_pivot[population_pivot['Area'] == 'Spain'].head())


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,female,male,rural_population,total,urban_population
26382,203,'724,Spain,1950,1950,14567665.0,13509384.0,13496009.0,28077049.0,14573726.0
26383,203,'724,Spain,1951,1951,14626971.0,13587889.0,13462062.0,28214860.0,14781573.0
26384,203,'724,Spain,1952,1952,14713128.0,13692914.0,13422112.0,28406041.0,15020261.0
26385,203,'724,Spain,1953,1953,14816229.0,13816468.0,13389000.0,28632697.0,15269669.0
26386,203,'724,Spain,1954,1954,14919205.0,13938623.0,13359555.0,28857828.0,15527794.0



Columnes finals de population_pivot:
['Area Code', 'Area Code (M49)', 'Area', 'Year Code', 'Year', 'total_population_No', 'female_population_%', 'male_population_%', 'rural_population_%', 'urban_population_%']

Primeres files:


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,total_population_No,female_population_%,male_population_%,rural_population_%,urban_population_%
26382,203,'724,Spain,1950,1950,28077049.0,51.884602,48.115398,48.067762,51.906189
26383,203,'724,Spain,1951,1951,28214860.0,51.841374,48.158626,47.712666,52.389319
26384,203,'724,Spain,1952,1952,28406041.0,51.795771,48.204232,47.250907,52.876995
26385,203,'724,Spain,1953,1953,28632697.0,51.745838,48.254162,46.761225,53.329482
26386,203,'724,Spain,1954,1954,28857828.0,51.698988,48.301012,46.294388,53.80791


### Merge de `population_df` amb el dataframe principal

Un cop creat el pivot amb els percentatges, farem el merge amb el dataframe principal utilitzant la funció `merge_agri_dataframes`.


In [46]:
# Fer el merge utilitzant la funció generalitzada
agri_population_df = merge_agri_dataframes(
    df_initial=agri_employment_df,
    df_to_merge=population_pivot,
    df_name_initial="agri_employment_df",
    df_name_to_merge="population_pivot"
)


Nombre de files de agri_employment_df: 12729
Nombre de columnes de agri_employment_df: 44
Nombre de files de population_pivot: 39174
Nombre de columnes de population_pivot: 10
Nombre final de files del resultat: 12729
Nombre final de columnes del resultat: 49


Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1,Value_21085_agriculture_value_added_per_worker_constant_2015_ususd_total_USD,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_female_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_male_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_total_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_female_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_male_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_total_No,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_female_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_male_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_total_%,total_population_No,female_population_%,male_population_%,rural_population_%,urban_population_%
8697,203,'724,Spain,2020,2020,1059299.0,63.64,22.22,18.35,486673.0,29.24,10.21,8.43,399489.0,24.0,8.38,6.92,49955664.5,26142638.1,16646395.1,11639398.1,5006997.0,9496243.0,75775.0,15384230.0,12553120.0,7383290.0,13578.229,17162.2304,11289.9885,1440.2696,5844.8665,1548.395,34564.6029,43474.11,165150.0,532600.0,697820.0,171200.0,605510.0,776710.0,1.9,5.7,4.0,47679489.0,50.886131,49.113869,18.699198,78.741484


## Anàlisi dels datasets climàtics

Un cop construïts els dataframes agrícoles amb informació de producció, preus, fertilitzants, ús del sòl, pesticides, cobertura terrestre, emissions i ocupació, és essencial integrar dades climàtiques per analitzar la relació entre l'agricultura i el canvi climàtic.

Els datasets climàtics que analitzarem són:

1. **Climate_Indicators_Annual_Mean_Global_Surface_Temperature**: Temperatura superficial global anual
2. **World_Development_Indicators**: Indicadors de desenvolupament mundial (incloent clima)
3. **annual_mean_historical_pr**: Precipitació mitjana anual històrica
4. **annual_mean_historical_tas**: Temperatura mitjana anual històrica

Aquests datasets ens permetran:
- Contextualitzar la producció agrícola amb les condicions climàtiques de cada país
- Analitzar l'impacte del canvi climàtic en la productivitat i les pràctiques agrícoles
- Identificar patrons de correlació entre variables climàtiques i agrícoles
- Avaluar la vulnerabilitat dels sistemes agrícoles als canvis ambientals


### Definició de funció per analitzar datasets climàtics


In [47]:
def analitzar_climate_df(
    df: pd.DataFrame,
    df_name: str,
    cols_to_show_unique: list[str] = None,
) -> pd.DataFrame:
    """
    Funció per analitzar un dataset climàtic.
    
    Mostra:
    - Dimensions del dataframe
    - Head() del dataframe
    - Valors únics de les columnes especificades
    
    Paràmetres:
    -----------
    df : pd.DataFrame
        Dataframe a analitzar
    df_name : str
        Nom del dataframe (per als prints)
    cols_to_show_unique : list[str]
        Llista de columnes per mostrar valors únics
        
    Retorna:
    --------
    pd.DataFrame
        Dataframe processat (amb header fixat si cal)
    """
    print(f"Anàlisi del dataset: {df_name}")
    
    # Mostrar dimensions
    print(f"\nDimensions: {df.shape[0]:,} files x {df.shape[1]} columnes")
    
    # Mostrar columnes
    print(f"\nColumnes ({len(df.columns)}):")
    for i, col in enumerate(df.columns, 1):
        print(f"  {i:2d}. {col}")
    
    # Mostrar head
    print(f"\nPrimeres files de {df_name}:")
    display(df.head())
    
    # Mostrar valors únics de les columnes especificades
    if cols_to_show_unique:
        for col in cols_to_show_unique:
            if col in df.columns:
                unique_values = df[col].dropna().unique()
                print(f"\nValors únics de la columna '{col}':")
                print(f"Total únics: {len(unique_values)}")
                if len(unique_values) <= 20:
                    print(f"Valors: {sorted(unique_values)}")
                else:
                    print(f"Primers 20 valors: {sorted(unique_values)[:20]}")
            else:
                print(f"\nLa columna '{col}' no existeix al dataframe")
        
    return df


### Anàlisi de Climate_Indicators_Annual_Mean_Global_Surface_Temperature


In [48]:
# Analitzar Climate_Indicators amb fix de header
df_name = "Climate_Indicators_Annual_Mean_Global_Surface_Temperature"
climate_indicators_df = analitzar_climate_df(
    df=data_dict["climate_data"][df_name],
    df_name=df_name,
    cols_to_show_unique=['Indicator', 'Unit', 'Source', 'CTS Name', 'CTS Full Descriptor'],
)

Anàlisi del dataset: Climate_Indicators_Annual_Mean_Global_Surface_Temperature

Dimensions: 231 files x 74 columnes

Columnes (74):
   1. ObjectId
   2. Country
   3. ISO2
   4. ISO3
   5. Indicator
   6. Unit
   7. Source
   8. CTS Code
   9. CTS Name
  10. CTS Full Descriptor
  11. 1961
  12. 1962
  13. 1963
  14. 1964
  15. 1965
  16. 1966
  17. 1967
  18. 1968
  19. 1969
  20. 1970
  21. 1971
  22. 1972
  23. 1973
  24. 1974
  25. 1975
  26. 1976
  27. 1977
  28. 1978
  29. 1979
  30. 1980
  31. 1981
  32. 1982
  33. 1983
  34. 1984
  35. 1985
  36. 1986
  37. 1987
  38. 1988
  39. 1989
  40. 1990
  41. 1991
  42. 1992
  43. 1993
  44. 1994
  45. 1995
  46. 1996
  47. 1997
  48. 1998
  49. 1999
  50. 2000
  51. 2001
  52. 2002
  53. 2003
  54. 2004
  55. 2005
  56. 2006
  57. 2007
  58. 2008
  59. 2009
  60. 2010
  61. 2011
  62. 2012
  63. 2013
  64. 2014
  65. 2015
  66. 2016
  67. 2017
  68. 2018
  69. 2019
  70. 2020
  71. 2021
  72. 2022
  73. 2023
  74. 2024

Primeres files d

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Unit,Source,CTS Code,CTS Name,CTS Full Descriptor,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,1,"Afghanistan, Islamic Rep. of",AF,AFG,"Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980",Degree Celsius,"Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.",ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weather, Surface Temperature Change",-0.096,-0.143,0.848,-0.762,-0.233,0.239,-0.355,-0.402,-0.528,0.828,0.628,-1.116,0.24,-0.484,-0.469,-0.292,0.478,0.028,0.335,0.585,0.479,-0.356,0.147,0.129,0.25,-0.129,0.38,0.918,-0.124,0.786,-0.149,-0.262,0.175,0.461,0.405,-0.119,0.425,0.577,1.167,1.0,1.276,1.355,0.552,1.422,0.419,1.799,0.699,0.732,0.958,1.666,1.455,0.267,1.385,0.484,1.139,1.588,1.63,1.597,0.951,0.552,1.418,1.967,1.748,2.188
1,2,Africa,,AFRTMP,"Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980",Degree Celsius,"Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.",ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weather, Surface Temperature Change",-0.015,-0.033,0.069,-0.149,-0.194,0.142,-0.212,-0.225,0.359,0.219,-0.205,0.0,0.376,-0.235,-0.237,-0.259,0.233,0.076,0.436,0.368,0.111,0.23,0.422,0.331,0.295,0.293,0.684,0.615,0.007,0.584,0.394,0.101,0.395,0.426,0.564,0.541,0.586,0.977,0.648,0.484,0.668,0.899,1.028,0.851,1.1,0.947,0.817,0.725,0.973,1.51,0.906,0.758,1.009,1.011,1.195,1.385,1.158,1.15,1.29,1.177,1.4,1.014,1.485,1.75
2,3,Albania,AL,ALB,"Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980",Degree Celsius,"Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.",ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weather, Surface Temperature Change",0.643,0.351,0.089,-0.154,-0.377,0.565,-0.071,0.085,-0.009,-0.114,-0.208,-0.088,-0.313,-0.147,-0.223,-0.698,0.526,-0.824,0.196,-0.418,-0.351,0.174,-0.12,-0.267,-0.098,0.574,-0.105,0.379,-0.056,0.797,-0.277,0.087,0.068,1.314,-0.187,-0.039,0.079,0.795,0.686,1.065,1.547,0.513,1.026,0.472,0.2,0.402,1.377,1.02,0.944,1.241,1.136,1.559,1.392,1.332,1.702,1.608,1.239,2.155,1.797,1.627,1.719,1.701,2.299,2.925
3,4,Algeria,DZ,DZA,"Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980",Degree Celsius,"Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.",ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weather, Surface Temperature Change",0.154,0.105,0.072,0.248,-0.104,0.424,0.0,-0.065,0.275,0.111,-0.391,-0.356,-0.03,-0.506,-0.57,-0.816,0.539,0.054,0.691,0.244,0.218,0.396,0.545,0.025,0.532,0.33,1.03,1.397,0.45,1.339,0.103,-0.232,0.643,0.828,0.689,0.923,1.112,1.163,1.51,0.862,1.889,1.288,1.621,1.024,1.293,1.432,1.254,1.219,0.969,2.272,1.407,1.171,1.213,1.705,1.134,1.761,1.506,1.203,1.24,2.006,2.424,1.804,2.303,2.824
4,5,American Samoa,AS,ASM,"Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980",Degree Celsius,"Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.",ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weather, Surface Temperature Change",0.075,-0.047,0.161,-0.146,-0.538,0.21,-0.339,-0.164,0.153,-0.023,-0.449,-0.042,0.357,-0.271,-0.08,-0.139,0.187,0.124,0.372,0.362,0.19,0.289,0.322,0.286,0.264,0.403,0.36,0.513,0.149,0.505,0.645,0.363,-0.041,0.236,0.788,0.803,,,0.264,0.66,0.938,1.185,0.739,0.285,0.934,0.536,0.969,0.561,0.875,1.007,0.704,0.734,1.026,0.898,0.738,1.268,1.163,0.917,1.268,1.159,0.997,0.984,1.01,1.588



Valors únics de la columna 'Indicator':
Total únics: 1
Valors: ['Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980']

Valors únics de la columna 'Unit':
Total únics: 1
Valors: ['Degree Celsius']

Valors únics de la columna 'Source':
Total únics: 1
Valors: ['Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from:\xa0https://www.fao.org/faostat/en/#data/ET.']

Valors únics de la columna 'CTS Name':
Total únics: 1
Valors: ['Surface Temperature Change']

Valors únics de la columna 'CTS Full Descriptor':
Total únics: 1
Valors: ['Environment, Climate Change, Climate and Weather, Surface Temperature Change']


### Transformació de Climate_Indicators_Annual_Mean_Global_Surface_Temperature

Transformem el dataframe per eliminar columnes amb valors únics i fer unpivot de les columnes d'anys per tenir una estructura més útil per al merge final.


In [49]:
# Identificar columnes amb valors únics per eliminar-les
cols_to_drop = ['ObjectId']
for col in climate_indicators_df.columns:
    if climate_indicators_df[col].nunique() == 1:
        cols_to_drop.append(col)
        print(f"Columna '{col}' té un únic valor: {climate_indicators_df[col].iloc[0]}")

print(f"\nColumnes a eliminar: {cols_to_drop}")

# Mantenir només les columnes necessàries: Country, ISO2, ISO3 i les columnes d'anys
cols_to_keep = ['Country', 'ISO2', 'ISO3']
# Identificar columnes d'anys (columnes numèriques que no són les que volem mantenir)
year_cols = [col for col in climate_indicators_df.columns 
             if col not in cols_to_keep + cols_to_drop]

print(f"\nColumnes d'anys identificades: {len(year_cols)} columnes")
print(f"Primers anys: {year_cols[:10] if len(year_cols) > 10 else year_cols}")

# Crear dataframe amb només les columnes necessàries
climate_indicators_clean = climate_indicators_df[cols_to_keep + year_cols].copy()

# Fer unpivot (melt) de les columnes d'anys
climate_indicators_melted = climate_indicators_clean.melt(
    id_vars=cols_to_keep,
    value_vars=year_cols,
    var_name='Year',
    value_name='surface_temperature_change_celsius'
)

# Convertir Year a numèric
climate_indicators_melted['Year'] = pd.to_numeric(climate_indicators_melted['Year'], errors='coerce')

# Eliminar files amb valors NaN
climate_indicators_melted = climate_indicators_melted.dropna(subset=['surface_temperature_change_celsius', 'Year'])

print(f"\nDimensions després de la transformació: {climate_indicators_melted.shape[0]:,} files x {climate_indicators_melted.shape[1]} columnes")
print(f"\nColumnes finals: {list(climate_indicators_melted.columns)}")
print(f"\nPrimeres files:")
display(climate_indicators_melted.head(10))


Columna 'Indicator' té un únic valor: Temperature change with respect to a baseline climatology, corresponding to the period 1951-1980
Columna 'Unit' té un únic valor: Degree Celsius
Columna 'Source' té un únic valor: Food and Agriculture Organization of the United Nations (FAO). 2022. FAOSTAT Climate Change, Climate Indicators, Temperature change. License: CC BY-NC-SA 3.0 IGO. Extracted from: https://www.fao.org/faostat/en/#data/ET.
Columna 'CTS Code' té un únic valor: ECCS
Columna 'CTS Name' té un únic valor: Surface Temperature Change
Columna 'CTS Full Descriptor' té un únic valor: Environment, Climate Change, Climate and Weather, Surface Temperature Change

Columnes a eliminar: ['ObjectId', 'Indicator', 'Unit', 'Source', 'CTS Code', 'CTS Name', 'CTS Full Descriptor']

Columnes d'anys identificades: 64 columnes
Primers anys: ['1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970']

Dimensions després de la transformació: 13,267 files x 5 columnes

Columnes fi

Unnamed: 0,Country,ISO2,ISO3,Year,surface_temperature_change_celsius
0,"Afghanistan, Islamic Rep. of",AF,AFG,1961,-0.096
1,Africa,,AFRTMP,1961,-0.015
2,Albania,AL,ALB,1961,0.643
3,Algeria,DZ,DZA,1961,0.154
4,American Samoa,AS,ASM,1961,0.075
5,Americas,,AMETMP,1961,0.058
6,"Andorra, Principality of",AD,AND,1961,0.736
7,Angola,AO,AGO,1961,0.033
8,Anguilla,AI,AIA,1961,-0.029
9,Antigua and Barbuda,AG,ATG,1961,0.013


### Anàlisi de World_Development_Indicators


### Transformació de World_Development_Indicators

Transformem el dataframe per tenir una estructura útil amb indicadors agrícoles per país i any. Eliminem registres no vàlids, fem unpivot de les columnes d'anys i pivotem per tenir una columna per cada indicador.


In [50]:
# Analitzar World_Development_Indicators
wdi_df = analitzar_climate_df(
    df=data_dict["climate_data"]["World_Development_Indicators"],
    df_name="World_Development_Indicators",
    cols_to_show_unique=['Series Name'],
)


Anàlisi del dataset: World_Development_Indicators

Dimensions: 1,875 files x 69 columnes

Columnes (69):
   1. Country Name
   2. Country Code
   3. Series Name
   4. Series Code
   5. 1960 [YR1960]
   6. 1961 [YR1961]
   7. 1962 [YR1962]
   8. 1963 [YR1963]
   9. 1964 [YR1964]
  10. 1965 [YR1965]
  11. 1966 [YR1966]
  12. 1967 [YR1967]
  13. 1968 [YR1968]
  14. 1969 [YR1969]
  15. 1970 [YR1970]
  16. 1971 [YR1971]
  17. 1972 [YR1972]
  18. 1973 [YR1973]
  19. 1974 [YR1974]
  20. 1975 [YR1975]
  21. 1976 [YR1976]
  22. 1977 [YR1977]
  23. 1978 [YR1978]
  24. 1979 [YR1979]
  25. 1980 [YR1980]
  26. 1981 [YR1981]
  27. 1982 [YR1982]
  28. 1983 [YR1983]
  29. 1984 [YR1984]
  30. 1985 [YR1985]
  31. 1986 [YR1986]
  32. 1987 [YR1987]
  33. 1988 [YR1988]
  34. 1989 [YR1989]
  35. 1990 [YR1990]
  36. 1991 [YR1991]
  37. 1992 [YR1992]
  38. 1993 [YR1993]
  39. 1994 [YR1994]
  40. 1995 [YR1995]
  41. 1996 [YR1996]
  42. 1997 [YR1997]
  43. 1998 [YR1998]
  44. 1999 [YR1999]
  45. 2000 [YR2000]
 

Unnamed: 0,Country Name,Country Code,Series Name,Series Code,1960 [YR1960],1961 [YR1961],1962 [YR1962],1963 [YR1963],1964 [YR1964],1965 [YR1965],1966 [YR1966],1967 [YR1967],1968 [YR1968],1969 [YR1969],1970 [YR1970],1971 [YR1971],1972 [YR1972],1973 [YR1973],1974 [YR1974],1975 [YR1975],1976 [YR1976],1977 [YR1977],1978 [YR1978],1979 [YR1979],1980 [YR1980],1981 [YR1981],1982 [YR1982],1983 [YR1983],1984 [YR1984],1985 [YR1985],1986 [YR1986],1987 [YR1987],1988 [YR1988],1989 [YR1989],1990 [YR1990],1991 [YR1991],1992 [YR1992],1993 [YR1993],1994 [YR1994],1995 [YR1995],1996 [YR1996],1997 [YR1997],1998 [YR1998],1999 [YR1999],2000 [YR2000],2001 [YR2001],2002 [YR2002],2003 [YR2003],2004 [YR2004],2005 [YR2005],2006 [YR2006],2007 [YR2007],2008 [YR2008],2009 [YR2009],2010 [YR2010],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020],2021 [YR2021],2022 [YR2022],2023 [YR2023],2024 [YR2024]
0,Afghanistan,AFG,"Agriculture, forestry, and fishing, value added (% of GDP)",NV.AGR.TOTL.ZS,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,38.6278918638443,37.4188554431481,29.7210671376957,31.114854912062,28.6359685844686,30.1050113574013,24.8922700059742,29.2975010547259,26.2100685374552,23.7436639877543,24.390873626371,22.8106627361766,22.1370413719574,20.6343227166798,25.7403140364582,26.4201990834448,22.0428967527575,25.7739707394105,29.9755825210461,33.5976188725451,33.7014323213923,34.7432471445174,..
1,Afghanistan,AFG,"Agriculture, forestry, and fishing, value added (annual % growth)",NV.AGR.TOTL.KD.ZG,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,2.7983102944344,-22.2363560541923,17.6981031760703,-5.10925115291374,14.7004462834181,-14.9163297643984,44.5629549189029,-6.3751344936499,-7.59232353439185,18.2247894586686,0.0052674137791939,-0.10070510676627,-5.65721785186237,5.97175186192254,6.41105900151825,-4.41531136727946,17.4598248147288,5.9423403878575,-9.7806339651292,-6.56407790334215,2.22486704408989,..
2,Afghanistan,AFG,"Agriculture, forestry, and fishing, value added (constant 2015 US$)",NV.AGR.TOTL.KD,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,3251434260.06993,3342419479.68623,2599187183.35852,3059194012.80851,2902892107.43922,3329630202.35891,2832971581.44005,4095427430.14251,3834338423.38109,3543223044.8745,4188967984.85391,4189188635.13075,4184969908.2431,3948217043.4989,4183994768.30679,4452233141.52337,4255653185.52791,4998682776.44351,5295721521.92899,4777766384.05654,4464150076.56738,4563471480.41964,..
3,Afghanistan,AFG,Agricultural irrigated land (% of total agricultural land),AG.LND.IRIG.AG.ZS,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,..,5.66212461965868,4.61762370997618,7.26428043501214,5.49989453701751,5.83906954664135,6.08546557636508,5.93975840059081,5.77856313957168,4.84228294123853,5.00039560091779,5.39100619807464,5.46499973624519,5.51833289369559,5.74254814033237,5.71089422316012,6.4811395410182,5.99050382484833,5.1223362273086,6.00631412786109,6.50692976274372,6.32161407355206,5.82830892908412,5.97911638157204,..
4,Afghanistan,AFG,Agricultural land (% of land area),AG.LND.AGRI.ZS,..,57.878355794735,57.9550158686353,58.0316759425356,58.1160020238259,58.123668031216,58.1926620977263,58.2294589331984,58.2309921346764,58.2555233583245,58.2708553731046,58.3168514174448,58.3352498351808,58.3370896769544,58.3387761985803,58.3387761985803,58.3383162381369,58.3383162381369,58.3383162381369,58.3367830366588,58.3367830366588,58.3429158425709,58.3444490440489,58.3444490440489,58.3444490440489,58.3444490440489,58.3444490440489,58.3306502307468,58.3229842233568,58.3229842233568,58.3229842233568,58.3076522085767,58.3076522085767,58.1604648666881,57.9749474878494,57.8982874139491,57.889088205081,57.9473498612453,58.0592735691397,57.8998206154271,57.9458166597673,57.9473498612453,57.9396838538552,58.0838047927878,58.1512656578201,58.134400441562,58.123668031216,58.129800837128,58.132867240084,58.132867240084,58.134400441562,58.131334038606,58.129800837128,58.123668031216,58.123668031216,58.123668031216,58.123668031216,58.123668031216,58.2769881790166,58.2769881790166,58.7415482268525,58.7415482268525,58.7415482268525,58.4395075356853,..



Valors únics de la columna 'Series Name':
Total únics: 8
Valors: ['Agricultural irrigated land (% of total agricultural land)', 'Agricultural land (% of land area)', 'Agriculture, forestry, and fishing, value added (% of GDP)', 'Agriculture, forestry, and fishing, value added (annual % growth)', 'Agriculture, forestry, and fishing, value added (constant 2015 US$)', 'Arable land (% of land area)', 'Arable land (hectares per person)', 'Indicator Name']


In [51]:
# Eliminar registres on Series Name = 'Indicator Name'
wdi_clean = wdi_df[wdi_df['Series Name'] != 'Indicator Name'].copy()
print(f"Registres eliminats: {len(wdi_df) - len(wdi_clean)} (Series Name = 'Indicator Name')")
print(f"Registres restants: {len(wdi_clean)}")

# Identificar columnes d'anys (formato "1960 [YR1960]")
year_cols = [col for col in wdi_clean.columns if '[YR' in str(col)]
print(f"\nColumnes d'anys identificades: {len(year_cols)}")

# Columnes a mantenir com id_vars
id_cols = ['Country Name', 'Country Code', 'Series Name', 'Series Code']

# Fer unpivot (melt) de les columnes d'anys
wdi_melted = wdi_clean.melt(
    id_vars=id_cols,
    value_vars=year_cols,
    var_name='Year_Col',
    value_name='Value'
)

# Extraure l'any del format "1960 [YR1960]"
wdi_melted['Year'] = wdi_melted['Year_Col'].str.extract(r'(\d{4})').astype(float)
wdi_melted = wdi_melted.drop(columns=['Year_Col', 'Series Code'])

# Convertir '..' a NaN
wdi_melted['Value'] = wdi_melted['Value'].replace('..', np.nan)
wdi_melted = wdi_melted[
    wdi_melted['Year'].notna()
].copy()

# Convertir Value a numèric
wdi_melted['Value'] = pd.to_numeric(wdi_melted['Value'], errors='coerce')
wdi_melted = wdi_melted.dropna(subset=['Value'])

print(f"\nDimensions després del melt: {wdi_melted.shape[0]:,} files x {wdi_melted.shape[1]} columnes")

# Pivotar per Series Name per tenir una columna per cada indicador
wdi_pivot = wdi_melted.pivot_table(
    index=['Country Name', 'Country Code', 'Year'],
    columns='Series Name',
    values='Value',
    aggfunc='first'
).reset_index()

print(f"\nDimensions després del pivot: {wdi_pivot.shape[0]:,} files x {wdi_pivot.shape[1]} columnes")

# Renombrar columnes segons el format especificat
column_mapping = {
    'Country Name': 'Country',
    'Country Code': 'ISO3',
    'Agricultural irrigated land (% of total agricultural land)': 'agricultural_irrigated_land_%_of_total_agri_land',
    'Agricultural land (% of land area)': 'agricultural_land_%_total_land',
    'Arable land (% of land area)': 'arable_land_%_total_land',
    'Arable land (hectares per person)': 'arable_land_ha_per_person',
    'Agriculture, forestry, and fishing, value added (% of GDP)': 'agriculture_forestry_fishing_value_added_%_GDP',
    'Agriculture, forestry, and fishing, value added (annual % growth)': 'agriculture_forestry_fishing_value_added_%_annual_growth',
    'Agriculture, forestry, and fishing, value added (constant 2015 US$)': 'agriculture_forestry_fishing_value_added_constant_2015_US$'
}

wdi_final = wdi_pivot.rename(columns=column_mapping)

# Seleccionar només les columnes que volem mantenir
cols_to_keep = [
    'ISO3',
    'Year',
    'agricultural_irrigated_land_%_of_total_agri_land',
    'agricultural_land_%_total_land',
    'arable_land_%_total_land',
    'arable_land_ha_per_person',
    'agriculture_forestry_fishing_value_added_%_GDP',
    'agriculture_forestry_fishing_value_added_%_annual_growth',
    'agriculture_forestry_fishing_value_added_%_constant_2015_US$'
]

# Mantenir només les columnes que existeixen
cols_to_keep = [col for col in cols_to_keep if col in wdi_final.columns]
wdi_final = wdi_final[cols_to_keep].copy()

print(f"\nDimensions finals: {wdi_final.shape[0]:,} files x {wdi_final.shape[1]} columnes")
print(f"\nColumnes finals: {list(wdi_final.columns)}")
print(f"\nPrimeres files:")
display(wdi_final[(wdi_final['ISO3'] == 'ESP')& (wdi_final['Year'] >= 2015)].head(10))

Registres eliminats: 1 (Series Name = 'Indicator Name')
Registres restants: 1874

Columnes d'anys identificades: 65

Dimensions després del melt: 77,768 files x 5 columnes

Dimensions després del pivot: 15,603 files x 10 columnes

Dimensions finals: 15,603 files x 8 columnes

Columnes finals: ['ISO3', 'Year', 'agricultural_irrigated_land_%_of_total_agri_land', 'agricultural_land_%_total_land', 'arable_land_%_total_land', 'arable_land_ha_per_person', 'agriculture_forestry_fishing_value_added_%_GDP', 'agriculture_forestry_fishing_value_added_%_annual_growth']

Primeres files:


Series Name,ISO3,Year,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth
13076,ESP,2015.0,13.539857,53.189062,24.573101,0.26449,2.72327,4.831547
13077,ESP,2016.0,13.75977,52.577247,24.695895,0.265555,2.815221,4.503758
13078,ESP,2017.0,13.933547,52.638714,24.529741,0.263119,2.803447,-3.486742
13079,ESP,2018.0,14.145419,52.40821,23.787368,0.254035,2.701283,4.193773
13080,ESP,2019.0,14.726331,52.458492,23.644962,0.250694,2.519801,-2.770201
13081,ESP,2020.0,14.767737,52.331679,23.299456,0.245767,2.78468,-1.979426
13082,ESP,2021.0,14.899014,52.484894,23.112438,0.243447,2.768249,7.044045
13083,ESP,2022.0,14.25645,53.357281,23.394513,0.244643,2.309576,-20.26738
13084,ESP,2023.0,15.115416,49.575327,23.007434,0.237792,2.499726,6.44981
13085,ESP,2024.0,,,,,2.539037,8.260702


### Anàlisi de annual_mean_historical_pr (Precipitació)


In [52]:
# Analitzar annual_mean_historical_pr (precipitació)
pr_df = analitzar_climate_df(
    df=data_dict["climate_data"]["annual_mean_historical_pr"],
    df_name="annual_mean_historical_pr",
    cols_to_show_unique=[],
)


Anàlisi del dataset: annual_mean_historical_pr

Dimensions: 246 files x 126 columnes

Columnes (126):
   1. code
   2. name
   3. 1901-07
   4. 1902-07
   5. 1903-07
   6. 1904-07
   7. 1905-07
   8. 1906-07
   9. 1907-07
  10. 1908-07
  11. 1909-07
  12. 1910-07
  13. 1911-07
  14. 1912-07
  15. 1913-07
  16. 1914-07
  17. 1915-07
  18. 1916-07
  19. 1917-07
  20. 1918-07
  21. 1919-07
  22. 1920-07
  23. 1921-07
  24. 1922-07
  25. 1923-07
  26. 1924-07
  27. 1925-07
  28. 1926-07
  29. 1927-07
  30. 1928-07
  31. 1929-07
  32. 1930-07
  33. 1931-07
  34. 1932-07
  35. 1933-07
  36. 1934-07
  37. 1935-07
  38. 1936-07
  39. 1937-07
  40. 1938-07
  41. 1939-07
  42. 1940-07
  43. 1941-07
  44. 1942-07
  45. 1943-07
  46. 1944-07
  47. 1945-07
  48. 1946-07
  49. 1947-07
  50. 1948-07
  51. 1949-07
  52. 1950-07
  53. 1951-07
  54. 1952-07
  55. 1953-07
  56. 1954-07
  57. 1955-07
  58. 1956-07
  59. 1957-07
  60. 1958-07
  61. 1959-07
  62. 1960-07
  63. 1961-07
  64. 1962-07
  65. 19

Unnamed: 0,code,name,1901-07,1902-07,1903-07,1904-07,1905-07,1906-07,1907-07,1908-07,1909-07,1910-07,1911-07,1912-07,1913-07,1914-07,1915-07,1916-07,1917-07,1918-07,1919-07,1920-07,1921-07,1922-07,1923-07,1924-07,1925-07,1926-07,1927-07,1928-07,1929-07,1930-07,1931-07,1932-07,1933-07,1934-07,1935-07,1936-07,1937-07,1938-07,1939-07,1940-07,1941-07,1942-07,1943-07,1944-07,1945-07,1946-07,1947-07,1948-07,1949-07,1950-07,1951-07,1952-07,1953-07,1954-07,1955-07,1956-07,1957-07,1958-07,1959-07,1960-07,1961-07,1962-07,1963-07,1964-07,1965-07,1966-07,1967-07,1968-07,1969-07,1970-07,1971-07,1972-07,1973-07,1974-07,1975-07,1976-07,1977-07,1978-07,1979-07,1980-07,1981-07,1982-07,1983-07,1984-07,1985-07,1986-07,1987-07,1988-07,1989-07,1990-07,1991-07,1992-07,1993-07,1994-07,1995-07,1996-07,1997-07,1998-07,1999-07,2000-07,2001-07,2002-07,2003-07,2004-07,2005-07,2006-07,2007-07,2008-07,2009-07,2010-07,2011-07,2012-07,2013-07,2014-07,2015-07,2016-07,2017-07,2018-07,2019-07,2020-07,2021-07,2022-07,2023-07,2024-07
0,ABW,Aruba (Neth.),420.9,420.9,420.9,420.9,420.9,420.9,420.9,420.9,420.9,420.9,569.7,487.0,413.3,393.8,379.9,442.3,447.7,434.3,253.3,324.1,363.1,503.1,367.7,574.0,275.7,446.4,624.1,486.3,342.8,305.5,498.5,475.7,535.4,439.2,483.3,455.9,558.6,544.8,430.1,288.4,218.8,620.7,443.2,479.4,305.1,378.8,314.1,310.7,438.5,574.5,393.0,375.0,306.4,380.3,535.6,599.5,303.8,219.2,274.4,376.5,403.5,342.2,477.0,328.2,373.9,667.6,350.0,316.5,617.1,612.2,298.7,455.9,446.4,384.6,513.5,261.4,217.7,300.6,563.8,311.6,648.3,334.8,272.9,413.0,641.3,354.2,391.5,591.2,362.1,463.2,310.4,383.6,290.6,367.6,383.9,501.8,254.3,426.2,707.5,484.3,346.3,266.8,392.9,643.2,658.1,500.8,418.1,624.2,333.3,697.9,684.3,357.1,327.6,282.4,281.3,440.5,375.6,467.2,218.3,394.2,420.9,420.9,415.4,444.8
1,AFG,Afghanistan,289.67,269.7,331.11,315.35,285.52,305.15,314.33,332.24,301.74,316.7,313.92,310.5,296.92,360.02,281.05,292.44,197.97,307.63,324.47,313.3,304.11,315.73,338.86,381.61,258.26,321.97,264.55,287.12,289.35,307.49,383.95,303.98,339.24,342.76,328.14,303.7,318.28,319.73,355.71,297.51,319.02,358.89,351.29,314.75,345.01,252.41,268.91,301.61,297.93,295.53,330.1,326.03,383.89,343.19,295.31,363.04,443.93,326.6,368.82,363.89,345.95,277.44,351.4,335.61,377.7,295.4,380.33,357.1,372.29,242.56,201.95,415.69,285.06,313.66,337.12,375.29,310.99,306.62,351.83,357.34,365.37,429.05,319.31,276.26,248.02,329.15,316.73,334.57,324.8,361.26,429.73,403.99,329.92,337.75,313.38,330.82,333.33,372.4,315.96,240.34,233.14,311.86,343.45,312.42,374.2,349.02,321.83,290.55,390.4,297.47,321.52,374.7,368.17,305.86,407.7,368.4,307.58,274.83,396.28,395.26,232.26,300.98,292.42,355.05
2,AGO,Angola,1021.18,1038.05,1060.13,980.71,978.53,1067.39,1019.09,1024.27,1155.25,1053.63,978.89,1143.7,984.0,889.28,928.09,1102.03,982.84,1011.1,1012.73,1031.54,1062.74,993.81,1209.13,1092.34,1072.55,1088.6,1014.75,957.96,1012.23,882.28,1043.35,923.49,1074.49,1154.88,1054.82,1020.31,1158.03,1047.37,1020.68,1003.97,932.48,980.97,983.41,1182.4,1109.45,976.63,1102.93,992.31,991.6,1171.69,1221.16,1053.58,1001.3,1119.38,1086.43,1041.11,1167.36,911.78,1087.92,1173.81,1236.29,1135.08,1117.91,959.81,996.79,1133.91,1199.32,1126.73,1220.3,965.98,999.1,984.89,1064.23,1094.06,1041.18,1105.11,1111.27,1045.71,1123.22,941.74,886.16,975.57,947.99,964.94,987.32,1134.7,995.33,1091.41,971.6,1035.04,1078.69,930.54,1034.39,984.49,955.04,989.76,1079.46,1029.85,1060.74,1053.39,1043.69,1076.96,1022.52,1059.9,1011.95,1083.29,1045.72,1061.3,1062.04,1039.62,1087.0,1071.8,1004.39,1052.57,993.92,1013.76,1070.29,1056.66,1063.28,1066.74,1058.24,1057.69,1053.24,1027.8
3,AIA,Anguilla (U.K.),1227.3,1096.2,1028.9,1072.0,1097.2,1018.0,970.4,1033.7,1120.9,1114.5,1102.6,944.6,1003.6,1026.6,1065.9,1132.0,947.4,956.3,1044.7,976.1,1014.6,1010.1,890.5,1053.6,912.1,957.8,1094.8,1207.5,962.4,922.9,1327.3,1178.8,1148.5,1011.6,994.2,1181.3,895.1,1092.4,962.1,1055.4,1014.7,1139.5,1050.3,1072.7,898.5,940.7,888.5,1083.2,1090.4,1102.5,990.3,1219.5,978.5,1122.7,1034.2,1087.6,848.8,1221.0,937.3,1273.8,1038.9,1021.1,980.4,780.5,1081.7,945.7,684.0,839.6,1258.2,1208.7,952.2,827.8,806.1,975.1,1061.3,809.8,1012.6,877.5,1530.4,762.7,1165.4,954.8,991.1,1118.8,976.1,1036.5,1271.4,1090.8,1104.7,963.2,825.8,1181.6,846.4,762.1,905.4,953.5,898.2,1312.5,1131.2,742.0,1042.5,998.6,1169.2,1224.1,1321.0,1377.3,1034.7,1305.0,956.9,1305.0,1137.2,848.5,1172.0,1163.6,629.3,1088.4,1154.2,949.4,990.0,1040.5,891.5,920.0,1055.4,1391.2
4,ALA,ALA,365.26,475.98,569.3,482.47,522.82,448.91,493.67,443.21,497.91,566.17,470.4,612.85,453.41,365.59,536.12,573.41,431.4,504.28,496.7,416.81,442.13,484.47,565.09,525.9,503.08,465.78,609.54,553.19,524.95,559.38,485.9,534.23,397.13,582.4,611.84,605.7,580.94,527.51,448.27,456.38,409.35,426.03,577.72,657.67,566.27,500.56,359.77,500.22,521.7,556.87,416.68,539.62,428.44,613.25,430.7,430.19,578.54,509.29,412.5,665.1,568.84,526.15,453.64,394.1,514.08,532.43,583.72,523.58,461.57,457.41,424.53,432.4,455.86,653.0,406.35,398.87,581.8,462.9,538.8,608.99,633.61,461.85,532.34,607.43,504.63,606.18,490.42,540.85,439.91,602.09,524.93,536.24,486.35,512.75,529.14,441.35,534.7,570.25,533.38,571.19,536.73,423.97,463.72,497.7,511.88,511.76,531.58,618.52,499.07,507.85,501.28,666.67,448.09,505.73,515.64,479.44,574.75,398.05,567.85,559.32,511.74,474.19,619.12,540.49


In [53]:
# Crear el dataframe a partir del diccionari
pr_raw_df = data_dict["climate_data"]["annual_mean_historical_pr"].copy()

# Reanomenar columnes 'name' a 'Country' i 'code' a 'ISO3'
pr_raw_df = pr_raw_df.rename(columns={'name': 'Country', 'code': 'ISO3'})

# Identificar les columnes d'anys (format 'YYYY-07')
any_cols = [col for col in pr_raw_df.columns if '-07' in col]

# Fer el melt per transformar els anys a files
pr_long_df = pr_raw_df.melt(
    id_vars=['Country', 'ISO3'],
    value_vars=any_cols,
    var_name='Year',
    value_name='prec_mm_per_year'
)

# Netejar la columna de l'any, traient el sufix '-07' i convertint a int
pr_long_df['Year'] = pr_long_df['Year'].str.replace('-07', '', regex=False).astype(int)

# Reordenar les columnes per claredat
pr_long_df = pr_long_df[['ISO3', 'Year', 'prec_mm_per_year']]

# Mostrar algunes files com a exemple
display(pr_long_df[(pr_long_df['ISO3'] == 'ESP') & (pr_long_df['Year'] >= 2015)].head(10))


Unnamed: 0,ISO3,Year,prec_mm_per_year
28111,ESP,2015,489.87
28357,ESP,2016,647.87
28603,ESP,2017,465.42
28849,ESP,2018,791.86
29095,ESP,2019,557.29
29341,ESP,2020,635.25
29587,ESP,2021,613.49
29833,ESP,2022,556.8
30079,ESP,2023,526.62
30325,ESP,2024,663.34


### Anàlisi de annual_mean_historical_tas (Temperatura)


In [54]:
# Analitzar annual_mean_historical_tas (temperatura)
tas_df = analitzar_climate_df(
    df=data_dict["climate_data"]["annual_mean_historical_tas"],
    df_name="annual_mean_historical_tas",
    cols_to_show_unique=[],
)


Anàlisi del dataset: annual_mean_historical_tas

Dimensions: 246 files x 126 columnes

Columnes (126):
   1. code
   2. name
   3. 1901-07
   4. 1902-07
   5. 1903-07
   6. 1904-07
   7. 1905-07
   8. 1906-07
   9. 1907-07
  10. 1908-07
  11. 1909-07
  12. 1910-07
  13. 1911-07
  14. 1912-07
  15. 1913-07
  16. 1914-07
  17. 1915-07
  18. 1916-07
  19. 1917-07
  20. 1918-07
  21. 1919-07
  22. 1920-07
  23. 1921-07
  24. 1922-07
  25. 1923-07
  26. 1924-07
  27. 1925-07
  28. 1926-07
  29. 1927-07
  30. 1928-07
  31. 1929-07
  32. 1930-07
  33. 1931-07
  34. 1932-07
  35. 1933-07
  36. 1934-07
  37. 1935-07
  38. 1936-07
  39. 1937-07
  40. 1938-07
  41. 1939-07
  42. 1940-07
  43. 1941-07
  44. 1942-07
  45. 1943-07
  46. 1944-07
  47. 1945-07
  48. 1946-07
  49. 1947-07
  50. 1948-07
  51. 1949-07
  52. 1950-07
  53. 1951-07
  54. 1952-07
  55. 1953-07
  56. 1954-07
  57. 1955-07
  58. 1956-07
  59. 1957-07
  60. 1958-07
  61. 1959-07
  62. 1960-07
  63. 1961-07
  64. 1962-07
  65. 1

Unnamed: 0,code,name,1901-07,1902-07,1903-07,1904-07,1905-07,1906-07,1907-07,1908-07,1909-07,1910-07,1911-07,1912-07,1913-07,1914-07,1915-07,1916-07,1917-07,1918-07,1919-07,1920-07,1921-07,1922-07,1923-07,1924-07,1925-07,1926-07,1927-07,1928-07,1929-07,1930-07,1931-07,1932-07,1933-07,1934-07,1935-07,1936-07,1937-07,1938-07,1939-07,1940-07,1941-07,1942-07,1943-07,1944-07,1945-07,1946-07,1947-07,1948-07,1949-07,1950-07,1951-07,1952-07,1953-07,1954-07,1955-07,1956-07,1957-07,1958-07,1959-07,1960-07,1961-07,1962-07,1963-07,1964-07,1965-07,1966-07,1967-07,1968-07,1969-07,1970-07,1971-07,1972-07,1973-07,1974-07,1975-07,1976-07,1977-07,1978-07,1979-07,1980-07,1981-07,1982-07,1983-07,1984-07,1985-07,1986-07,1987-07,1988-07,1989-07,1990-07,1991-07,1992-07,1993-07,1994-07,1995-07,1996-07,1997-07,1998-07,1999-07,2000-07,2001-07,2002-07,2003-07,2004-07,2005-07,2006-07,2007-07,2008-07,2009-07,2010-07,2011-07,2012-07,2013-07,2014-07,2015-07,2016-07,2017-07,2018-07,2019-07,2020-07,2021-07,2022-07,2023-07,2024-07
0,ABW,Aruba (Neth.),28.22,27.79,27.89,27.62,27.68,27.58,27.56,27.46,27.52,27.52,27.59,27.65,27.48,27.57,28.35,28.1,27.8,27.82,27.63,27.5,27.65,27.52,27.94,28.1,27.82,28.35,27.84,28.0,27.82,27.98,28.31,28.51,28.37,28.49,28.18,28.41,28.34,28.04,28.29,28.84,29.02,28.74,28.2,28.12,27.94,28.07,28.65,28.28,28.03,27.57,28.24,28.5,28.38,27.88,27.81,27.77,28.3,28.97,28.48,28.67,28.37,28.33,28.46,28.35,28.26,28.51,28.12,28.28,28.83,28.47,28.03,28.62,28.71,28.09,28.11,28.05,28.66,28.76,28.65,29.17,28.79,28.66,29.16,28.38,28.25,28.31,29.0,28.47,28.15,28.07,28.66,28.94,28.95,29.02,29.23,28.89,29.19,29.54,28.85,28.7,29.11,29.1,29.26,28.89,28.95,28.89,28.9,28.59,29.01,29.22,28.66,29.15,29.39,29.46,29.67,29.68,29.27,28.97,29.37,29.41,29.06,28.88,29.45,29.63
1,AFG,Afghanistan,12.78,12.98,11.81,12.13,12.02,12.5,11.89,12.21,12.71,12.22,12.02,12.5,12.47,13.05,13.63,12.11,12.79,12.17,12.18,11.37,12.19,12.81,11.87,12.06,12.72,12.46,12.44,11.85,11.66,11.57,11.78,11.9,12.14,11.57,11.85,12.39,11.77,12.53,12.64,13.17,13.72,12.78,12.06,12.83,12.1,12.71,13.28,12.6,11.75,11.41,12.25,12.39,12.74,12.05,12.91,12.36,11.48,13.0,12.38,12.23,12.65,12.33,13.21,11.63,12.59,12.86,12.29,12.31,12.11,13.1,13.06,11.17,12.68,11.88,12.18,12.17,13.12,12.78,12.74,13.05,13.13,12.09,12.95,12.39,12.96,12.48,13.12,13.57,12.43,13.25,12.7,12.42,12.71,12.95,13.1,12.63,12.95,13.33,13.8,13.59,13.93,13.78,13.27,13.92,13.34,13.81,13.5,13.46,13.72,14.09,13.5,13.06,13.92,13.17,13.75,14.27,13.86,14.13,13.81,13.15,14.31,14.46,14.61,14.24
2,AGO,Angola,21.32,21.36,21.37,21.28,21.32,21.2,21.22,21.29,21.31,21.35,21.41,21.36,21.37,21.29,21.3,21.28,21.33,21.17,21.22,21.32,21.32,21.42,21.31,21.38,21.22,21.28,21.28,21.47,21.32,21.31,21.47,21.26,21.43,21.52,21.36,21.24,21.32,21.37,21.42,21.64,21.8,21.81,21.54,21.73,21.5,21.38,21.53,21.25,21.83,21.5,21.4,21.6,21.28,21.22,21.25,21.17,21.53,21.36,21.47,21.48,21.19,21.2,21.23,21.2,21.25,21.45,21.21,21.29,21.47,21.51,21.34,21.33,21.57,21.13,21.3,21.06,21.52,21.31,21.57,21.5,21.41,21.44,21.78,21.7,21.63,21.49,21.97,21.72,21.55,21.73,21.55,21.69,21.58,21.65,21.93,21.71,21.84,22.09,21.59,21.67,21.84,21.95,22.01,21.78,22.14,21.58,21.56,21.53,21.69,21.87,21.49,21.56,21.56,21.56,21.7,21.94,21.82,21.66,21.81,21.73,21.73,21.62,21.56,21.71
3,AIA,Anguilla (U.K.),27.18,26.77,26.8,26.33,26.69,26.44,26.1,26.24,26.19,26.23,26.15,26.49,25.94,26.3,26.94,26.51,26.06,26.24,26.42,26.16,26.02,25.97,26.63,27.01,26.38,27.03,26.8,26.72,26.27,26.53,27.2,27.2,26.89,26.65,26.55,26.83,27.0,26.6,26.8,27.33,27.36,27.45,26.88,27.09,26.62,26.78,26.99,27.02,26.71,26.58,26.68,26.83,26.74,26.49,26.47,26.3,26.72,27.26,26.93,27.01,26.79,26.83,26.98,26.9,26.82,26.99,26.7,26.89,27.36,27.07,26.65,26.86,26.94,26.71,26.68,26.61,27.13,27.06,27.12,27.52,27.46,27.16,27.74,27.24,27.32,27.43,28.07,27.68,27.35,27.67,27.62,27.64,27.8,27.66,27.86,27.67,27.99,28.35,27.8,27.77,27.96,28.17,28.12,27.77,28.08,27.84,27.83,27.44,27.8,28.17,27.76,27.84,27.97,27.99,28.23,28.16,27.96,27.69,28.08,28.1,27.97,27.82,28.29,28.72
4,ALA,ALA,5.39,3.43,5.29,4.72,5.17,5.7,4.55,4.88,4.33,5.93,5.98,4.86,5.98,6.29,3.79,4.79,4.43,5.13,4.5,5.77,5.87,4.28,4.39,4.77,5.59,4.97,4.88,4.55,4.48,6.39,4.18,5.53,5.28,6.96,5.75,5.78,5.95,6.66,5.97,3.53,3.53,3.07,6.34,5.86,5.73,5.25,4.72,5.6,6.64,5.71,5.61,4.52,6.26,5.0,4.76,3.85,5.23,4.53,6.33,5.15,6.25,4.45,4.47,5.09,4.41,4.17,5.6,5.22,4.76,3.94,5.3,5.73,5.77,6.03,6.76,4.67,4.92,4.61,4.36,4.58,4.7,5.43,5.87,5.77,3.48,4.67,3.55,5.57,6.88,6.93,5.97,6.47,5.55,5.94,5.78,4.72,6.11,5.46,6.51,6.97,6.0,6.45,6.08,6.18,6.43,6.9,6.73,7.07,6.18,4.61,6.7,5.9,6.32,7.05,6.87,6.36,6.18,6.94,6.95,7.78,6.27,6.92,6.66,7.3


En aquest cas és el mateix format que l'anterior

In [55]:
# Crear el dataframe a partir del diccionari per temperatura (en català)
tas_raw_df = data_dict["climate_data"]["annual_mean_historical_tas"].copy()

# Reanomenar columnes 'name' a 'Country' i 'code' a 'ISO3'
tas_raw_df = tas_raw_df.rename(columns={'name': 'Country', 'code': 'ISO3'})

# Identificar columnes d'anys (format 'YYYY-07')
year_cols = [col for col in tas_raw_df.columns if '-07' in col]

# Fer el melt per transformar els anys a files
tas_long_df = tas_raw_df.melt(
    id_vars=['Country', 'ISO3'],
    value_vars=year_cols,
    var_name='Year',
    value_name='mean_annual_tas_deg_celsius'
)

# Netejar la columna de l'any, traient el '-07' i assegurant que sigui int
tas_long_df['Year'] = tas_long_df['Year'].str.replace('-07', '', regex=False).astype(int)

# Reordenar columnes per claredat
tas_long_df = tas_long_df[['ISO3', 'Year', 'mean_annual_tas_deg_celsius']]

# Mostrar algunes files com a exemple
display(tas_long_df[(tas_long_df['ISO3'] == 'ESP') & (tas_long_df['Year'] >= 2015)].head(10))


Unnamed: 0,ISO3,Year,mean_annual_tas_deg_celsius
28111,ESP,2015,14.52
28357,ESP,2016,14.4
28603,ESP,2017,14.69
28849,ESP,2018,14.11
29095,ESP,2019,14.37
29341,ESP,2020,14.77
29587,ESP,2021,14.44
29833,ESP,2022,15.16
30079,ESP,2023,15.16
30325,ESP,2024,15.1


In [56]:
# Utilitzar els dataframes ja processats anteriorment
print("Verificant els dataframes processats disponibles...")

def resumeix_dataframe(df, nom, col_any='Year', col_pais='ISO3'):
    df[col_any] = df[col_any].astype(int)
    any_min, any_max = df[col_any].min(), df[col_any].max()
    print(f"\n{nom}:")
    print(f"   Dimensions: {df.shape[0]:,} files x {df.shape[1]} columnes")
    print(f"   Columnes: {list(df.columns)}")
    print(f"   Rang d'anys: {any_min:.0f} - {any_max:.0f}")
    print(f"   Països únics: {df[col_pais].nunique()}")

# 1. Climate Indicators
climate_indicators = climate_indicators_melted.copy()
resumeix_dataframe(climate_indicators, "1. climate_indicators_melted")

# 2. World Development Indicators
wdi = wdi_final.copy()
resumeix_dataframe(wdi, "2. wdi_final")

# 3. Precipitació
pr = pr_long_df.copy()
resumeix_dataframe(pr, "3. pr_long_df")

# 4. Temperatura
tas = tas_long_df.copy()
resumeix_dataframe(tas, "4. tas_long_df")


Verificant els dataframes processats disponibles...

1. climate_indicators_melted:
   Dimensions: 13,267 files x 5 columnes
   Columnes: ['Country', 'ISO2', 'ISO3', 'Year', 'surface_temperature_change_celsius']
   Rang d'anys: 1961 - 2024
   Països únics: 231

2. wdi_final:
   Dimensions: 15,603 files x 8 columnes
   Columnes: ['ISO3', 'Year', 'agricultural_irrigated_land_%_of_total_agri_land', 'agricultural_land_%_total_land', 'arable_land_%_total_land', 'arable_land_ha_per_person', 'agriculture_forestry_fishing_value_added_%_GDP', 'agriculture_forestry_fishing_value_added_%_annual_growth']
   Rang d'anys: 1960 - 2024
   Països únics: 261

3. pr_long_df:
   Dimensions: 30,504 files x 3 columnes
   Columnes: ['ISO3', 'Year', 'prec_mm_per_year']
   Rang d'anys: 1901 - 2024
   Països únics: 246

4. tas_long_df:
   Dimensions: 30,504 files x 3 columnes
   Columnes: ['ISO3', 'Year', 'mean_annual_tas_deg_celsius']
   Rang d'anys: 1901 - 2024
   Països únics: 246


In [57]:
# Filtrarem tots els dataframes per tal que només quedin els anys posteriors a 1960
any_min = 1961  # ja que majoritàriament 1961 és l'any mínim consistent que em vist anteriorment

climate_indicators = climate_indicators[climate_indicators['Year'] >= any_min].reset_index(drop=True)
wdi = wdi[wdi['Year'] >= any_min].reset_index(drop=True)
pr = pr[pr['Year'] >= any_min].reset_index(drop=True)
tas = tas[tas['Year'] >= any_min].reset_index(drop=True)

print(f"\nS'han filtrat tots els dataframes perquè només incloguin anys de {any_min} endavant.")



S'han filtrat tots els dataframes perquè només incloguin anys de 1961 endavant.


In [58]:
# Merge seqüencial dels 4 datasets climàtics
print("Iniciant merge seqüencial dels datasets climàtics...")

def merge_dataframes(
    base_df: pd.DataFrame, 
    df_to_add: pd.DataFrame, 
    cols_to_merge: list[str], 
    suffix_right: str = '', 
    custom_cols: list[str] = None
) -> pd.DataFrame:
    """
    Fa un merge entre dos dataframes segons les columnes indicades.
    Si es passa custom_cols, només es seleccionen aquestes columnes de df_to_add.
    Per defecte, el sufix per a columnes coincidents es pot personalitzar amb suffix_right.
    """
    if custom_cols is not None:
        df_to_add_filtered = df_to_add[custom_cols]
    else:
        df_to_add_filtered = df_to_add
    merged_df = base_df.merge(
        df_to_add_filtered,
        on=cols_to_merge,
        how='outer',
        suffixes=('', suffix_right)
    )
    print(f"  Resultat: {merged_df.shape[0]:,} files x {merged_df.shape[1]} columnes")
    return merged_df

print("\nMerge 1: Climate_Indicators + World_Development_Indicators")
climate_country_year_df = merge_dataframes(
    climate_indicators,
    wdi,
    cols_to_merge=['ISO3', 'Year'],
    suffix_right='_wdi'
)

print("\nMerge 2: + Precipitació (pr_long_df)")
climate_country_year_df = merge_dataframes(
    climate_country_year_df,
    pr,
    cols_to_merge=['ISO3', 'Year'],
    custom_cols=['ISO3', 'Year', 'prec_mm_per_year']
)

print("\nMerge 3: + Temperatura (tas_long_df)")
climate_country_year_df = merge_dataframes(
    climate_country_year_df,
    tas,
    cols_to_merge=['ISO3', 'Year'],
    custom_cols=['ISO3', 'Year', 'mean_annual_tas_deg_celsius']
)

# Consolidar columna Country (pot haver-hi duplicats de diferents fonts)
if 'Country' in climate_country_year_df.columns and 'Country_wdi' in climate_country_year_df.columns:
    climate_country_year_df['Country'] = climate_country_year_df['Country'].fillna(climate_country_year_df['Country_wdi'])
    climate_country_year_df = climate_country_year_df.drop(columns=['Country_wdi'], errors='ignore')

# Ordenar per ISO3 i Year
climate_country_year_df = climate_country_year_df.sort_values(['ISO3', 'Year']).reset_index(drop=True)

print(f"\nDataframe final climate_country_year_df:")
print(f"  Dimensions: {climate_country_year_df.shape[0]:,} files x {climate_country_year_df.shape[1]} columnes")
print(f"  Rang d'anys: {climate_country_year_df['Year'].min():.0f} - {climate_country_year_df['Year'].max():.0f}")
print(f"  Nombre de països: {climate_country_year_df['ISO3'].nunique()}")
print(f"\nColumnes ({len(climate_country_year_df.columns)}):")
for i, col in enumerate(climate_country_year_df.columns, 1):
    print(f"  {i:2d}. {col}")

print(f"\nPrimeres files:")
display(climate_country_year_df.head())

print(f"\nExemple per un país (Spain - ESP):")
display(climate_country_year_df[(climate_country_year_df['ISO3'] == 'ESP')& (climate_country_year_df['Year'] >= 2015)].head())


Iniciant merge seqüencial dels datasets climàtics...

Merge 1: Climate_Indicators + World_Development_Indicators
  Resultat: 17,117 files x 11 columnes

Merge 2: + Precipitació (pr_long_df)
  Resultat: 19,299 files x 12 columnes

Merge 3: + Temperatura (tas_long_df)
  Resultat: 19,299 files x 13 columnes

Dataframe final climate_country_year_df:
  Dimensions: 19,299 files x 13 columnes
  Rang d'anys: 1961 - 2024
  Nombre de països: 303

Columnes (13):
   1. Country
   2. ISO2
   3. ISO3
   4. Year
   5. surface_temperature_change_celsius
   6. agricultural_irrigated_land_%_of_total_agri_land
   7. agricultural_land_%_total_land
   8. arable_land_%_total_land
   9. arable_land_ha_per_person
  10. agriculture_forestry_fishing_value_added_%_GDP
  11. agriculture_forestry_fishing_value_added_%_annual_growth
  12. prec_mm_per_year
  13. mean_annual_tas_deg_celsius

Primeres files:


Unnamed: 0,Country,ISO2,ISO3,Year,surface_temperature_change_celsius,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth,prec_mm_per_year,mean_annual_tas_deg_celsius
0,"Aruba, Kingdom of the Netherlands",AW,ABW,1961,-0.343,,11.111111,11.111111,0.035985,,,403.5,28.37
1,"Aruba, Kingdom of the Netherlands",AW,ABW,1962,-0.046,,11.111111,11.111111,0.035511,,,342.2,28.33
2,"Aruba, Kingdom of the Netherlands",AW,ABW,1963,-0.132,,11.111111,11.111111,0.035086,,,477.0,28.46
3,"Aruba, Kingdom of the Netherlands",AW,ABW,1964,0.122,,11.111111,11.111111,0.034711,,,328.2,28.35
4,"Aruba, Kingdom of the Netherlands",AW,ABW,1965,-0.33,,11.111111,11.111111,0.03437,,,373.9,28.26



Exemple per un país (Spain - ESP):


Unnamed: 0,Country,ISO2,ISO3,Year,surface_temperature_change_celsius,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth,prec_mm_per_year,mean_annual_tas_deg_celsius
5364,Spain,ES,ESP,2015,1.443,13.539857,53.189062,24.573101,0.26449,2.72327,4.831547,489.87,14.52
5365,Spain,ES,ESP,2016,1.628,13.75977,52.577247,24.695895,0.265555,2.815221,4.503758,647.87,14.4
5366,Spain,ES,ESP,2017,1.95,13.933547,52.638714,24.529741,0.263119,2.803447,-3.486742,465.42,14.69
5367,Spain,ES,ESP,2018,1.058,14.145419,52.40821,23.787368,0.254035,2.701283,4.193773,791.86,14.11
5368,Spain,ES,ESP,2019,1.437,14.726331,52.458492,23.644962,0.250694,2.519801,-2.770201,557.29,14.37


## Merge final entre `climate_country_year_df` i `agri_population_df` per formar `agri_country_year_df`

In [59]:
# Sets de países en ambos dataframes
agri_countries = set(agri_population_df['Area'].dropna().unique())
climate_countries = set(climate_country_year_df['Country'].dropna().unique())

# Países presentes en ambos
intersection = agri_countries & climate_countries
# Países solo en agri_population_df
only_agri = agri_countries - climate_countries
# Países solo en climate_country_year_df
only_climate = climate_countries - agri_countries

print(f"Países en ambos dataframes: {len(intersection)}")
print(f"Países solo en agri_population_df: {len(only_agri)}")
print(f"Países solo en climate_country_year_df: {len(only_climate)}\n")

print("Países solo en agri_population_df:")
print(sorted(only_agri))

print("\nPaíses solo en climate_country_year_df:")
print(sorted(only_climate))

Países en ambos dataframes: 143
Países solo en agri_population_df: 103
Países solo en climate_country_year_df: 88

Países solo en agri_population_df:
['Afghanistan', 'Armenia', 'Aruba', 'Australia and New Zealand', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Belarus', 'Belgium-Luxembourg', 'Bermuda', 'Bolivia (Plurinational State of)', 'Caribbean', 'Central African Republic', 'Central America', 'Central Asia', 'China', 'China, Hong Kong SAR', 'China, Taiwan Province of', 'China, mainland', 'Comoros', 'Congo', 'Croatia', 'Czechia', 'Czechoslovakia', "Côte d'Ivoire", "Democratic People's Republic of Korea", 'Democratic Republic of the Congo', 'Dominican Republic', 'Eastern Africa', 'Eastern Asia', 'Eastern Europe', 'Egypt', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Ethiopia PDR', 'European Union (27)', 'Fiji', 'French Guiana', 'Gambia', 'Iran (Islamic Republic of)', 'Kazakhstan', 'Kyrgyzstan', 'Land Locked Developing Countries (LLDCs)', "Lao People's Democratic Republi

### Ús de country_converter per harmonitzar codis de països

En lloc d'intentar harmonitzar manualment els noms de països, utilitzarem la llibreria `country_converter` per convertir els noms de països a codis ISO3 estandarditzats, que ja existeixen al dataframe climàtic.


In [60]:
def clean_agri_dataframe_and_add_iso3(df: pd.DataFrame, country_col: str = 'Area') -> pd.DataFrame:
    """
    Neteja un dataframe agrícola i afegeix la columna ISO3.
    
    Operacions realitzades:
    1. Mapeja noms de països amb variants conegudes
    2. Elimina registres d'agrupacions no-geogràfiques
    3. Converteix noms de països a codis ISO3 utilitzant country_converter
    
    Paràmetres:
    -----------
    df : pd.DataFrame
        Dataframe agrícola amb columna de països
    country_col : str
        Nom de la columna que conté els noms de països (default: 'Area')
        
    Retorna:
    --------
    pd.DataFrame
        Dataframe netejat amb columna ISO3 afegida
    """
    df = df.copy()
    
    print(f"Dimensions inicials: {df.shape[0]:,} files")
    
    # 1. Mapatges de noms de països
    country_mappings = {
        'Australia and New Zealand': 'Australia',
        'Belgium-Luxembourg': 'Belgium',
        'China, Hong Kong SAR': 'Hong Kong',
        'China, Taiwan Province of': 'Taiwan',
        'China, mainland': 'China',
        'Serbia and Montenegro': 'Serbia',
        'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom'
    }
    
    df[country_col] = df[country_col].replace(country_mappings)
    print(f"Aplicats {len(country_mappings)} mapatges de noms de països")
    
    # 2. Eliminar agrupacions no-countries
    areas_to_remove = [
        'Low Income Food Deficit Countries (LIFDCs)',
        'Least Developed Countries (LDCs)',
        'Net Food Importing Developing Countries (NFIDCs)',
        'Small Island Developing States (SIDS)',
        'Yugoslav SFR',
        'USSR',
        'World',
        'Africa',
        'Eastern Africa',
        'Middle Africa',
        'Northern Africa',
        'Western Africa',
        'Americas',
        'Northern America',
        'Central America',
        'Caribbean',
        'South America',
        'Asia',
        'Central Asia',
        'Eastern Asia',
        'Southern Asia',
        'South-eastern Asia',
        'Western Asia',
        'Europe',
        'Eastern Europe',
        'Northern Europe',
        'Southern Europe',
        'Western Europe',
        'Oceania',
        'Melanesia',
        'Polynesia',
        'European Union (27)',
        'Land Locked Developing Countries (LLDCs)',
    ]
    
    initial_count = len(df)
    df = df[~df[country_col].isin(areas_to_remove)]
    removed_count = initial_count - len(df)
    print(f"Eliminats {removed_count:,} registres d'agrupacions no-geogràfiques")
    print(f"  Dimensions després de neteja: {df.shape[0]:,} files")
    
    # 3. Convertir a ISO3 utilitzant country_converter
    countries_unique = df[country_col].unique()
    print(f"\nConvertint {len(countries_unique)} països únics a codis ISO3...")
    
    iso3_codes = coco.convert(names=countries_unique, to='ISO3')
    country_to_iso3 = dict(zip(countries_unique, iso3_codes))
    
    df['ISO3'] = df[country_col].map(country_to_iso3)
    
    # Estadístiques de conversió
    successful = df['ISO3'].notna().sum()
    failed = df['ISO3'].isna().sum()
    unique_iso3 = df['ISO3'].nunique()
    
    print(f"Conversions exitoses: {successful:,} files ({unique_iso3} codis ISO3 únics)")
    if failed > 0:
        print(f"Conversions fallides: {failed:,} files")
        failed_countries = df[df['ISO3'].isna()][country_col].unique()
        print(f"  Països no convertits: {sorted(failed_countries)[:10]}")
    
    return df


def clean_climate_dataframe(df: pd.DataFrame, country_col: str = 'Country') -> pd.DataFrame:
    """
    Neteja un dataframe climàtic eliminant sufixos redundants i variants de noms.
    
    Operacions realitzades:
    1. Elimina text després de coma (e.g., ", Islamic Rep. of")
    2. Elimina text entre parèntesis
    3. Neteja espais extra
    
    Paràmetres:
    -----------
    df : pd.DataFrame
        Dataframe climàtic amb columna de països
    country_col : str
        Nom de la columna que conté els noms de països (default: 'Country')
        
    Retorna:
    --------
    pd.DataFrame
        Dataframe amb noms de països netejats
    """
    df = df.copy()
    
    print(f"Dimensions inicials: {df.shape[0]:,} files")
    print(f"Països únics inicials: {df[country_col].nunique() if country_col in df.columns else 'N/A'}")
    
    if country_col not in df.columns:
        print(f"La columna '{country_col}' no existeix al dataframe")
        return df
    
    # 1. Eliminar text després de coma (e.g., "Afghanistan, Islamic Rep. of" → "Afghanistan")
    df[country_col] = df[country_col].str.split(',').str[0]
    
    # 2. Eliminar text entre parèntesis
    df[country_col] = df[country_col].str.replace(r'\s*\([^)]*\)', '', regex=True)
    
    # 3. Neteja espais extra
    df[country_col] = df[country_col].str.strip()
    
    print(f"Noms de països netejats")
    print(f"  Països únics després de neteja: {df[country_col].nunique()}")
    
    return df


### Aplicació de la neteja als dataframes agrícoles

Apliquem la funció de neteja i conversió a ISO3 als dataframes agrícoles `agri_population_df` i `agri_prices_df`.


In [61]:
# Netejar agri_population_df i afegir ISO3
print("Nateja de agri_population_df")
agri_population_df_cleaned = clean_agri_dataframe_and_add_iso3(agri_population_df, country_col='Area')
print("\n agri_population_df netejat i amb columna ISO3 afegida\n")
display(agri_population_df_cleaned.head())


Nateja de agri_population_df
Dimensions inicials: 12,729 files
Aplicats 7 mapatges de noms de països
Eliminats 1,972 registres d'agrupacions no-geogràfiques
  Dimensions després de neteja: 10,757 files

Convertint 209 països únics a codis ISO3...
Conversions exitoses: 10,757 files (204 codis ISO3 únics)

 agri_population_df netejat i amb columna ISO3 afegida



Unnamed: 0,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1,Value_21085_agriculture_value_added_per_worker_constant_2015_ususd_total_USD,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_female_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_male_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_total_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_female_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_male_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_total_No,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_female_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_male_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_total_%,total_population_No,female_population_%,male_population_%,rural_population_%,urban_population_%,ISO3
0,1,'051,Armenia,1992,1992,15000.0,31.06,4.2,,5000.0,10.35,1.4,,5000.0,10.35,1.4,,2847000.0,1170000.0,483000.0,423000.0,60000.0,687000.0,8.0,822810.0,711050.0,669040.0,544.8665,751.24,64.0145,,82.7595,25.8905,1251.471,1704.51,,,,485460.0,335990.0,821450.0,64.6,40.6,52.1,3571861.0,52.16516,47.83484,31.924927,64.462083,ARM
1,1,'051,Armenia,1993,1993,15000.0,30.43,4.34,,,,,,,,,,2847000.0,1181000.0,493000.0,433000.0,60000.0,688000.0,8.0,825000.0,711110.0,669070.0,473.5815,671.5772,40.3445,,82.7595,27.6395,1075.0777,1729.96,,,,443950.0,317270.0,761220.0,65.3,41.4,52.7,3453332.0,52.238041,47.761988,32.521374,64.865672,ARM
2,1,'051,Armenia,1994,1994,7000.0,14.0,2.08,,,,,,,,,,2847000.0,1200000.0,500000.0,435000.0,65000.0,700000.0,23.0,817000.0,711170.0,669080.0,418.965,661.962,40.204,,38.637,22.6045,1059.863,865.49,,,,424800.0,301260.0,726060.0,65.7,41.6,52.9,3364610.0,52.292242,47.707758,32.857746,64.736329,ARM
3,1,'051,Armenia,1995,1995,7000.0,14.17,2.12,,,,,,,,,,2847000.0,1244000.0,494000.0,435000.0,59000.0,750000.0,24.0,845520.0,661750.0,689880.0,413.6385,653.408,40.1943,,38.637,23.373,1045.2308,919.58,,,,410500.0,299810.0,710310.0,66.0,41.9,53.1,3307581.0,52.322165,47.677865,33.016637,64.255116,ARM
4,1,'051,Armenia,1996,1996,8000.0,15.81,2.44,,,,,,,,,,2847000.0,1306000.0,506000.0,450000.0,56000.0,800000.0,32.0,854370.0,658510.0,690990.0,423.152,652.5484,39.925,,44.149,27.878,1043.6012,973.21,,,,388190.0,296400.0,684590.0,66.3,42.2,53.2,3278735.0,52.326675,47.673325,33.065618,63.563569,ARM


In [62]:
# Netejar agri_prices_df i afegir ISO3
print("Nateja de agri_prices_df")
agri_prices_df_cleaned = clean_agri_dataframe_and_add_iso3(agri_prices_df, country_col='Area')
print("\n agri_prices_df netejat i amb columna ISO3 afegida\n")
display(agri_prices_df_cleaned.head())


Nateja de agri_prices_df
Dimensions inicials: 4,116,252 files
Aplicats 7 mapatges de noms de països
Eliminats 989,642 registres d'agrupacions no-geogràfiques
  Dimensions després de neteja: 3,126,610 files

Convertint 208 països únics a codis ISO3...
Conversions exitoses: 3,126,610 files (203 codis ISO3 únics)

 agri_prices_df netejat i amb columna ISO3 afegida



Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Flag_Description,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne,ISO3
0,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1961,1961,ha,0.0,A,Official figure,,,,AFG
1,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1962,1962,ha,0.0,A,Official figure,,,,AFG
2,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1963,1963,ha,0.0,A,Official figure,,,,AFG
3,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1964,1964,ha,0.0,A,Official figure,,,,AFG
4,2,'004,Afghanistan,221,'01371,"Almonds, in shell",5312,Area harvested,1965,1965,ha,0.0,A,Official figure,,,,AFG


In [63]:
display(agri_prices_df_cleaned[(agri_prices_df_cleaned['ISO3']=='ESP') & (agri_prices_df_cleaned['Year'] >= 2015)].head())

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item Code (CPC),Item,Element Code,Element,Year Code,Year,Unit,Value,Flag,Flag_Description,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne,ISO3
2561783,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2015,2015,ha,548604.0,A,Official figure,1854.8,1854.8,2057.1,ESP
2561784,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2016,2016,ha,583673.0,A,Official figure,1728.2,1728.2,1911.7,ESP
2561785,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2017,2017,ha,633562.0,A,Official figure,1462.3,1462.3,1651.9,ESP
2561786,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2018,2018,ha,657770.0,A,Official figure,1143.8,1143.8,1350.8,ESP
2561787,203,'724,Spain,221,'01371,"Almonds, in shell",5312,Area harvested,2019,2019,ha,687230.0,A,Official figure,1331.6,1331.6,1490.7,ESP


### Aplicació de la neteja al dataframe climàtic

Apliquem la funció de neteja al dataframe climàtic `climate_country_year_df` per eliminar sufixos redundants.


In [64]:
# Netejar climate_country_year_df i afegir ISO3
print("Nateja de climate_country_year_df")
climate_country_year_df_cleaned = clean_climate_dataframe(climate_country_year_df, country_col='Country')
print("\n climate_country_year_df netejat\n")
display(climate_country_year_df_cleaned.head())


Nateja de climate_country_year_df
Dimensions inicials: 19,299 files
Països únics inicials: 231
Noms de països netejats
  Països únics després de neteja: 227

 climate_country_year_df netejat



Unnamed: 0,Country,ISO2,ISO3,Year,surface_temperature_change_celsius,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth,prec_mm_per_year,mean_annual_tas_deg_celsius
0,Aruba,AW,ABW,1961,-0.343,,11.111111,11.111111,0.035985,,,403.5,28.37
1,Aruba,AW,ABW,1962,-0.046,,11.111111,11.111111,0.035511,,,342.2,28.33
2,Aruba,AW,ABW,1963,-0.132,,11.111111,11.111111,0.035086,,,477.0,28.46
3,Aruba,AW,ABW,1964,0.122,,11.111111,11.111111,0.034711,,,328.2,28.35
4,Aruba,AW,ABW,1965,-0.33,,11.111111,11.111111,0.03437,,,373.9,28.26


## Construcció del dataframe final integrat: `agri_country_year_df`

Amb l'objectiu de crear un dataframe únic que integri tota la informació agrícola i climàtica a nivell de **país x any**, preparat per a la visualització i exploració de dades, fem:

1. **Dataframe base**: `agri_population_df_cleaned`
   - Conté tota la informació agrícola processada: producció, fertilitzants, ús del sòl, pesticides, cobertura terrestre, emissions, ocupació i població
   - Aquest serà el dataframe principal ja que conté les dades més crítiques per a l'anàlisi agrícola

2. **Dataframe complementari**: `climate_country_year_df_cleaned`
   - Conté informació climàtica i indicadors de desenvolupament

3. **Tipus de merge**: **LEFT JOIN**
   - Preservem tots els registres agrícoles (país x any)
   - Afegim informació climàtica quan estigui disponible
   - Si no hi ha dades climàtiques per un país/any específic, els valors quedaran com NaN

4. **Claus de merge**: `ISO3` + `Year`
   - Ambdós dataframes ja tenen aquests camps harmonitzats
   - Garanteix la màxima compatibilitat entre fonts de dades

### Resultat final

El dataframe `agri_country_year_df` serà guardat com a CSV a `data/visualization_input/agri_country_year_df.csv` i contindrà tota la informació necessària per a la visualització de dades.


In [66]:
# 1. Reordenar columnes de agri_population_df_cleaned per tenir ISO3 com a primera columna

print("\n1. Reordenant columnes de agri_population_df...")
# Obtenir totes les columnes
all_cols_agri = agri_population_df_cleaned.columns.tolist()

# Crear nova ordre: ISO3, Area Code, Area Code (M49), Area, Year Code, Year, resta
priority_cols = ['ISO3', 'Area Code', 'Area Code (M49)', 'Area', 'Year Code', 'Year']
other_cols = [col for col in all_cols_agri if col not in priority_cols]
new_order_agri = priority_cols + other_cols

# Reordenar
agri_population_df_cleaned = agri_population_df_cleaned[new_order_agri]

print(f"Columnes reordenades")
print(f"  Primeres columnes: {agri_population_df_cleaned.columns[:10].tolist()}")
print(f"  Dimensions: {agri_population_df_cleaned.shape[0]:,} files x {agri_population_df_cleaned.shape[1]} columnes")



1. Reordenant columnes de agri_population_df...
Columnes reordenades
  Primeres columnes: ['ISO3', 'Area Code', 'Area Code (M49)', 'Area', 'Year Code', 'Year', 'Value_nutrient_3102_N_Agricultural_Use_t', 'Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha', 'Value_nutrient_3102_N_Use_per_capita_kg/cap', 'Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$']
  Dimensions: 10,757 files x 50 columnes


In [67]:
# 2. Seleccionar només les columnes rellevants de climate_country_year_df_cleaned (eliminant 'Country' i 'ISO2')
print("\n2. Seleccionant columnes climàtiques rellevants: eliminar 'Country' i 'ISO2'...")

climate_country_year_df_for_merge = (
    climate_country_year_df_cleaned
    .drop(columns=['Country', 'ISO2'])
)

print(f"  Dimensions: {climate_country_year_df_for_merge.shape[0]:,} files x {climate_country_year_df_for_merge.shape[1]} columnes")
display(climate_country_year_df_for_merge[(climate_country_year_df_for_merge['ISO3']=='ESP') & (climate_country_year_df_for_merge['Year'] >= 2015)].head())


2. Seleccionant columnes climàtiques rellevants: eliminar 'Country' i 'ISO2'...
  Dimensions: 19,299 files x 11 columnes


Unnamed: 0,ISO3,Year,surface_temperature_change_celsius,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth,prec_mm_per_year,mean_annual_tas_deg_celsius
5364,ESP,2015,1.443,13.539857,53.189062,24.573101,0.26449,2.72327,4.831547,489.87,14.52
5365,ESP,2016,1.628,13.75977,52.577247,24.695895,0.265555,2.815221,4.503758,647.87,14.4
5366,ESP,2017,1.95,13.933547,52.638714,24.529741,0.263119,2.803447,-3.486742,465.42,14.69
5367,ESP,2018,1.058,14.145419,52.40821,23.787368,0.254035,2.701283,4.193773,791.86,14.11
5368,ESP,2019,1.437,14.726331,52.458492,23.644962,0.250694,2.519801,-2.770201,557.29,14.37


In [68]:
# 3. Fer el merge (LEFT JOIN) entre agri_population_df_cleaned i climate_country_year_df_for_merge
print("\n3. Realitzant merge LEFT JOIN (agri_population_df_cleaned + climate_country_year_df)...")
print(f"   Claus de merge: ISO3 + Year")

agri_country_year_df = agri_population_df_cleaned.merge(
    climate_country_year_df_for_merge,
    on=['ISO3', 'Year'],
    how='left',
    suffixes=('', '_climate')
)

print(f"\n Merge completat!")
print(f"  Dimensions finals: {agri_country_year_df.shape[0]:,} files x {agri_country_year_df.shape[1]} columnes")
print(f"  Rang d'anys: {agri_country_year_df['Year'].min():.0f} - {agri_country_year_df['Year'].max():.0f}")
print(f"  Països únics (ISO3): {agri_country_year_df['ISO3'].nunique()}")

display(agri_country_year_df[(agri_country_year_df['ISO3']=='ESP') & (agri_country_year_df['Year'] >= 2015)].head())



3. Realitzant merge LEFT JOIN (agri_population_df_cleaned + climate_country_year_df)...
   Claus de merge: ISO3 + Year

 Merge completat!
  Dimensions finals: 10,757 files x 59 columnes
  Rang d'anys: 1961 - 2023
  Països únics (ISO3): 204


Unnamed: 0,ISO3,Area Code,Area Code (M49),Area,Year Code,Year,Value_nutrient_3102_N_Agricultural_Use_t,Value_nutrient_3102_N_Use_per_area_of_cropland_kg/ha,Value_nutrient_3102_N_Use_per_capita_kg/cap,Value_nutrient_3102_N_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3103_P2O5_Agricultural_Use_t,Value_nutrient_3103_P2O5_Use_per_area_of_cropland_kg/ha,Value_nutrient_3103_P2O5_Use_per_capita_kg/cap,Value_nutrient_3103_P2O5_Use_per_value_of_agricultural_production_g/Int$,Value_nutrient_3104_K2O_Agricultural_Use_t,Value_nutrient_3104_K2O_Use_per_area_of_cropland_kg/ha,Value_nutrient_3104_K2O_Use_per_capita_kg/cap,Value_nutrient_3104_K2O_Use_per_value_of_agricultural_production_g/Int$,Value_6601_land_area_ha,Value_6610_agricultural_land_ha,Value_6620_cropland_ha,Value_6621_arable_land_ha,Value_6650_permanent_crops_ha,Value_6655_permanent_meadows_and_pastures_ha,Value_pesticide_agricultural_use_t,Value_6971_herbaceous_crops_ha,Value_6974_tree-covered_areas_ha,Value_6983_grassland_ha,Value_1709_agricultural_soils_CO2eq_AR5_kt_FAO_TIER_1,Value_5058_enteric_fermentation_CO2eq_AR5_kt_FAO_TIER_1,Value_5059_manure_management_CO2eq_AR5_kt_FAO_TIER_1,Value_5060_rice_cultivation_CO2eq_AR5_kt_FAO_TIER_1,Value_5061_synthetic_fertilizers_CO2eq_AR5_kt_FAO_TIER_1,Value_5064_crop_residues_CO2eq_AR5_kt_FAO_TIER_1,Value_5085_emissions_from_livestock_CO2eq_AR5_kt_FAO_TIER_1,Value_21085_agriculture_value_added_per_worker_constant_2015_ususd_total_USD,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_female_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_male_No,Value_21111_employment_in_crop_and_animal_production_hunting_and_related_service_activities_total_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_female_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_male_No,Value_21144_employment_in_agriculture_-_ilo_modelled_estimates_total_No,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_female_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_male_%,Value_21156_share_of_employment_in_agriculture_in_total_employment_-_ilo_modelled_estimates_total_%,total_population_No,female_population_%,male_population_%,rural_population_%,urban_population_%,surface_temperature_change_celsius,agricultural_irrigated_land_%_of_total_agri_land,agricultural_land_%_total_land,arable_land_%_total_land,arable_land_ha_per_person,agriculture_forestry_fishing_value_added_%_GDP,agriculture_forestry_fishing_value_added_%_annual_growth,prec_mm_per_year,mean_annual_tas_deg_celsius
8692,ESP,203,'724,Spain,2015,2015,1068103.0,62.14,22.88,21.8,411763.0,23.96,8.82,8.4,380303.0,22.13,8.15,7.76,49966100.0,26576500.0,17187890.0,12278220.0,4909670.0,9388610.0,77217.0,15811420.0,12416050.0,7421200.0,13078.174,16830.1,10081.3283,1542.3016,5893.441,1180.7605,32843.4268,43719.25,164900.0,513750.0,678710.0,171510.0,576230.0,747750.0,2.1,5.8,4.1,46683686.0,50.776556,49.223444,20.273048,79.114271,1.443,13.539857,53.189062,24.573101,0.26449,2.72327,4.831547,489.87,14.52
8693,ESP,203,'724,Spain,2016,2016,982155.0,57.66,21.02,18.79,414974.0,24.36,8.88,7.94,379007.0,22.25,8.11,7.25,49956400.0,26265700.0,17033010.0,12337180.0,4695830.0,9232690.0,76941.0,16007000.0,12471940.0,7406250.0,12845.3715,16825.0824,10290.7095,1542.0468,5419.2235,1400.366,33069.7589,43519.53,170860.0,541230.0,712020.0,178970.0,608060.0,787030.0,2.1,5.9,4.2,46732771.0,50.81104,49.18896,19.993972,79.181778,1.628,13.75977,52.577247,24.695895,0.265555,2.815221,4.503758,647.87,14.4
8694,ESP,203,'724,Spain,2017,2017,1079321.0,63.54,23.03,21.54,436452.0,25.7,9.31,8.71,389629.0,22.94,8.31,7.78,49954735.0,26295530.0,16985167.0,12253767.0,4731400.0,9310363.0,71987.0,15908920.0,12503870.0,7393520.0,13074.0135,17027.136,10481.0101,1518.5072,5955.345,1001.4085,33554.8101,39514.38,186670.0,563050.0,749790.0,196340.0,638770.0,835120.0,2.3,6.1,4.4,46868596.0,50.847262,49.152735,19.701717,79.201013,1.95,13.933547,52.638714,24.529741,0.263119,2.803447,-3.486742,465.42,14.69
8695,ESP,203,'724,Spain,2018,2018,1033494.0,61.63,21.95,17.85,425960.0,25.4,9.05,7.36,414675.0,24.73,8.81,7.16,49960347.9,26183323.9,16770363.9,11884251.9,4886112.0,9412960.0,73121.0,15597160.0,12553290.0,7377370.0,12637.0815,16996.49,10184.3688,1481.9,5702.482,1450.7955,32594.7293,42991.8,183140.0,560130.0,743270.0,191850.0,633320.0,825170.0,2.2,5.9,4.2,47092821.0,50.871336,49.128664,19.388051,79.135357,1.058,14.145419,52.40821,23.787368,0.254035,2.701283,4.193773,791.86,14.11
8696,ESP,203,'724,Spain,2019,2019,1011251.0,60.34,21.32,19.29,479846.0,28.63,10.12,9.15,369058.0,22.02,7.78,7.04,49957003.6,26206690.6,16759806.6,11812314.6,4947492.0,9446884.0,75190.0,15505760.0,12555860.0,7380330.0,12260.861,17053.0612,10311.1779,1458.758,5579.7605,1190.645,32782.0874,41247.68,177600.0,552250.0,729780.0,185260.0,624370.0,809620.0,2.0,5.7,4.0,47435119.0,50.881129,49.118869,19.027965,78.876393,1.437,14.726331,52.458492,23.644962,0.250694,2.519801,-2.770201,557.29,14.37


In [69]:
# 4. Guardem el dataframe final com a CSV
print("Guardant el dataframe final")

# Creem el directori
output_dir = '../data/visualization_input'
os.makedirs(output_dir, exist_ok=True)

# Guardem el dataframe com a CSV
output_file = os.path.join(output_dir, 'agri_country_year_df.csv')
agri_country_year_df.to_csv(output_file, index=False)

Guardant el dataframe final


## Processament i exportació de `agri_prices_df_cleaned`

El dataframe `agri_prices_df_cleaned` conté informació de producció i preus agrícoles i ramaders per país x producte x any. Aquest dataframe també és rellevant per a la visualització, ja que permet analitzar:

- L'evolució dels preus dels productes agrícoles al llarg del temps
- Comparacions entre països per a productes específics
- La relació entre preus i condicions climàtiques
- Impactes econòmics de la producció agrícola
- Altres informacions rellevants particulars de cada producte a cada pais
- Impactes en l'agrucultura i ramaderia de cada producte

A diferència de `agri_country_year_df` (que és a nivell país x any), aquest dataframe és a nivell país x producte x any, per això es guardarà com un fitxer separat per facilitar la seva utilització en visualitzacions específiques per producte.


In [70]:
# Reordenar columnes de agri_prices_df_cleaned per tenir ISO3 com a primera columna
print("Preparant agri_prices_df_cleaned per exportar")

print("\nReordenant columnes...")
# Obtenir totes les columnes
all_cols_prices = agri_prices_df_cleaned.columns.tolist()

priority_cols_prices = [
    'ISO3', 
    'Area Code', 
    'Area Code (M49)', 
    'Area', 
    'Year Code', 
    'Year',
    'Item Code', 
    'Item Code (CPC)', 
    'Item', 
]
other_cols_prices = [col for col in all_cols_prices if col not in priority_cols_prices]
new_order_prices = priority_cols_prices + other_cols_prices

# Reordenar
agri_prices_df_cleaned = agri_prices_df_cleaned[new_order_prices]

print(f"Columnes reordenades")
print(f"  Primeres columnes: {agri_prices_df_cleaned.columns[:12].tolist()}")
print(f"  Dimensions: {agri_prices_df_cleaned.shape[0]:,} files x {agri_prices_df_cleaned.shape[1]} columnes")
print(f"  Països únics (ISO3): {agri_prices_df_cleaned['ISO3'].nunique()}")
print(f"  Productes únics: {agri_prices_df_cleaned['Item'].nunique()}")
print(f"  Rang d'anys: {agri_prices_df_cleaned['Year'].min():.0f} - {agri_prices_df_cleaned['Year'].max():.0f}")

print(f"\nPrimeres files:")
display(agri_prices_df_cleaned[(agri_prices_df_cleaned['ISO3']=='ESP') & (agri_prices_df_cleaned['Year'] >= 2015)].head())


Preparant agri_prices_df_cleaned per exportar

Reordenant columnes...
Columnes reordenades
  Primeres columnes: ['ISO3', 'Area Code', 'Area Code (M49)', 'Area', 'Year Code', 'Year', 'Item Code', 'Item Code (CPC)', 'Item', 'Element Code', 'Element', 'Unit']
  Dimensions: 3,126,610 files x 18 columnes
  Països únics (ISO3): 203
  Productes únics: 302
  Rang d'anys: 1961 - 2023

Primeres files:


Unnamed: 0,ISO3,Area Code,Area Code (M49),Area,Year Code,Year,Item Code,Item Code (CPC),Item,Element Code,Element,Unit,Value,Flag,Flag_Description,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne
2561783,ESP,203,'724,Spain,2015,2015,221,'01371,"Almonds, in shell",5312,Area harvested,ha,548604.0,A,Official figure,1854.8,1854.8,2057.1
2561784,ESP,203,'724,Spain,2016,2016,221,'01371,"Almonds, in shell",5312,Area harvested,ha,583673.0,A,Official figure,1728.2,1728.2,1911.7
2561785,ESP,203,'724,Spain,2017,2017,221,'01371,"Almonds, in shell",5312,Area harvested,ha,633562.0,A,Official figure,1462.3,1462.3,1651.9
2561786,ESP,203,'724,Spain,2018,2018,221,'01371,"Almonds, in shell",5312,Area harvested,ha,657770.0,A,Official figure,1143.8,1143.8,1350.8
2561787,ESP,203,'724,Spain,2019,2019,221,'01371,"Almonds, in shell",5312,Area harvested,ha,687230.0,A,Official figure,1331.6,1331.6,1490.7


In [71]:
agri_prices_df_cleaned[['Element', 'Unit']].drop_duplicates().sort_values(by='Element')

Unnamed: 0,Element,Unit
0,Area harvested,ha
3427,Laying,1000 An
8725,Milk Animals,An
1479,Producing Animals/Slaughtered,An
4435,Producing Animals/Slaughtered,1000 An
111,Production,t
3364,Production,1000 No
716,Stocks,An
1666,Stocks,1000 An
15538,Stocks,No


Observem que, com ja haviem mencionat a l'inici de la pràctica no podem fer pivot de les columnes per la varietat que tenim, no obstant, el que si que podem fer es estandaritzar les unitats.

In [72]:
df = agri_prices_df_cleaned.copy()

#  Cas "1000 <unit>"
mask_1000 = df['Unit'].str.contains('1000 ', na=False)

df['Value'] = np.where(
    mask_1000,
    df['Value'] * 1000,
    df['Value']
)

df['Unit'] = np.where(
    mask_1000,
    df['Unit'].str.replace('1000 ', '', regex=False),
    df['Unit']
)

# Cas "g/An", convertir a "kg/An"
mask_g_an = df['Unit'] == 'g/An'

df['Value'] = np.where(
    mask_g_an,
    df['Value'] / 1000,
    df['Value']
)

df['Unit'] = np.where(
    mask_g_an,
    'kg/An',
    df['Unit']
)

# Resultat final
agri_prices_df_to_save = df
display(agri_prices_df_to_save[
    (agri_prices_df_to_save['Unit']=='kg/An')
    &(agri_prices_df_to_save['Year']>=2015)
    &(agri_prices_df_to_save['ISO3']=='ESP')
    ].head()
)

Unnamed: 0,ISO3,Area Code,Area Code (M49),Area,Year Code,Year,Item Code,Item Code (CPC),Item,Element Code,Element,Unit,Value,Flag,Flag_Description,Value_LCU/tonne,Value_SLC/tonne,Value_USD/tonne
2569060,ESP,203,'724,Spain,2015,2015,1091,'0232,"Eggs from other birds in shell, fresh, n.e.c.",5424,Yield/Carcass Weight,kg/An,47.905,E,Estimated value,823.1,823.1,912.9
2569061,ESP,203,'724,Spain,2016,2016,1091,'0232,"Eggs from other birds in shell, fresh, n.e.c.",5424,Yield/Carcass Weight,kg/An,45.5,E,Estimated value,708.3,708.3,783.5
2569062,ESP,203,'724,Spain,2017,2017,1091,'0232,"Eggs from other birds in shell, fresh, n.e.c.",5424,Yield/Carcass Weight,kg/An,45.263,E,Estimated value,608.4,608.4,687.3
2571055,ESP,203,'724,Spain,2015,2015,1062,'0231,"Hen eggs in shell, fresh",5424,Yield/Carcass Weight,kg/An,16.832,A,Official figure,1346.3,1346.3,1493.2
2571056,ESP,203,'724,Spain,2016,2016,1062,'0231,"Hen eggs in shell, fresh",5424,Yield/Carcass Weight,kg/An,17.253,A,Official figure,1158.5,1158.5,1281.5


In [73]:
# Guardar agri_prices_df_to_save com a CSV
print("Guardant agri_prices_df_to_save...")

# Guardar com a CSV
output_file_prices = os.path.join(output_dir, 'agri_production_and_prices_df.csv')
agri_prices_df_to_save.to_csv(output_file_prices, index=False)
print(f"Guardat a {output_file_prices}...")

Guardant agri_prices_df_to_save...
Guardat a ../data/visualization_input/agri_production_and_prices_df.csv...
