# Climate and Agriculture Analysis in Europe

This notebook analyzes the impact of climate variables on agricultural production in Europe from 2000 to 2022.

## Data processing

In [2]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as pxr

# Locations of the raw inputs: two NetCDF files (instantaneous and accumulated
# climate variables) and the agriculture CSV.
climate_instant_path = "/workspaces/climate-agriculture-europe/data/raw/data_stream-oper_stepType-instant.nc"
climate_accum_path = "/workspaces/climate-agriculture-europe/data/raw/data_stream-oper_stepType-accum.nc"
agriculture_path = "/workspaces/climate-agriculture-europe/data/raw/agriculture_data_eu.csv"

# Plot styling: use the "viridis" colour palette for seaborn figures.
sns.set_palette("viridis")

In [3]:
# Print a summary of the instantaneous-variable dataset (it holds `t2m`).
# The context manager closes the underlying file as soon as the summary has
# been printed, so this inspection cell does not leave the NetCDF file open.
try:
    with xr.open_dataset(climate_instant_path) as ds:
        print(ds)
except FileNotFoundError:
    # Report the missing path instead of stopping the notebook with a traceback.
    print(f"Archivo no encontrado: {climate_instant_path}")

<xarray.Dataset> Size: 2GB
Dimensions:     (valid_time: 33604, latitude: 149, longitude: 81)
Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 269kB 2000-01-01 ... 2022-12-31T1...
  * latitude    (latitude) float64 1kB 72.0 71.75 71.5 71.25 ... 35.5 35.25 35.0
  * longitude   (longitude) float64 648B 25.0 25.25 25.5 ... 44.5 44.75 45.0
    expver      (valid_time) <U4 538kB ...
Data variables:
    t2m         (valid_time, latitude, longitude) float32 2GB ...
Attributes:
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2024-12-12T09:31 GRIB to CDM+CF via cfgrib-0.9.1...


In [4]:
# Print a summary of the accumulated-variable dataset (it holds `tp`).
# The context manager closes the underlying file as soon as the summary has
# been printed, so this inspection cell does not leave the NetCDF file open.
try:
    with xr.open_dataset(climate_accum_path) as ds:
        print(ds)
except FileNotFoundError:
    # Report the missing path instead of stopping the notebook with a traceback.
    print(f"Archivo no encontrado: {climate_accum_path}")

<xarray.Dataset> Size: 2GB
Dimensions:     (valid_time: 33604, latitude: 149, longitude: 81)
Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 269kB 2000-01-01 ... 2022-12-31T1...
  * latitude    (latitude) float64 1kB 72.0 71.75 71.5 71.25 ... 35.5 35.25 35.0
  * longitude   (longitude) float64 648B 25.0 25.25 25.5 ... 44.5 44.75 45.0
    expver      (valid_time) <U4 538kB ...
Data variables:
    tp          (valid_time, latitude, longitude) float32 2GB ...
Attributes:
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2024-12-12T09:31 GRIB to CDM+CF via cfgrib-0.9.1...


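Climate data processing: convert temperature to °C and precipitation to mm, select the study region, aggregate to monthly values, and save the merged table.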

In [5]:
# Load climate data
print("Loading climate data...")
instant_data = xr.open_dataset(climate_instant_path)
accum_data = xr.open_dataset(climate_accum_path)

# Temperature (instantaneous): subtract 273.15 to convert to Celsius, crop to
# the study region, then take the mean per month.
# The latitude slice runs from 72 down to 35; the dataset summary printed
# earlier shows the latitude coordinate stored in that (descending) order.
temperature = instant_data["t2m"] - 273.15
temperature_europe = temperature.sel(latitude=slice(72, 35), longitude=slice(-25, 45))
# "1ME" (month end) replaces the "1M" alias, which pandas has deprecated and
# which makes resample emit a FutureWarning.
temperature_monthly = temperature_europe.resample(valid_time="1ME").mean()

# Precipitation (accumulated): multiply by 1000 to convert to mm
# (assumes `tp` is stored in metres -- TODO confirm), crop to the same region,
# then sum per month.
# NOTE(review): the dataset summary shows 33604 time steps, i.e. 4 per day;
# confirm that summing those steps yields the intended monthly total.
precipitation = accum_data["tp"] * 1000
precipitation_europe = precipitation.sel(latitude=slice(72, 35), longitude=slice(-25, 45))
precipitation_monthly = precipitation_europe.resample(valid_time="1ME").sum()

# Convert to pandas DataFrames (one row per month / latitude / longitude)
temperature_df = temperature_monthly.to_dataframe().reset_index()
precipitation_df = precipitation_monthly.to_dataframe().reset_index()

# Merge temperature and precipitation on their shared coordinates and give the
# value columns descriptive names. The rename is chained rather than done with
# inplace=True so the cell builds `climate_data` in a single expression.
print("Combining climate variables...")
climate_data = pd.merge(
    temperature_df,
    precipitation_df,
    on=["valid_time", "latitude", "longitude"],
    how="inner",
).rename(columns={"t2m": "temperature_celsius", "tp": "precipitation_mm"})

# Save processed climate data
climate_data.to_csv("../data/processed/climate_data_europe.csv", index=False)
print("Climate data processed and saved.")

Loading climate data...


  self.index_grouper = pd.Grouper(


: 

Agriculture data processing: keep the selected crops and elements from 2000 onwards and save the filtered table.

In [None]:
# Read the raw agriculture table from disk.
print("Loading agricultural data...")
agriculture_raw = pd.read_csv(agriculture_path)

# Values to retain in the "Item" (crop) and "Element" (measure) columns.
crops_to_keep = [
    "Wheat", "Maize", "Rice", "Soybeans", "Potatoes",
    "Olives", "Grapes", "Tomatoes", "Barley", "Oranges",
]
elements_to_keep = ["Production quantity", "Yield", "Area harvested"]

# Build one boolean mask per criterion and apply them together: a row is kept
# only if its crop and element are in the lists above and its year is 2000 or later.
is_selected_crop = agriculture_raw["Item"].isin(crops_to_keep)
is_selected_element = agriculture_raw["Element"].isin(elements_to_keep)
is_recent_year = agriculture_raw["Year"] >= 2000
agriculture_data = agriculture_raw[is_selected_crop & is_selected_element & is_recent_year]

# Write the filtered table next to the other processed outputs.
agriculture_data.to_csv("../data/processed/agriculture_data_europe.csv", index=False)
print("Agriculture data processed and saved.")

Data integration.

## Exploratory Data Analysis (EDA)