In [2]:
import cdsapi
import xarray as xr
import pygrib #
import numpy as np
import pandas as pd
import time
import requests
import os
import xarray as xr


import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly


import numpy as np
import matplotlib.pyplot as plt #
import matplotlib.colors #

from geopy.geocoders import Nominatim
from datetime import datetime
import timezonefinder
from astral.sun import sun
from astral.location import LocationInfo

## Download, prepare and plot the data

In this first section we download and prepare the data for the last 30 years, visualising it using different plots.

### Download and preparation

In [3]:
# Select the years you want to download (both ends inclusive)
start_year = 1992
end_year = 2022
year_range = list(range(start_year, end_year + 1))


# Use geopy to get the latitude and longitude of the city
place_name = "Puebla de don Fadrique, Spain"
geolocator = Nominatim(user_agent="permaculture-climate")
location = geolocator.geocode(place_name)

# geocode() returns None when nothing matches; fail here with a clear message
# instead of an AttributeError on `location.latitude` in a later cell.
if location is None:
    raise ValueError(f"Could not geocode {place_name!r}; check the spelling or try again later.")

# Add a delay between requests
time.sleep(1)


In [None]:
# Send the API request and download the data.
# The request covers a box of +/- 1 degree around the geocoded location.
filename = 'past_climate.grib'

c = cdsapi.Client()
try:
    # Passing the target path makes cdsapi write the result to `filename` itself,
    # so the whole GRIB file is no longer buffered in memory via
    # requests.get(url).content (which also ran without a timeout).
    data = c.retrieve(
        "reanalysis-era5-single-levels-monthly-means",
        {
            "format": "grib",
            "product_type": "monthly_averaged_reanalysis_by_hour_of_day",
            "variable": [
                '10m_u_component_of_wind', '10m_v_component_of_wind',
                '2m_temperature',
                'total_cloud_cover',
                'total_precipitation',
            ],
            "area": [
                location.latitude + 1,
                location.longitude - 1,
                location.latitude - 1,
                location.longitude + 1,
            ],  # North, West, South, East.
            "year": year_range,
            # '01' ... '12'
            "month": [f"{month:02d}" for month in range(1, 13)],
            # '00:00' ... '23:00'
            "time": [f"{hour:02d}:00" for hour in range(24)],
        },
        filename,
    )

except requests.exceptions.HTTPError as errh:
    print ("HTTP Error:",errh)
except requests.exceptions.ConnectionError as errc:
    print ("Error Connecting:",errc)
except requests.exceptions.Timeout as errt:
    print ("Timeout Error:",errt)
except requests.exceptions.RequestException as err:
    print ("Something went wrong with the request:",err)

else:
    # Print the location where the file is saved
    print(f"File saved at: {os.path.abspath(filename)}")

In [5]:
# Short names of the GRIB variables to load
variables = ['2t','10v','10u','tp','tcc']


def _open_grib_variable(short_name):
    """Open past_climate.grib with cfgrib, keeping only messages with this shortName."""
    return xr.open_dataset(
        'past_climate.grib',
        engine='cfgrib',
        backend_kwargs={'filter_by_keys': {'shortName': short_name}},
    )


# One dataset per variable, keyed by its short name
datasets = {short_name: _open_grib_variable(short_name) for short_name in variables}

# `ds` stays bound to the last dataset opened (tcc), exactly as the loop variable
# of a plain for-loop would; other cells may read it.
ds = datasets[variables[-1]]

# Print the last dataset just to check if everything worked.
# To inspect every dataset instead, loop over datasets.items() and print each one.
print(ds)

Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file


<xarray.Dataset>
Dimensions:     (time: 8928, latitude: 9, longitude: 9)
Coordinates:
    number      int32 ...
  * time        (time) datetime64[ns] 1992-01-01 ... 2022-12-01T23:00:00
    step        timedelta64[ns] ...
    surface     float64 ...
  * latitude    (latitude) float64 38.96 38.71 38.46 38.21 ... 37.46 37.21 36.96
  * longitude   (longitude) float64 -3.435 -3.185 -2.935 ... -1.934 -1.684 358.6
    valid_time  (time) datetime64[ns] ...
Data variables:
    tcc         (time, latitude, longitude) float32 ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2023-12-05T11:54 GRIB to CDM+CF via cfgrib-0.9.1...


### Rainfall and temperatures

In [6]:
# Monthly climatology: group by calendar month, then reduce over the listed dimensions

KELVIN_OFFSET = 273.15       # subtracted from t2m to get degrees Celsius
METRES_TO_MM = 1000          # tp is scaled by this factor for the precipitation axis
spatial_dims = ['latitude', 'longitude']

# Precipitation: mean over time, space and the extra 'step' dimension
avg_tp = datasets['tp']['tp'].groupby('time.month').mean(['time', *spatial_dims, 'step']) * METRES_TO_MM

# 2 m temperature: mean / max / min over time and space for each calendar month
t2m_by_month = datasets['2t']['t2m'].groupby('time.month')
avg_temp = t2m_by_month.mean(['time', *spatial_dims]) - KELVIN_OFFSET
max_temp = t2m_by_month.max(['time', *spatial_dims]) - KELVIN_OFFSET
min_temp = t2m_by_month.min(['time', *spatial_dims]) - KELVIN_OFFSET

# 10 m wind components: only latitude/longitude are reduced here.
# NOTE(review): 'time' is not in the reduced dims, so these presumably keep one value
# per time step rather than one per month — confirm this is what the wind rose expects.
avg_u = datasets['10u']['u10'].groupby('time.month').mean(spatial_dims)
avg_v = datasets['10v']['v10'].groupby('time.month').mean(spatial_dims)



In [13]:


# Figure with a single x-axis and two y-axes (primary = left, secondary = right)
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Precipitation as bars on the primary (left) y-axis
precipitation_bars = go.Bar(x=avg_tp.month, y=avg_tp, name='Precipitation', opacity=0.5)
fig.add_trace(precipitation_bars, secondary_y=False)

# Temperature as a line on the secondary (right) y-axis
temperature_line = go.Scatter(x=avg_temp.month, y=avg_temp, mode='lines', name='Temperature')
fig.add_trace(temperature_line, secondary_y=True)

# Axis titles, month tick labels and overall styling
month_axis = dict(
    title='Month',
    tickmode='array',
    tickvals=avg_temp.month,
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    tickangle=-45
)
fig.update_layout(
    title = 'Average monthly temperature and precipitation',
    yaxis=dict(title='Precipitation (mm)'),
    yaxis2=dict(title='Temperature (°C)', overlaying='y', side='right'),
    xaxis=month_axis,
    template='simple_white'
)

# Show the figure
fig.show()

The blue bars show the total rainfall in mm averaged for each month.    
The orange line shows the average temperature in °C per month. 

In [14]:
# TODO: revisit the title

print(plotly.__version__)

# Create a DataFrame from the DataArrays (one row per calendar month)
df = pd.DataFrame({
    'month': avg_temp.month.values,
    'avg_temp': avg_temp.values,
    'max_temp': max_temp.values,
    'min_temp': min_temp.values
})

fig = go.Figure()

# Line for the average temperature
fig.add_trace(go.Scatter(
    x=df['month'], y=df['avg_temp'],
    mode='lines', name='Average temperature', line_color='orange',
))

# Upper edge of the band (max temperature); its legend entry labels the whole band
fig.add_trace(go.Scatter(
    x=df['month'], y=df['max_temp'],
    mode='lines', name='Average daily range per month', line_color='red',
))

# Lower edge of the band (min temperature), filled up to the previous trace.
# showlegend belongs to the trace: fig.add_trace() has no such keyword and
# raises a TypeError when it is passed there.
fig.add_trace(go.Scatter(
    x=df['month'], y=df['min_temp'],
    mode='lines', name='Min temperature', line_color='red',
    fill='tonexty', fillcolor = 'rgba(255, 0, 0, 0.1)',
    showlegend=False,
))

# Dashed horizontal line marking the freezing point
fig.add_hline(y=0, opacity=1, line_width=2, line_dash='dash', line_color='blue', annotation_text='freezing', annotation_position='top')

# Set the layout
fig.update_layout(
    yaxis=dict(title='Temperature (°C)'),
    xaxis=dict(
        title='Month',
        tickmode='array',
        tickvals=df['month'],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        tickangle=-45
    ),
    template='simple_white'
)
# Show the figure
fig.show()

5.18.0


This figure shows the average temperature for each month complemented by the average daily temperature range per month. Meaning: the top line shows the maximum temperature of every single day, averaged over the corresponding month. Same goes for the bottom line. In plain text: the CDS dataset provides us with an hourly mean temperature for each month. For this plot we took the highest, the lowest and the average values. The red area shows the temperature range over 24 h of an average day for each month.  
The line at 0 °C highlights freezing conditions.


### Wind speeds and directions

In [9]:
# Prepare the wind rose data: bin wind speed and direction, then count how often
# each (direction, speed) combination occurs.

MS_TO_KMH = 3.6          # m/s -> km/h
SPEED_BIN_WIDTH = 2      # width of one speed class in km/h
COMPASS_SECTORS = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]

# Calculate wind speed from the u/v components and convert to km/h
wind_speed = np.sqrt(avg_u**2 + avg_v**2) * MS_TO_KMH

# Calculate wind direction (see: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398)
wind_direction = np.mod(180 + np.arctan2(avg_u, avg_v) * (180 / np.pi), 360)

# Prepare the data for the wind rose
df = pd.DataFrame({'speed': wind_speed, 'direction': wind_direction})

# Compass sectors are centred on their bearing (N covers 337.5°-22.5°, NE 22.5°-67.5°, ...).
# Shifting by half a sector before cutting puts N at [0, 45) of the shifted scale;
# cutting the raw angle at 0, 45, ... would label 0°-45° as "N" and rotate the rose.
sector_width = 360 / len(COMPASS_SECTORS)
centred_direction = (df['direction'] + sector_width / 2) % 360
df['direction'] = pd.cut(
    centred_direction,
    bins=np.linspace(0, 360, len(COMPASS_SECTORS) + 1),
    labels=COMPASS_SECTORS,
    right=False,  # left-closed bins so an angle of exactly 0 is kept
)

# Speed classes [0, 2), [2, 4), ... The last edge is the first multiple of the bin
# width strictly above the maximum speed, so every value falls into a class.
upper_edge = SPEED_BIN_WIDTH * (np.floor(df['speed'].max() / SPEED_BIN_WIDTH) + 1)
speed_bins = np.arange(0, upper_edge + SPEED_BIN_WIDTH / 2, SPEED_BIN_WIDTH)
df['speed'] = pd.cut(df['speed'], bins=speed_bins, right=False)

# Calculate frequencies. observed=False keeps empty (direction, speed) combinations,
# which was the implicit default, and silences the pandas FutureWarning about it.
frequency_df = df.groupby(['direction', 'speed'], observed=False).size().reset_index(name='frequency')

# Calculate total frequency
total_frequency = frequency_df['frequency'].sum()

# Convert frequency to proportion
frequency_df['frequency'] = frequency_df['frequency'] / total_frequency

# Get the number of unique 'speed' categories
num_categories = len(frequency_df['speed'].unique())

# Sort the 'speed' categories
sorted_categories = frequency_df['speed'].sort_values().unique()


  frequency_df = df.groupby(['direction', 'speed']).size().reset_index(name='frequency')


In [10]:

# Sample the reversed viridis colormap once per speed category and convert to hex strings
viridis_samples = plt.cm.viridis_r(np.linspace(0, 1, num_categories))
custom_color_scale = [matplotlib.colors.rgb2hex(rgba) for rgba in viridis_samples]

# Pair each sorted speed category with its colour
color_map = dict(zip(sorted_categories, custom_color_scale))

# Wind rose: bar length = frequency, angle = direction, colour = speed category
fig = px.bar_polar(
    frequency_df,
    r='frequency',
    theta='direction',
    color='speed',
    template='simple_white',
    color_discrete_map=color_map,
    labels={"speed": "Speed [km/h]"},
)

# Fixed figure size (800 x 600 pixels) with both polar grids visible
fig.update_layout(
    width=800,
    height=600,
    polar_radialaxis_showgrid=True,
    polar_angularaxis_showgrid=True,
)

fig.show()

  grouped = df.groupby(required_grouper, sort=False)  # skip one_group groupers


Each wind direction is represented by a bar. The length of the bar indicates how frequently the wind blows from that direction (in %).
The colours indicate the averaged wind speed in km/h. Keep in mind that these are averaged values and don't indicate how prone your location is to events like storms.

## Average cloud cover and sunrise/sunset times

In [8]:
# Get rid of the latitude and longitude dimensions by averaging the data.
# Read the cloud cover from the `datasets` dict rather than the leftover loop variable
# `ds`, which only held the tcc dataset because 'tcc' happened to be loaded last.
avg_tcc = datasets['tcc']['tcc'].mean(dim=['longitude', 'latitude'])


# Now average the data of each hour of each month across all years of data.
# Month and hour are encoded into one key (month * 100 + hour, e.g. 1223 = December, 23:00),
# giving 12 * 24 = 288 groups. The key is named 'group' explicitly because the
# plotting cell reads hourly_mean_by_month['group'].
month_hour_key = (avg_tcc['time.month'] * 100 + avg_tcc['time.hour']).rename('group')
month_hour_grouped = avg_tcc.groupby(month_hour_key)
hourly_mean_by_month = month_hour_grouped.mean(dim='time')


DataArrayGroupBy, grouped over 'group'
288 groups with labels 100, 101, 102, ..., 1221, 1222, 1223.


In [None]:
# Look up the timezone name for the location's coordinates
tf = timezonefinder.TimezoneFinder()
timezone_str = tf.certain_timezone_at(lat=location.latitude, lng=location.longitude)

# astral location built from coordinates only (no name or region)
location_info = LocationInfo(None, None, timezone_str, location.latitude, location.longitude)

# Sun events for the 15th of every month of 2022, expressed in the location's
# timezone (tzinfo=timezone_str)
mid_month_sun = [
    sun(location_info.observer, date=datetime(2022, month_number, 15), tzinfo=timezone_str)
    for month_number in range(1, 13)
]

# Keep only sunrise and sunset, formatted as 'HH:MM' strings
sunrise_times = [events['sunrise'].strftime('%H:%M') for events in mid_month_sun]
sunset_times = [events['sunset'].strftime('%H:%M') for events in mid_month_sun]

print(sunrise_times, sunset_times)
    

In [32]:
# Create a graph with cloud cover values plus sunrise and sunset times

MONTH_LABELS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
month_positions = list(range(len(MONTH_LABELS)))  # x positions 0..11, one per month


def _clock_to_decimal_hours(clock):
    """Convert an 'HH:MM' string to decimal hours rounded to two places (e.g. '07:30' -> 7.5)."""
    hours, minutes = clock.split(':')
    return round(int(hours) + int(minutes) / 60, 2)


# Decode the month/hour combination used for grouping before (key = month * 100 + hour)
month_values = hourly_mean_by_month['group'] // 100
hour_values = hourly_mean_by_month['group'] % 100

# Reshape the 288 values to (12 months, 24 hours), then transpose so that
# rows are hours (y-axis) and columns are months (x-axis)
data_reshaped = hourly_mean_by_month.values.reshape((12, 24)).T

fig = go.Figure()

fig.add_trace(go.Heatmap(
    z=data_reshaped,
    x=month_positions,
    y=list(range(24)),
    xgap = 5,
    colorscale='gray_r',
    colorbar=dict(title="Cloud Cover"),
))

# One tick per month. tickvals must match the 12 labels; month_values has one entry
# per month/hour group (288), so its length cannot be used here.
fig.update_xaxes(
    tickvals=month_positions,
    ticktext=MONTH_LABELS,
    tickmode='array',  # Use 'array' for custom tickvals and ticktext
    tickangle=-45,  # Rotate tick labels for better readability
)

# Set axis labels and title
fig.update_layout(
    title='Monthly hourly mean cloud cover with sunrise and sunset times',
    yaxis_title='Hour of the day',
    xaxis_title='Month',
)

# Add a line for sunset times
fig.add_trace(go.Scatter(
    x=month_positions,
    y=[_clock_to_decimal_hours(clock) for clock in sunset_times],
    mode='lines',
    line=dict(color='rgb(150,0,255)', width=2),
    name='Sunset'
))

# Add a line for sunrise times
fig.add_trace(go.Scatter(
    x=month_positions,
    y=[_clock_to_decimal_hours(clock) for clock in sunrise_times],
    mode='lines',
    line=dict(color='rgb(255,65,0)', width=2),
    name='Sunrise'
))


# Show the legend next to the plot and label every second hour on the y-axis
fig.update_layout(
    showlegend=True,
    legend=dict(x=1.02, y=1.15),
    yaxis=dict(
        dtick=2,)
)


# Show the plot

fig.show()

This plot shows average hourly cloud cover for each month. Hourly changes of a month describe the cloud cover cycle of an average day for the associated month, while changes between months highlight the average seasonal trends.     \
The two lines show sunrise and sunset times, adjusted for the timezone of the location as well as daylight saving times.  

# PROJECTION DATA

In [None]:
# Request monthly CMIP6 air temperature (experiment ssp2_4_5, model cmcc_esm2) for the
# whole globe and save it as download.zip
# NOTE(review): the year list is not contiguous (e.g. 2024, 2025 and 2028 are absent) —
# confirm whether that is intentional.
cmip6_request = {
    'format': 'zip',
    'temporal_resolution': 'monthly',
    'experiment': 'ssp2_4_5',
    'variable': 'air_temperature',
    'level': '1000',
    'model': 'cmcc_esm2',
    'year': ['2023', '2026', '2027', '2029', '2030', '2033', '2034', '2036'],
    # '01' ... '12'
    'month': [f"{month_number:02d}" for month_number in range(1, 13)],
    # Same order as the ERA5 request: North, West, South, East
    'area': [90, -180, -90, 180],
}

c = cdsapi.Client()
c.retrieve('projections-cmip6', cmip6_request, 'download.zip')