In [1]:
import cdsapi
import xarray as xr
import numpy as np
import pandas as pd
import time
import requests
import os
import zipfile
import dask

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import matplotlib.pyplot as plt #

from geopy.geocoders import Nominatim
from datetime import datetime
import timezonefinder
from astral.sun import sun
from astral.location import LocationInfo

# Download and prepare the data 

## Past data
In this first section we download and prepare the data for the last 30 years, the dataset is the [ERA5 monthly averaged data on single levels from 1940 to present](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-monthly-means?tab=overview).


In [50]:
# select the years you want to download
start_year = 1992
end_year = 2022
year_range = [i for i in range(start_year, end_year + 1)]

location_name = ("Puebla de don Fadrique, Spain")
# Use geopy to get the latitude and longitude of the city
geolocator = Nominatim(user_agent="permaculture-climate", timeout=10)
location = geolocator.geocode(location_name)
# Add a delay between requests
time.sleep(2)

In [9]:
#API Call to download past climate data
c = cdsapi.Client()
try:
    data = c.retrieve("reanalysis-era5-single-levels-monthly-means",
    {"format": "grib",
     "product_type": "monthly_averaged_reanalysis_by_hour_of_day",
     "variable": ['10m_u_component_of_wind', '10m_v_component_of_wind', 
                '2m_temperature',
                'total_cloud_cover', 
                'total_precipitation',
                '2m_dewpoint_temperature',
                ],
    "area": [location.latitude + 1, 
             location.longitude - 1, 
             location.latitude - 1, 
             location.longitude + 1],  # North, West, South, East. 
    "year": year_range,
    "month": ['01', '02', '03',
           '04', '05', '06',
           '07', '08', '09',
           '10', '11', '12'],
    "time": ["00:00","01:00","02:00","03:00","04:00","05:00",
             "06:00","07:00","08:00","09:00","10:00","11:00",
             "12:00", "13:00","14:00","15:00","16:00","17:00",
             "18:00","19:00","20:00","21:00","22:00","23:00"]
    })

    # Get the location of the file to download
    url = data.location

    # Download the file
    response = requests.get(url)

    # Check if the request was successful
    response.raise_for_status()

except requests.exceptions.HTTPError as errh:
    print ("HTTP Error:",errh)
except requests.exceptions.ConnectionError as errc:
    print ("Error Connecting:",errc)
except requests.exceptions.Timeout as errt:
    print ("Timeout Error:",errt)
except requests.exceptions.RequestException as err:
    print ("Something went wrong with the request:",err)

else:
    # If the request was successful, write the file
    filename = 'past_climate.grib'
    with open(filename, 'wb') as f:
        f.write(response.content)

    # Print the location where the file is saved
    print(f"File saved at: {os.path.abspath(filename)}")

2023-12-17 15:06:55,847 INFO Welcome to the CDS
2023-12-17 15:06:55,849 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels-monthly-means
2023-12-17 15:06:56,235 INFO Request is completed


File saved at: c:\Users\matthias\Documents\Projects\permaculture-climate\past_climate.grib


In [3]:
#read the data from the downloaded grib file. Because the variables have different dimensions, we need to read them separately
# List of variables to load
variables = ['2t','10v','10u','tp','tcc', '2d'] 

# Dictionary to hold the datasets
datasets = {}

# Open the GRIB file for each variable using the short name parameter
for var in variables:
    ds = xr.open_dataset('past_climate.grib', engine='cfgrib', backend_kwargs={'filter_by_keys': {'shortName': var}})
    datasets[var] = ds

Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file


In [4]:
#calculate relative humidity from temperature and dewpoint temperature
def rh(dewpoint, temperature):
    return 100*(np.exp((17.625*dewpoint)/(243.04+dewpoint))/np.exp((17.625*temperature)/(243.04+temperature)))

rh_all = rh(datasets['2d']['d2m']-273.15, datasets['2t']['t2m']-273.15)

datasets['rh'] = xr.Dataset({'rh': xr.DataArray(rh_all, coords=datasets['2d']['d2m'].coords, dims=datasets['2d']['d2m'].dims)})

In [5]:
#Calculate climatology and perform units conversion. Parallelized the process using dask.

# Chunk the data using dask
chunksize = 600

datasets['tp']['tp'] = datasets['tp']['tp'].chunk({'time': chunksize})
datasets['2t']['t2m'] = datasets['2t']['t2m'].chunk({'time': chunksize})
datasets['rh']['rh'] = datasets['rh']['rh'].chunk({'time': chunksize}) 
datasets['10u']['u10'] = datasets['10u']['u10'].chunk({'time': chunksize})
datasets['10v']['v10'] = datasets['10v']['v10'].chunk({'time': chunksize})
datasets['tcc']['tcc'] = datasets['tcc']['tcc'].chunk({'time': chunksize})

with dask.config.set(scheduler='threads'):  

    print("Calculating precipitation")
    
    # Average precipitation. Converting from m per hour to mm per month
    days_per_month = [31, 28.25, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    avg_prec = datasets['tp']['tp'].groupby('time.month').mean(['time', 'latitude', 'longitude', 'step'])*1000 * 24 * days_per_month
    
    print("Calculating temperature")

    #average temperature. Convert from K to C
    mean_spatial_temp = datasets['2t']['t2m'].mean(['latitude', 'longitude'])-273.15
    avg_temp = mean_spatial_temp.groupby('time.month').mean(['time'])
    mean_spatial_temp['month_year'] = mean_spatial_temp['time'].dt.strftime('%Y-%m')
    
    #Calculate average max temp
    max_monthly_temp = mean_spatial_temp.groupby('month_year').max()
    max_monthly_temp['month'] = max_monthly_temp['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_max_temp = max_monthly_temp.groupby('month').mean()

    #calculate average min temperature
    min_monthly_temp = mean_spatial_temp.groupby('month_year').min()
    min_monthly_temp['month'] = min_monthly_temp['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_min_temp = min_monthly_temp.groupby('month').mean() 
    
    print("Calculating relative humidity")
    
    #relative humidity
    mean_spatial_rh = datasets['rh']['rh'].mean(['latitude', 'longitude'])
    avg_rh = mean_spatial_rh.groupby('time.month').mean(['time'])
    mean_spatial_rh['month_year'] = mean_spatial_rh['time'].dt.strftime('%Y-%m')
    
    #calculate average max rh
    max_monthly_rh = mean_spatial_rh.groupby('month_year').max()
    max_monthly_rh['month'] = max_monthly_rh['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_max_rh = max_monthly_rh.groupby('month').mean()
    
    #calculate average min rh
    min_monthly_rh = mean_spatial_rh.groupby('month_year').min()
    min_monthly_rh['month'] = min_monthly_rh['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_min_rh = min_monthly_rh.groupby('month').mean()
    
    print("Calculating winds")
       
    #Average winds
    avg_u = datasets['10u']['u10'].groupby('time.month').mean(['latitude', 'longitude'])
    avg_v = datasets['10v']['v10'].groupby('time.month').mean(['latitude', 'longitude'])
    
    print("Calculating total cloud cover")

    #Get rid of the latitude and longitude dimensions by averaging the data
    avg_tcc_spatial = datasets['tcc']['tcc'].mean(['longitude', 'latitude'])

    #Now average the data of each hour of each month across the 30 years of data. We end up with 288 data points, representing 24 h per month
    month_hour_grouped = avg_tcc_spatial.groupby(avg_tcc_spatial['time.month'] * 100 + avg_tcc_spatial['time.hour'])
    avg_tcc = month_hour_grouped.mean(dim='time')
    
    print("Climatology calculated")
 
   
#Perfom the calcultaions set above and return the results as a pandas dataframe
avg_prec = avg_prec.compute()
avg_temp = avg_temp.compute()
mean_max_temp = mean_max_temp.compute()
mean_min_temp = mean_min_temp.compute()
avg_rh = avg_rh.compute()
mean_max_rh = mean_max_rh.compute()
mean_min_rh = mean_min_rh.compute()
avg_u = avg_u.compute()
avg_v = avg_v.compute()
avg_tcc = avg_tcc.compute()

Calculating precipitation
Calculating temperature
Calculating relative humidity
Calculating winds
Calculating total cloud cover
Climatology calculated


## Projected data

In this section we download and prepare projected data coming from the [CMIP6 climate projections](https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=overview). 

You can also select the scenario you want to use. We recommend ssp2_4.5. Of all avialable scenarios it has a balanced Shared Socioeconomic Pathway and 4.5°C warming. Here are all available options:
- SSP1-1.9, SSP1-2.6, SSP4-3.4, SSP5-3.4OS, SSP2-4.5, SSP4-6.0, SSP3-7.0, SSP5-8.5


In [6]:
# select the future years range
start_year_forecast = 2016
end_year_forecast = 2046
year_range_forecast = [str(i) for i in range(start_year_forecast, end_year_forecast + 1)]

#select the scenario you want to use for the projections.
#We reccomend ssp2_4.5, which is in our opinion a balanced options. 
# ssp1_1.9 ssp1_2.6 ssp4_3.4, ssp5_3.4os ssp2_4.5 ssp3_7.0 ssp5_8.5
#Note that if you choose the ssp4_6.0 parameter you also have to select another model in the API request as EC Earth3 doesnt provide that model. 


scenario = 'ssp2_4.5'

#variables that will be downloaded
dataset_variables = ['near_surface_relative_humidity', 'near_surface_air_temperature', 'eastward_near_surface_wind', 
                     'northward_near_surface_wind', 'precipitation']


In [15]:
# Send API requests for the prediction data specified in dataset_variables. Each request returns a folder.zip
c = cdsapi.Client()

for variable in dataset_variables:
        #create an if statement, as certain variables also include level, while others dont

    data = c.retrieve(
        'projections-cmip6',
        {
            'format': 'zip',
            'temporal_resolution': 'monthly',
            'variable': variable,
            'experiment': scenario,
            'model': 'ec_earth3_cc',
            'area':[location.latitude + 1, 
                location.longitude - 1, 
                location.latitude - 1, 
                location.longitude + 1],  # North, West, South, East. 
            'month': [
                '01', '02', '03',
                '04', '05', '06',
                '07', '08', '09',
                '10', '11', '12',
            ],
            'year': year_range_forecast,

        },
        variable + '.zip')    
    #selectt the location of the file to download
    url = data.location
    # Download the file
    response = requests.get(url)
    # Check if the request was successful
    response.raise_for_status()

    # If the request was successful, write the file
    filename = variable + '.zip'
    with open(filename, 'wb') as f:
        f.write(response.content)

        # Print the location where the file is saved
        print(f"File saved at: {os.path.abspath(filename)}")
print('Downloads completed')

2023-12-19 14:42:41,995 INFO Welcome to the CDS
2023-12-19 14:42:41,996 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-12-19 14:42:42,462 INFO Request is queued
2023-12-19 14:42:43,519 INFO Request is running
2023-12-19 14:55:01,624 INFO Request is completed
2023-12-19 14:55:01,641 INFO Downloading https://download-0004-clone.copernicus-climate.eu/cache-compute-0004/cache/data5/adaptor.esgf_wps.retrieve-1702994052.5040371-20583-4-71b0cd58-5e2c-4580-9d1e-3d108a869f23.zip to near_surface_relative_humidity.zip (216.4K)
2023-12-19 14:55:03,071 INFO Download rate 151.4K/s
2023-12-19 14:55:03,431 INFO Welcome to the CDS
2023-12-19 14:55:03,433 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6


File saved at: /Users/giacomo/Documents/projects/permaculture-climate/near_surface_relative_humidity.zip


2023-12-19 14:55:03,659 INFO Downloading https://download-0007-clone.copernicus-climate.eu/cache-compute-0007/cache/data6/adaptor.esgf_wps.retrieve-1702897804.881354-1204-9-f8c5182f-4783-4f01-bba7-9eef865d6c87.zip to near_surface_air_temperature.zip (234.4K)
2023-12-19 14:55:04,392 INFO Download rate 320.1K/s
2023-12-19 14:55:04,883 INFO Welcome to the CDS
2023-12-19 14:55:04,884 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6


File saved at: /Users/giacomo/Documents/projects/permaculture-climate/near_surface_air_temperature.zip


2023-12-19 14:55:05,055 INFO Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data3/adaptor.esgf_wps.retrieve-1702897907.5961428-30681-7-8bef1012-dd55-45fe-a742-543fa05c89aa.zip to eastward_near_surface_wind.zip (211.7K)
2023-12-19 14:55:05,462 INFO Download rate 527.3K/s
2023-12-19 14:55:05,868 INFO Welcome to the CDS
2023-12-19 14:55:05,869 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6
2023-12-19 14:55:06,004 INFO Downloading https://download-0012-clone.copernicus-climate.eu/cache-compute-0012/cache/data9/adaptor.esgf_wps.retrieve-1702897934.2657735-30738-15-c7c8d945-1bc2-4f0b-8b03-bb02e6e68b64.zip to northward_near_surface_wind.zip (212.6K)


File saved at: /Users/giacomo/Documents/projects/permaculture-climate/eastward_near_surface_wind.zip


2023-12-19 14:55:06,117 INFO Download rate 1.9M/s
2023-12-19 14:55:06,523 INFO Welcome to the CDS
2023-12-19 14:55:06,524 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/projections-cmip6


File saved at: /Users/giacomo/Documents/projects/permaculture-climate/northward_near_surface_wind.zip


2023-12-19 14:55:06,733 INFO Downloading https://download-0006-clone.copernicus-climate.eu/cache-compute-0006/cache/data4/adaptor.esgf_wps.retrieve-1702897958.7588747-13741-7-57b4e28f-3e98-40c6-917e-126fd8ee3a52.zip to precipitation.zip (211.7K)
2023-12-19 14:55:07,140 INFO Download rate 521.3K/s


File saved at: /Users/giacomo/Documents/projects/permaculture-climate/precipitation.zip
Downloads completed


In [7]:
#extract all zip folders into new folders with the name of the variable they belong to
extract_dir = 'prediction_data/'

for filename in os.listdir():

    if filename.endswith('.zip'):
        # Construct the full path to the zip file
        zip_file_path = os.path.join(os.getcwd(), filename)

        # Get the folder name from the zip file (excluding the '.zip' extension)
        folder_name = os.path.splitext(filename)[0]
        
        # Create a directory with the same name as the zip file within the parent directory
        extracted_dir = os.path.join(extract_dir, folder_name)

        os.makedirs(extracted_dir, exist_ok=True)

        # Open the zip file
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            # Extract all the contents to the created directory
            zip_ref.extractall(extracted_dir)

        print(f"Files from {filename} extracted to: {extracted_dir}")

print("Extraction complete.")


Files from air_temperature.zip extracted to: prediction_data/air_temperature
Files from eastward_near_surface_wind.zip extracted to: prediction_data/eastward_near_surface_wind
Files from near_surface_air_temperature.zip extracted to: prediction_data/near_surface_air_temperature
Files from near_surface_relative_humidity.zip extracted to: prediction_data/near_surface_relative_humidity
Files from northward_near_surface_wind.zip extracted to: prediction_data/northward_near_surface_wind
Files from precipitation.zip extracted to: prediction_data/precipitation
Files from relative_humidity.zip extracted to: prediction_data/relative_humidity
Extraction complete.


In [8]:

# find the paths to each .nc file, as they contain the data we need
nc_file_paths = []
for folder_name in os.listdir(extract_dir):
    folder_path = os.path.join(extract_dir, folder_name)

    # Check if it's a directory
    if os.path.isdir(folder_path):
        # Find NC files within the folder
        nc_files = [file for file in os.listdir(folder_path) if file.endswith('.nc')]

        # Assuming there is exactly one NC file in each folder
        if len(nc_files) == 1:
            nc_file_paths.append(os.path.join(folder_path, nc_files[0]))

# Print the list of NC file paths
print("List of NC file paths:")
for nc_path in nc_file_paths:
    print(nc_path)

List of NC file paths:
prediction_data/air_temperature\ta_Amon_CMCC-ESM2_ssp245_r1i1p1f1_gn_20160116-20461216_v20210129.nc
prediction_data/eastward_near_surface_wind\uas_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20160116-20461216_v20210113.nc
prediction_data/near_surface_air_temperature\tas_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20160116-20461216_v20210113.nc
prediction_data/near_surface_relative_humidity\hurs_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20160116-20461216_v20210113.nc
prediction_data/northward_near_surface_wind\vas_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20160116-20461216_v20210113.nc
prediction_data/precipitation\pr_Amon_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20160116-20461216_v20210113.nc
prediction_data/relative_humidity\hur_Amon_CMCC-ESM2_ssp245_r1i1p1f1_gn_20160116-20461216_v20210129.nc


In [9]:
#create a dictionary containing all datasets
projection_datasets = {}
for  path in nc_file_paths:
    #get the name of the folder the variable is saved in
    folder_path = os.path.dirname(path)
    folder_name = os.path.basename(folder_path)
    #open the nc file append it to projection_dataset dictionary
    proj_ds = xr.open_dataset(path)
    projection_datasets[folder_name] = proj_ds

In [10]:
#calculate and convert different variables. no parallelisation needed here as the dataset is much smaller.

#Convert the values from kg * m^-2 * s^-1 to mm 
proj_avg_prec = projection_datasets['precipitation']['pr'].groupby('time.month').mean(['time', 'lat', 'lon'])*2592000 

#calculate average temperature. Convert from K to C
proj_avg_temp = projection_datasets['near_surface_air_temperature']['tas'].groupby('time.month').mean(['time', 'lat', 'lon'])-273.15

#Same operation but for relative humidity.
proj_avg_hum = projection_datasets['near_surface_relative_humidity']['hurs'].groupby('time.month').mean(['time', 'lat', 'lon'])

#calculate average wind speeds
proj_avg_u = projection_datasets['eastward_near_surface_wind']['uas'].groupby('time.month').mean(['time', 'lat', 'lon'])
proj_avg_v = projection_datasets['northward_near_surface_wind']['vas'].groupby('time.month').mean(['time', 'lat', 'lon'])

# Graphical representations 
In this section we will present both past and projected data.
Before that we have a small presentational text with general information on the selected location.

In [79]:
# Text preparations:

#Get altitude of the selected location using open elevation API
url = f'https://api.open-elevation.com/api/v1/lookup?locations={location.latitude},{location.longitude}'
response = requests.get(url)
data = response.json()
elevation = data['results'][0]['elevation']



# Temperature data:


months = [
    "January", "February", "March", "April",
    "May", "June", "July", "August",
    "September", "October", "November", "December"
]
min_index = np.argmin(avg_temp.values)
max_index = np.argmax(avg_temp.values)




447.96759563684464


In [85]:
#Generated text: 
print(f"""
    {location_name}, is located at an altitude of {int(elevation)} m.
    The lowest temperature is reached in {months[np.argmin(avg_temp.values)]} and is on average {int(min(avg_temp.values))} °C.     
    Maximum temperatures are reached in {months[max_index]} and are on average {int(max(avg_temp.values))} °C.
    On average, {int(sum(avg_prec.values))} mm of rain occour every year, 
    with {months[np.argmax(avg_prec.values)]} beeing the wettest and {months[np.argmin(avg_prec.values)]} the driest month.
    """)



    Puebla de don Fadrique, Spain, is located at an altitude of 1159 m.
    The lowest temperature is reached in January and is on average 6 °C.     
    Maximum temperatures are reached in July and are on average 25 °C.
    On average, 447 mm of rain occour every year, 
    with March beeing the wettest and July the driest month.
    


### Rainfall and temperatures

In [14]:
# Create a subplot with shared x-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add a bar chart for precipitation and projected precipitation to the secondary y-axis
bar_prec = go.Bar(x=avg_prec.month, 
                  y=avg_prec, 
                  name='Precipitation', 
                  opacity=0.5, 
                  marker_color = 'blue', 
                  hovertemplate=('%{x}: %{y:.0f} mm <extra></extra>')
                  )

bar_prec_projected = go.Bar(x=proj_avg_prec.month, 
                            y=proj_avg_prec, 
                            name='Projected precipitation', 
                            marker_color = 'cyan',
                            hovertemplate=('%{x}: %{y:.0f} mm <extra></extra>')
                            )

fig.add_trace(bar_prec)
fig.add_trace(bar_prec_projected)

# Add a line chart for temperature to the primary y-axis
fig.add_trace(go.Scatter(x=avg_temp.month, 
                         y=avg_temp, 
                         mode='lines', 
                         name='Temperature', 
                         line_color = 'red', 
                         hovertemplate=('%{x}: %{y:.0f} °C <extra></extra>')
                         ),
                secondary_y=True
                )

# Add a line chart for projected temperature
fig.add_trace(go.Scatter(x=proj_avg_temp.month, 
                         y=proj_avg_temp, 
                         mode='lines', 
                         name='Projected temperature', 
                         line_color='orange',
                         hovertemplate=('%{x}: %{y:.0f} °C <extra></extra>')
                         ),   
            secondary_y=True,
            )

# Set the layout to have two y-axes
fig.update_layout(title='Average projected temperature and precipitation',
                  yaxis=dict(title='Precipitation (mm)'),
                  yaxis2=dict(title='Temperature (°C)', overlaying='y', side='right'),
                  xaxis=dict(title='Month',
                             tickmode='array',
                             tickvals=avg_temp.month,
                             ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
                             tickangle=-45
                             ),
                    template='simple_white',
                    )
# Add a text on the bottom of the figure
fig.add_annotation(text="""This figure shows the average temperature for each month complemented by the average temperature range per month. 
                   <br> Meaning: the top line shows the average maximum temperature of each month. Same goes for the bottom line. 
                   <br> Keep in mind that this is an average, maximum temperature can be outside of the shown range.
                    <br> When the range reaches 0 or below, a blue line highlights the freezing temperature.
                    """,
                    xref='paper', yref='paper',
                    x=0.5, y=-0.75,  # Adjust this value to position the text below the x-axis legend
                    showarrow=False,
                    align='left',  # Set align to 'left'
                    font=dict(size=12, color='black'),
                    bordercolor='black',  # Set border color
                    borderwidth=0.5,  # Set border width
                    borderpad=7,  # Set border padding 
                    )

# Adjust the bottom margin to create more space below the figure
fig.update_layout(margin=dict(b=155))

# Show the figure
fig.show()

The blue bars show the total rainfall in mm averaged for each month.    
The orange line shows the average temperature in °C per month. 

#### Agerage monthly temperature range

In [77]:
import pandas as pd
import plotly.graph_objects as go

# Assuming avg_temp, mean_max_temp, and mean_min_temp are already defined

# Create a DataFrame from the DataArrays
df = pd.DataFrame({
    'month': avg_temp.month.values,
    'avg_temp': avg_temp.values,
    'max_temp': mean_max_temp.values,
    'min_temp': mean_min_temp.values
})

# Create a line chart for average temperature
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['month'], y=df['avg_temp'],
                         mode='lines', 
                         name='Average temperature', 
                         line_color='orange'
                         )
             )

# Add a line chart for max temperature
fig.add_trace(go.Scatter(x=df['month'], y=df['max_temp'], 
                         mode='lines',
                         name='Average range per month', 
                         line_color='red'
                         )
             )

# Add a line chart for min temperature
fig.add_trace(go.Scatter(x=df['month'], y=df['min_temp'], 
                         mode='lines', 
                         name='Min temperature', 
                         line_color='red', 
                         fill='tonexty', 
                         fillcolor='rgba(255, 0, 0, 0.1)', 
                         showlegend=False
                         )
             )

# Add a line chart for min temperature
if min(mean_min_temp.values) <= 0.9:
    fig.add_hline(y=0, opacity=1, 
                  line_width=2, 
                  line_dash='dash', 
                  line_color='blue',
                  annotation_text='freezing', 
                  annotation_position='top'
                  )

# Set the layout
fig.update_yaxes(range=[0, max(df['max_temp'] + 5)])

fig.update_layout(title='Average temperature range',
                  yaxis=dict(title='Temperature (°C)'),
                  xaxis=dict(
                      title='Month',
                      tickmode='array',
                      tickvals=df['month'],
                      ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
                      tickangle=-45
                      ),
                  template='simple_white',
                  )

# Add text within the figure
fig.add_annotation(text="""This figure shows the average temperature for each month complemented by the average temperature range per month. 
                   <br> Meaning: the top line shows the average maximum temperature of each month, the bottom line the averaged minima.
                    <br> Keep in mind that these are averaged values, maximum temperature can be outside of the shown range.
                    <br> When the range reaches 0 or below, a blue line highlights the freezing temperature.
                    """,
                    xref='paper', yref='paper',
                    x=0.5, y=-0.7,  # Adjust this value to position the text below the x-axis legend
                    showarrow=False,
                    align='left',
                    font=dict(size=12, color='black')
                    )

# Adjust the bottom margin to create more space below the figure
fig.update_layout(margin=dict(b=150))

# Show the figure
fig.update_traces(hovertemplate='%{x}: %{y:.0f} °C <extra></extra>')
fig.show()


This figure shows the average temperature for each month complemented by the average temperature range per month. Meaning: the top line shows the average maximum temperature of each month Same goes for the bottom line. Keep in mind that this is an average, maximum temperature can be outside of the shown range. 
When the range reaches or below, a blue line highlights the freezing temperature. 

#### Agerage monthly relative humidity range

In [93]:
# Create a DataFrame from the DataArrays
df = pd.DataFrame({
    'month': avg_rh.month.values,
    'avg_rh': avg_rh.values,
    'max_rh': mean_max_rh.values,
    'min_rh': mean_min_rh.values
})

# Create a line chart for average rel humidity
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['month'], y=df['avg_rh'], 
                         mode='lines', 
                         name='Average relative humidity', 
                         line_color='rgb(0, 0, 200)')
                         )

# Add a line chart for max rel humidity
fig.add_trace(go.Scatter(x=df['month'], y=df['max_rh'], 
                         mode='lines',
                         name='Average range per month', 
                         line_color='rgb(5, 150, 250)')
                         )

# Add a line chart for min rel humidity
fig.add_trace(go.Scatter(x=df['month'], y=df['min_rh'], 
                         mode='lines', 
                         name='Min realtive humidity', 
                         line_color='rgb(5, 150, 250)', 
                         fill='tonexty', 
                         fillcolor = 'rgba(5, 150, 250, 0.1)', 
                         showlegend=False)
                         )

# Set the layout
fig.update_layout(title='Average relative humidity range',
                  yaxis=dict(title='Relative humidity (%)'),
                  xaxis=dict(
                      title='Month',
                      tickmode='array',
                      tickvals=df['month'],
                      ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
                      tickangle=-45
                  ),
                  template='simple_white'
                )
# Add text within the figure
fig.add_annotation(text=""" Shown is the average monthly relative humidity and the average monthly range. 
                   <br>The range delimiters show the mean of monhtly maximum and minimum relative humidity. 
                   <br>Here as well, keep in mind these are means and not the extreme values.
                   """,
                    xref='paper', yref='paper',
                    x=0.5, y=-0.6,  # Adjust this value to position the text below the x-axis legend
                    showarrow=False,
                    align='left',
                    font=dict(size=12, color='black')
                    )

# Adjust the bottom margin to create more space below the figure
fig.update_layout(margin=dict(b=150))

# Show the figure
fig.update_traces(hovertemplate='%{x}: %{y:.0f} % <extra></extra>')
fig.show()

Shown is the average monthly relative humidity and the average monthly range. The range delimiters show the mean of monhtly maximum and minimum relative humidity. Here as well, keep in mind these are means and not the extreme values.

### Wind speeds and directions

In [20]:
# Calculate wind speeds for past data
wind_speed = np.sqrt(avg_u**2 + avg_v**2)
#convert to km/h
wind_speed = wind_speed*3.6

# Calculate wind direction (see: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398)
wind_direction = np.mod(180 + np.arctan2(avg_u, avg_v) * (180 / np.pi), 360)

#prepare the data for the wind rose
df = pd.DataFrame({'speed': wind_speed, 'direction': wind_direction})

bins_dir = np.linspace(0, 360, 9)
labels_dir = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
bins_speed = np.arange(0, df['speed'].max() + 1.1,  np.round(np.ceil(max(wind_speed.values))/5))
df['direction'] = pd.cut(df['direction'], bins=bins_dir, labels = labels_dir)
df['speed'] = pd.cut(df['speed'], bins=bins_speed)

# Calculate frequencies
frequency_df = df.groupby(['direction', 'speed'], observed=True).size().reset_index(name='frequency')

# Calculate total frequency
total_frequency = frequency_df['frequency'].sum()

# Convert frequency to proportion
frequency_df['frequency'] = frequency_df['frequency'] / total_frequency

# Get the number of unique 'speed' categories
num_categories = len(frequency_df['speed'].unique())

# Sort the 'speed' categories
sorted_categories = frequency_df['speed'].sort_values().unique()

In [22]:
# Calculate wind speeds with projected data
wind_speed = np.sqrt(proj_avg_u**2 + proj_avg_v**2)
#convert to km/h
wind_speed = wind_speed*3.6

# Calculate wind direction (see: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398)
wind_direction = np.mod(180 + np.arctan2(proj_avg_u, proj_avg_v) * (180 / np.pi), 360)

#prepare the data for the wind rose
df = pd.DataFrame({'speed': wind_speed, 'direction': wind_direction})

bins_dir = np.linspace(0, 360, 9)
labels_dir = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
bins_speed = np.arange(0, df['speed'].max() + 1.1,  np.round(np.ceil(max(wind_speed.values))/5))
df['direction'] = pd.cut(df['direction'], bins=bins_dir, labels = labels_dir)
df['speed'] = pd.cut(df['speed'], bins=bins_speed)

# Calculate frequencies
frequency_df_pred = df.groupby(['direction', 'speed'], observed=False).size().reset_index(name='frequency')

# Calculate total frequency
total_frequency_pred = frequency_df_pred['frequency'].sum()

# Convert frequency to proportion
frequency_df_pred['frequency'] = frequency_df_pred['frequency'] / total_frequency_pred

# Get the number of unique 'speed' categories
num_categories_pred = len(frequency_df_pred['speed'].unique())

# Sort the 'speed' categories
sorted_categories_pred = frequency_df_pred['speed'].sort_values().unique()

In [29]:
sorted_category_union = set(sorted_categories_pred) | set(sorted_categories)
sorted_category_union


{Interval(0.0, 1.0, closed='right'),
 Interval(0.0, 3.0, closed='right'),
 Interval(1.0, 2.0, closed='right'),
 Interval(2.0, 3.0, closed='right'),
 Interval(3.0, 4.0, closed='right'),
 Interval(3.0, 6.0, closed='right'),
 Interval(4.0, 5.0, closed='right'),
 Interval(5.0, 6.0, closed='right'),
 Interval(6.0, 9.0, closed='right'),
 Interval(9.0, 12.0, closed='right'),
 Interval(12.0, 15.0, closed='right')}

In [23]:
# Create a custom color scale with the same number of colors as there are categories
custom_color_scale = plt.cm.viridis_r(np.linspace(0, 1, num_categories))

# Convert the color scale to a list of hex color strings
custom_color_scale = [matplotlib.colors.rgb2hex(color) for color in custom_color_scale]

# Define a color map for the sorted 'speed' categories
color_map = {category: color for category, color in zip(sorted_categories, custom_color_scale)}
# Create the wind rose chart
fig = px.bar_polar(frequency_df, 
                   r='frequency', 
                   theta='direction', 
                   color='speed', 
                   template='simple_white', 
                   color_discrete_map=color_map, labels={"speed": "Speed [km/h]"})  # Use the color map

# Update the layout to make it rectangular
fig.update_layout(
    width=800,  # Set the width to 700 pixels
    height=600,  # Set the height to 1000 pixels
    polar_radialaxis_showgrid=True,  # Show radial grid
    polar_angularaxis_showgrid=True  # Show angular grid
)

fig.show()





In [24]:
## check the title
## checke the direction of carddinal points
## fix the color scale
import plotly.graph_objects as go
# Create the wind rose chart for past data
fig = make_subplots(rows=1, cols=2, subplot_titles=('Wind Rose - Past Data', 'Wind Rose - Projected Data'), specs=[[{'type': 'polar'}]*2])

# Assuming color_map is a dictionary mapping categories to colors
for category, color in color_map.items():
    # Filter the data for this category
    category_df = frequency_df[frequency_df['speed'] == category]

    # Add a trace for this category
    fig.add_trace(go.Barpolar(
        r=category_df['frequency'],
        theta=category_df['direction'],
        marker_color=color,  # Use the color from the color map
        opacity=0.7,
        name=str(category)  # Use the category as the trace name
    ), row=1, col=1)

for category, color in color_map.items():
    # Filter the data for this category
    category_df = frequency_df[frequency_df['speed'] == category]

    # Plot the wind rose for projected data
    fig.add_trace(go.Barpolar(
        r=frequency_df_pred['frequency'],
        theta=frequency_df_pred['direction'],
        marker_color=color,
        opacity=0.7,
        name='Projected Data'
    ), row=1, col=2)

# Set the layout
fig.update_layout(
    polar=dict(
        radialaxis=dict(showgrid=True),
        angularaxis=dict(showgrid=True)
    ),
    showlegend=True
)

# Show the wind roses
fig.show()


Each wind direction is represented by a bar. The length of the bar indicates how frequently the wind blows from that direction (in %).
The colours indicate the averaged wind speed in km/h. Keep in mind that these are averaged values and dont indicate how prone your location can be to events like storms.

#### Average cloud cover and sunrise/sunset times

In [95]:
#find the timezone of the location
tf = timezonefinder.TimezoneFinder()
timezone_str = tf.certain_timezone_at(lat=location.latitude, lng=location.longitude)

#define location infos for the astral package only using coordinates
location_info = LocationInfo(None, None, timezone_str, location.latitude, location.longitude)

#define two empty lists for sunrise and sunset times
sunrise_times, sunset_times = [], []

# append sunrise and sunset times for the 15th of every month of 2022. Automatically adjusted for Daylight Saving Time (DST)
for month in range(1, 13):
    date = datetime(2022, month, 15)
    
    s = sun(location_info.observer, date=date, tzinfo=timezone_str)
    sunrise_times.append(s['sunrise'].strftime('%H:%M'))
    sunset_times.append(s['sunset'].strftime('%H:%M')) 

In [128]:
# Create the graph with cloud cover values plus sunrise and sunset times

# Reshape the data to match the format expected by Plotly
data_reshaped = avg_tcc.values.reshape((12, 24)).T*100  # Use -1 to automatically infer the size

fig = go.Figure()

fig.add_trace(go.Heatmap(z=data_reshaped,
                         x=list(range(12)),
                         y=list(range(24)),
                         xgap = 5,
                         colorscale='gray_r',
                         colorbar=dict(title="Cloud Cover [%]"),
                         hovertemplate='Month: %{x}<br>Time:%{y}:00<br>Cloud Cover: %{z:.0f}%<extra></extra>',  # Custom hover text
                        )
              )

# Set x-axis tickvals and ticktext for each month
fig.update_xaxes(tickvals=list(range(12)),
                 ticktext=[f"{month_name}" for month_name in ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]],
                 tickmode='array',  # Use 'array' for custom tickvals and ticktext
                 tickangle=-45,  # Rotate tick labels for better readability
                 )

# Set x-axis tickvals and ticktext for each day of the month

# Add a line for sunset times
fig.add_trace(go.Scatter(x=list(range(12)),
                         y=[float('{:.2f}'.format(int(h) + int(m) / 60)) for h, m in [time.split(':') for time in sunset_times]],
                         mode='lines',
                         line=dict(color='rgb(150,0,255)', width=2),
                         name='Sunset',
                         hovertemplate='Month: %{x}<br>Sunset at: %{text}<extra></extra>',  # Custom hover text 
                         text=sunset_times
                         )
              )

# Add a line for sunrise times
fig.add_trace(go.Scatter(x=list(range(12)),
                         y=[float('{:.2f}'.format(int(h) + int(m) / 60)) for h, m in [time.split(':') for time in sunrise_times]],
                         mode='lines',
                         line=dict(color='rgb(255,65,0)', width=2),
                         name='Sunrise',
                         hovertemplate='Month: %{x}<br>Sunrise at: %{text}<extra></extra>',  # Custom hover text
                         text=sunrise_times
                         )
              )

# Update layout to show custom line in legend and set title and x axis
fig.update_layout(title='Monthly hourly mean cloud cover with sunrise and sunset times',
                  yaxis_title='Hour of the day',
                  xaxis_title='Month',
                  showlegend=True,
                  legend=dict(x=1.02, y=1.15),
                  yaxis=dict(dtick=2,)
                  )


# Figure description text
fig.add_annotation(text="""
                   <br>This plot shows average hourly cloud cover for each month. 
                   <br>Hourly changes of a month describe the cloud cover cycle of an average day for the associated month, 
                   <br>while changes between months highlight the average seasonal trends. 
                   <br>The two lines show sunrise and sunset times, adjusted for the timezone of the location as well as daylight saving times.  
                   """,
                    xref='paper', yref='paper',
                    x=0.5, y=-0.65,  # Adjust this value to position the text below the x-axis legend
                    showarrow=False,
                    align='left',
                    font=dict(size=12, color='black')
                    )

# Adjust the bottom margin to create more space below the figure
fig.update_layout(margin=dict(b=150))

fig.show()

This plot shows average hourly cloud cover for each month. Hourly changes of a month describe the cloud cover cycle of an average day for the associated month, while changes between months highlight the average seasonal trends.     \
The two lines show sunrise and sunset times, adjusted for the timezone of the location as well as daylight saving times.  