In [2]:
import cdsapi
import xarray as xr
import numpy as np
import pandas as pd
import time
import requests
import os
import xarray as xr
import zipfile
import dask

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import plotly


import numpy as np
import matplotlib.pyplot as plt #
import matplotlib.colors #

from geopy.geocoders import Nominatim
from datetime import datetime
import timezonefinder
from astral.sun import sun
from astral.location import LocationInfo

# Download and prepare the data 

## Past data
In this first section we download and prepare the data for the last 30 years, the dataset is the [ERA5 monthly averaged data on single levels from 1940 to present](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels-monthly-means?tab=overview).


In [2]:
# select the years you want to download
start_year = 1992
end_year = 2022
year_range = [i for i in range(start_year, end_year + 1)]


# Use geopy to get the latitude and longitude of the city
geolocator = Nominatim(user_agent="permaculture-climate")
location = geolocator.geocode("Puebla de don Fadrique, Spain")
# Add a delay between requests
time.sleep(1)


In [9]:
c = cdsapi.Client()
try:
    data = c.retrieve("reanalysis-era5-single-levels-monthly-means",
    {"format": "grib",
     "product_type": "monthly_averaged_reanalysis_by_hour_of_day",
     "variable": ['10m_u_component_of_wind', '10m_v_component_of_wind', 
                '2m_temperature',
                'total_cloud_cover', 
                'total_precipitation',
                '2m_dewpoint_temperature',
                ],
    "area": [location.latitude + 1, 
             location.longitude - 1, 
             location.latitude - 1, 
             location.longitude + 1],  # North, West, South, East. 
    "year": year_range,
    "month": ['01', '02', '03',
           '04', '05', '06',
           '07', '08', '09',
           '10', '11', '12'],
    "time": ["00:00","01:00","02:00","03:00","04:00","05:00",
             "06:00","07:00","08:00","09:00","10:00","11:00",
             "12:00", "13:00","14:00","15:00","16:00","17:00",
             "18:00","19:00","20:00","21:00","22:00","23:00"]
    })

    # Get the location of the file to download
    url = data.location

    # Download the file
    response = requests.get(url)

    # Check if the request was successful
    response.raise_for_status()

except requests.exceptions.HTTPError as errh:
    print ("HTTP Error:",errh)
except requests.exceptions.ConnectionError as errc:
    print ("Error Connecting:",errc)
except requests.exceptions.Timeout as errt:
    print ("Timeout Error:",errt)
except requests.exceptions.RequestException as err:
    print ("Something went wrong with the request:",err)

else:
    # If the request was successful, write the file
    filename = 'past_climate.grib'
    with open(filename, 'wb') as f:
        f.write(response.content)

    # Print the location where the file is saved
    print(f"File saved at: {os.path.abspath(filename)}")

2023-12-17 15:06:55,847 INFO Welcome to the CDS
2023-12-17 15:06:55,849 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels-monthly-means
2023-12-17 15:06:56,235 INFO Request is completed


File saved at: c:\Users\matthias\Documents\Projects\permaculture-climate\past_climate.grib


In [19]:
# List of variables to load
variables = ['2t','10v','10u','tp','tcc', '2d']

# Dictionary to hold the datasets
datasets = {}

# Open the GRIB file for each variable using the short name parameter
for var in variables:
    ds = xr.open_dataset('past_climate.grib', engine='cfgrib', backend_kwargs={'filter_by_keys': {'shortName': var}})
    datasets[var] = ds

# Print the datasets just to check if everything worked
print(datasets)

Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file
Ignoring index file 'past_climate.grib.02ccc.idx' older than GRIB file


{'2t': <xarray.Dataset>
Dimensions:     (time: 8928, latitude: 9, longitude: 9)
Coordinates:
    number      int32 ...
  * time        (time) datetime64[ns] 1992-01-01 ... 2022-12-01T23:00:00
    step        timedelta64[ns] ...
    surface     float64 ...
  * latitude    (latitude) float64 38.96 38.71 38.46 38.21 ... 37.46 37.21 36.96
  * longitude   (longitude) float64 -3.435 -3.185 -2.935 ... -1.934 -1.684 358.6
    valid_time  (time) datetime64[ns] ...
Data variables:
    t2m         (time, latitude, longitude) float32 ...
Attributes:
    GRIB_edition:            1
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2023-12-18T12:36 GRIB to CDM+CF via cfgrib-0.9.1..., '10v': <xarray.Dataset>
Dimensions:     (time: 8928, latitude: 9, longitude: 9)
Co

In [20]:
#calculating relative humidity using temperature and dewpoint temperature
def rh(dewpoint, temperature):
    return 100*(np.exp((17.625*dewpoint)/(243.04+dewpoint))/np.exp((17.625*temperature)/(243.04+temperature)))

rh_all = rh(datasets['2d']['d2m']-273.15, datasets['2t']['t2m']-273.15)

datasets['rh'] = xr.Dataset({'rh': xr.DataArray(rh_all, coords=datasets['2d']['d2m'].coords, dims=datasets['2d']['d2m'].dims)})

In [175]:
#calculate different means and conversions. Parallelized the process to make it significantly faster

# Chunk the data
datasets['tp']['tp'] = datasets['tp']['tp'].chunk({'time': -1})
datasets['2t']['t2m'] = datasets['2t']['t2m'].chunk({'time': -1})
datasets['rh']['rh'] = datasets['rh']['rh'].chunk({'time': -1}) 
datasets['10u']['u10'] = datasets['10u']['u10'].chunk({'time': -1})
datasets['10v']['v10'] = datasets['10v']['v10'].chunk({'time': -1})
datasets['tcc']['tcc'] = datasets['tcc']['tcc'].chunk({'time': -1})

with dask.config.set(scheduler='threads'):  
    print("Calculating precipitation ")
    # Calculate the climatology and average over latitude and longitude. 
    
    # Average precipitation. Converting from m per hour to mm per month
    days_per_month = [31, 28.25, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
    avg_prec = datasets['tp']['tp'].groupby('time.month').mean(['time', 'latitude', 'longitude', 'step']).compute()*1000 * 24 * days_per_month
    print("Calculating temperature")
    
    #average temperatur. Convert from K to C
    mean_spatial_temp = datasets['2t']['t2m'].mean(['latitude', 'longitude'])-273.15
    avg_temp = mean_spatial_temp.groupby('time.month').mean(['time'])
    mean_spatial_temp['month_year'] = mean_spatial_temp['time'].dt.strftime('%Y-%m')
    
    #Calculate average max temp
    max_monthly_temp = mean_spatial_temp.groupby('month_year').max()
    max_monthly_temp['month'] = max_monthly_temp['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_max_temp = max_monthly_temp.groupby('month').mean()

    #calculate average min temperature
    min_monthly_temp = mean_spatial_temp.groupby('month_year').min()
    min_monthly_temp['month'] = min_monthly_temp['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_min_temp = min_monthly_temp.groupby('month').mean() 
    
    print("Calculating relative humidity")
    
    #relative humidity
    mean_spatial_rh = datasets['rh']['rh'].mean(['latitude', 'longitude'])
    avg_rh = mean_spatial_rh.groupby('time.month').mean(['time'])
    mean_spatial_rh['month_year'] = mean_spatial_rh['time'].dt.strftime('%Y-%m')
    
    #calculate average max rh
    max_monthly_rh = mean_spatial_rh.groupby('month_year').max()
    max_monthly_rh['month'] = max_monthly_rh['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_max_rh = max_monthly_rh.groupby('month').mean()
    
    #calculate average min rh
    min_monthly_rh = mean_spatial_rh.groupby('month_year').min()
    min_monthly_rh['month'] = min_monthly_rh['month_year'].str.slice(start=5, stop=7).astype(int)
    mean_min_rh = min_monthly_rh.groupby('month').mean()
    
    print("Calculating winds")
       
    #Average winds
    avg_u = datasets['10u']['u10'].groupby('time.month').mean(['latitude', 'longitude'])
    avg_v = datasets['10v']['v10'].groupby('time.month').mean(['latitude', 'longitude'])
    
    print("Calculating total cloud cover")

    #Get rid of the latitude and longitude dimensions by averaging the data
    avg_tcc_spatial = datasets['tcc']['tcc'].mean(['longitude', 'latitude'])


    #Now average the data of each hour of each month across the 30 years of data. We end up with 288 data points, representing 24 h per month
    month_hour_grouped = avg_tcc_spatial.groupby(avg_tcc_spatial['time.month'] * 100 + avg_tcc_spatial['time.hour'])
    avg_tcc = month_hour_grouped.mean(dim='time')
    
    print("Climatology calculated")
    
# Convert Dask DataFrames back to pandas DataFrames
avg_prec = avg_prec.compute()
avg_temp = avg_temp.compute()
mean_max_temp = mean_max_temp.compute()
mean_min_temp = mean_min_temp.compute()
avg_rh = avg_rh.compute()
mean_max_rh = mean_max_rh.compute()
mean_min_rh = mean_min_rh.compute()
avg_u = avg_u.compute()
avg_v = avg_v.compute()
avg_tcc = avg_tcc.compute()


Calculating precipitation 
Calculating temperature
Calculating relative humidity
Calculating winds



Slicing with an out-of-order index is generating 31 times more chunks


Slicing with an out-of-order index is generating 31 times more chunks



Calculating total cloud cover
Climatology calculated


## Projected data

In this section we download and prepare projected data coming from the [CMIP6 climate projections](https://cds.climate.copernicus.eu/cdsapp#!/dataset/projections-cmip6?tab=overview). The selected scenario is the ssp2 scenario with 4.5 °C warming. 

In [None]:
# select the future years range
start_year_forecast = 2016
end_year_forecast = 2046
year_range_forecast = [str(i) for i in range(start_year_forecast, end_year_forecast + 1)]

#variables that will be downloaded
dataset_variables = ['near_surface_relative_humidity', 'near_surface_air_temperature', 'eastward_near_surface_wind', 
                     'northward_near_surface_wind', 'precipitation']


### Rainfall and temperatures

In [None]:
# Send API requests for the data specified in dataset_variables. Each request returns a folder.zip

c = cdsapi.Client()

for variable in dataset_variables:
        #create an if statement, as certain variables also include level, while others dont


    data = c.retrieve(
        'projections-cmip6',
        {
            'format': 'zip',
            'temporal_resolution': 'monthly',
            'variable': variable,
            'experiment': 'ssp2_4_5',
            'model': 'ec_earth3_cc',
            'area':[location.latitude + 1, 
                location.longitude - 1, 
                location.latitude - 1, 
                location.longitude + 1],  # North, West, South, East. 
            'month': [
                '01', '02', '03',
                '04', '05', '06',
                '07', '08', '09',
                '10', '11', '12',
            ],
            'year': year_range_forecast,

        },
        variable + '.zip')    
    #selectt the location of the file to download
    url = data.location
    # Download the file
    response = requests.get(url)
    # Check if the request was successful
    response.raise_for_status()

    # If the request was successful, write the file
    filename = variable + '.zip'
    with open(filename, 'wb') as f:
        f.write(response.content)

        # Print the location where the file is saved
        print(f"File saved at: {os.path.abspath(filename)}")
print('Downloads completed')



In [None]:
#extract all zip folders into new folders with the name of the variable they belong to

extract_dir = 'C:/Users/matthias/Documents/Projects/permaculture-climate/prediction_data/'

for filename in os.listdir():

    if filename.endswith('.zip'):
        # Construct the full path to the zip file
        zip_file_path = os.path.join(os.getcwd(), filename)

        # Get the folder name from the zip file (excluding the '.zip' extension)
        folder_name = os.path.splitext(filename)[0]
        
        # Create a directory with the same name as the zip file within the parent directory
        extracted_dir = os.path.join(extract_dir, folder_name)

        os.makedirs(extracted_dir, exist_ok=True)

        # Open the zip file
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            # Extract all the contents to the created directory
            zip_ref.extractall(extracted_dir)

        print(f"Files from {filename} extracted to: {extracted_dir}")

print("Extraction complete.")

In [None]:
# find the paths to each .nc file, as they contain the data we need

nc_file_paths = []
for folder_name in os.listdir(extract_dir):
    folder_path = os.path.join(extract_dir, folder_name)

    # Check if it's a directory
    if os.path.isdir(folder_path):
        # Find NC files within the folder
        nc_files = [file for file in os.listdir(folder_path) if file.endswith('.nc')]

        # Assuming there is exactly one NC file in each folder
        if len(nc_files) == 1:
            nc_file_paths.append(os.path.join(folder_path, nc_files[0]))

# Print the list of NC file paths
print("List of NC file paths:")
for nc_path in nc_file_paths:
    print(nc_path)

In [None]:
#create a dictionary containing all datasets
projection_datasets = {}
for  path in nc_file_paths:
    #get the name of the folder the variable is saved in
    folder_path = os.path.dirname(path)
    folder_name = os.path.basename(folder_path)
    #open the nc file append it to projection_dataset dictionary
    proj_ds = xr.open_dataset(path)
    projection_datasets[folder_name] = proj_ds
    


In [None]:
#calculate and convert different variables. no parallelisation needed here as the dataset is much smaller.

#calculate average total precipitation 
#Convert the values from kg * m^-2 * s^-1 to mm 
proj_avg_prec = projection_datasets['precipitation']['pr'].groupby('time.month').mean(['time', 'lat', 'lon'])*2592000 

#calculate average temperature. Convert from K to C
proj_avg_temp = projection_datasets['near_surface_air_temperature']['tas'].groupby('time.month').mean(['time', 'lat', 'lon'])-273.15

#Same operation but for relative humidity.
proj_avg_hum = projection_datasets['near_surface_relative_humidity']['hurs'].groupby('time.month').mean(['time', 'lat', 'lon'])

#calculate average wind speeds
proj_avg_u = projection_datasets['eastward_near_surface_wind']['uas'].groupby('time.month').mean(['time', 'lat', 'lon'])
proj_avg_v = projection_datasets['northward_near_surface_wind']['vas'].groupby('time.month').mean(['time', 'lat', 'lon'])


# Graphical representations 
In this section we will present both past and projected data 

In [123]:

# Create a subplot with shared x-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add a bar chart for precipitation and projected precipitation to the secondary y-axis
bar_prec = go.Bar           (x=avg_prec.month, y=avg_prec, name='Precipitation', opacity=0.5, marker_color = 'blue',
                             hovertemplate=('%{x}: %{y:.0f} mm <extra></extra>'))
bar_prec_projected = go.Bar(x=proj_avg_prec.month, y=proj_avg_prec, name='Projected precipitation', marker_color = 'cyan',
                            hovertemplate=('%{x}: %{y:.0f} mm <extra></extra>'))

fig.add_trace(bar_prec)
fig.add_trace(bar_prec_projected)


# Add a line chart for temperature to the primary y-axis
fig.add_trace(go.Scatter(x=avg_temp.month, y=avg_temp, mode='lines', name='Temperature', line_color = 'red', 
                         hovertemplate=('%{x}: %{y:.0f} °C <extra></extra>')),
              
              secondary_y=True)

# Add a line chart for projected temperature
fig.add_trace(
    go.Scatter(x=proj_avg_temp.month, y=proj_avg_temp, mode='lines', name='Projected temperature', line_color='orange',
               hovertemplate=('%{x}: %{y:.0f} °C <extra></extra>')),   
    secondary_y=True,
)

# Set the layout to have two y-axes
fig.update_layout(
    title='Average projected temperature and precipitation',
    yaxis=dict(title='Precipitation (mm)'),
    yaxis2=dict(title='Temperature (°C)', overlaying='y', side='right'),
    xaxis=dict(
        title='Month',
        tickmode='array',
        tickvals=avg_temp.month,
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        tickangle=-45
    ),
    template='simple_white',
)




# Show the figure
fig.show()

The blue bars show the total rainfall in mm averaged for each month.    
The orange line shows the average temperature in °C per month. 

In [125]:
# Create a DataFrame from the DataArrays
df = pd.DataFrame({
    'month': avg_temp.month.values,
    'avg_temp': avg_temp.values,
    'max_temp': mean_max_temp.values,
    'min_temp': mean_min_temp.values
})

# Create a line chart for average temperature
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['month'], y=df['avg_temp'], 
                         mode='lines', 
                         name='Average temperature', 
                         line_color='orange')
                         )

# Add a line chart for max temperature
fig.add_trace(go.Scatter(x=df['month'], y=df['max_temp'], 
                         mode='lines',
                         name='Average range per month', 
                         line_color='red')
                         )

# Add a line chart for min temperature
fig.add_trace(go.Scatter(x=df['month'], y=df['min_temp'], 
                         mode='lines', 
                         name='Min temperature', 
                         line_color='red', 
                         fill='tonexty', 
                         fillcolor = 'rgba(255, 0, 0, 0.1)', 
                         showlegend=False)
                         )

# Add a line chart for min temperature
if min(mean_min_temp.values) <= 0.9:
    fig.add_hline(y=0, opacity=1, 
                  line_width=2, 
                  line_dash='dash', 
                  line_color='blue',
                  annotation_text='freezing', 
                  annotation_position='top')

# Set the layout
fig.update_yaxes(range=[0, max(df['max_temp'] + 5)])

fig.update_layout(
    title='Average temperature range',
    
    yaxis=dict(title='Temperature (°C)'),
    xaxis=dict(
        title='Month',
        tickmode='array',
        tickvals=df['month'],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        tickangle=-45
    ),
    template='simple_white'
)
# Show the figure

fig.update_traces(hovertemplate='%{x}: %{y:.0f} °C <extra></extra>')
fig.show()

This figure shows the average temperature for each month complemented by the average temperature range per month. Meaning: the top line shows the average maximum temperature of each month Same goes for the bottom line. Keep in mind that this is an average, maximum temperature can be outside of the shown range. 
When the range reaches or below, a blue line highlights the freezing temperature. 


In [172]:
# Create a DataFrame from the DataArrays
df = pd.DataFrame({
    'month': avg_rh.month.values,
    'avg_rh': avg_rh.values,
    'max_rh': mean_max_rh.values,
    'min_rh': mean_min_rh.values
})

# Create a line chart for average rel humidity
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['month'], y=df['avg_rh'], 
                         mode='lines', 
                         name='Average relative humidity', 
                         line_color='rgb(0, 0, 200)')
                         )

# Add a line chart for max rel humidity
fig.add_trace(go.Scatter(x=df['month'], y=df['max_rh'], 
                         mode='lines',
                         name='Average range per month', 
                         line_color='rgb(5, 150, 250)')
                         )

# Add a line chart for min rel humidity
fig.add_trace(go.Scatter(x=df['month'], y=df['min_rh'], 
                         mode='lines', 
                         name='Min realtive humidity', 
                         line_color='rgb(5, 150, 250)', 
                         fill='tonexty', 
                         fillcolor = 'rgba(5, 150, 250, 0.1)', 
                         showlegend=False)
                         )


# Set the layout
# fig.update_yaxes(range=[0, max(df['max_rh'].values)])

fig.update_layout(
    title='Average relative humidity range',
    
    yaxis=dict(title='Relative humidity (%)'),
    xaxis=dict(
        title='Month',
        tickmode='array',
        tickvals=df['month'],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        tickangle=-45
    ),
    template='simple_white'
)
# Show the figure

fig.update_traces(hovertemplate='%{x}: %{y:.0f} % <extra></extra>')
fig.show()

Shown is the average monthly relative humidity and the average monthly range. The range delimiters show the mean of monhtly maximum and minimum relative humidity. Here as well, keep in mind these are means and not the extreme values.

### Wind speeds and directions

In [44]:
# Calculate wind speeds for past data
wind_speed = np.sqrt(avg_u**2 + avg_v**2)
#convert to km/h
wind_speed = wind_speed*3.6

# Calculate wind direction (see: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398)
wind_direction = np.mod(180 + np.arctan2(avg_u, avg_v) * (180 / np.pi), 360)

#prepare the data for the wind rose
df = pd.DataFrame({'speed': wind_speed, 'direction': wind_direction})

bins_dir = np.linspace(0, 360, 9)
labels_dir = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
bins_speed = np.arange(0, df['speed'].max() + 1.1,  np.round(np.ceil(max(wind_speed.values))/5))
df['direction'] = pd.cut(df['direction'], bins=bins_dir, labels = labels_dir)
df['speed'] = pd.cut(df['speed'], bins=bins_speed)

# Calculate frequencies
frequency_df = df.groupby(['direction', 'speed']).size().reset_index(name='frequency')

# Calculate total frequency
total_frequency = frequency_df['frequency'].sum()

# Convert frequency to proportion
frequency_df['frequency'] = frequency_df['frequency'] / total_frequency

# Get the number of unique 'speed' categories
num_categories = len(frequency_df['speed'].unique())

# Sort the 'speed' categories
sorted_categories = frequency_df['speed'].sort_values().unique()






In [None]:
# Calculate wind speeds with projected data
wind_speed = np.sqrt(proj_avg_u**2 + proj_avg_v**2)
#convert to km/h
wind_speed = wind_speed*3.6

# Calculate wind direction (see: https://confluence.ecmwf.int/pages/viewpage.action?pageId=133262398)
wind_direction = np.mod(180 + np.arctan2(proj_avg_u, proj_avg_v) * (180 / np.pi), 360)

#prepare the data for the wind rose
df = pd.DataFrame({'speed': wind_speed, 'direction': wind_direction})

bins_dir = np.linspace(0, 360, 9)
labels_dir = ["N", "NE", "E", "SE", "S", "SW", "W", "NW"]
bins_speed = np.arange(0, df['speed'].max() + 1.1,  np.round(np.ceil(max(wind_speed.values))/5))
df['direction'] = pd.cut(df['direction'], bins=bins_dir, labels = labels_dir)
df['speed'] = pd.cut(df['speed'], bins=bins_speed)

# Calculate frequencies
frequency_df = df.groupby(['direction', 'speed']).size().reset_index(name='frequency')

# Calculate total frequency
total_frequency = frequency_df['frequency'].sum()

# Convert frequency to proportion
frequency_df['frequency'] = frequency_df['frequency'] / total_frequency

# Get the number of unique 'speed' categories
num_categories = len(frequency_df['speed'].unique())

# Sort the 'speed' categories
sorted_categories = frequency_df['speed'].sort_values().unique()

In [79]:
import plotly.subplots as sp

fig = sp.make_subplots(rows=1, cols=2, subplot_titles=['Graph 1', 'Graph 2'])

# Duplicate the wind rose chart for the first subplot
fig1 = go.FigureWidget(fig)
fig1.add_trace(fig['data'][0])  # Assuming there's only one trace in the original figure
fig1.update_layout(width=800, height=600, polar_radialaxis_showgrid=True, polar_angularaxis_showgrid=True)

# Duplicate the wind rose chart for the second subplot
fig2 = go.FigureWidget(fig)
fig2.add_trace(fig['data'][0])  # Assuming there's only one trace in the original figure
fig2.update_layout(width=800, height=600, polar_radialaxis_showgrid=True, polar_angularaxis_showgrid=True)

# Update the layout of the main figure
fig.update_layout(showlegend=True)  # Set showlegend to True for a single legend

# Update the subplot titles
fig.update_layout(
    title_text='Wind Rose Charts',
    title_x=0.5,
)

# Update the layout of each subplot
fig1.update_layout(title_text='Graph 1', title_x=0.5)
fig2.update_layout(title_text='Graph 2', title_x=0.5)

# Update the color scale for each subplot if needed
# (e.g., if the color scale is specific to the first graph, you might want to adjust it)
# fig2.update_traces(marker=dict(colorscale=custom_color_scale))

# Display the subplots
fig.add_trace(fig1['data'][0], row=1, col=1)
fig.add_trace(fig2['data'][0], row=1, col=2)

fig.show()

ImportError: Please install ipywidgets>=7.0.0 to use the FigureWidget class

Each wind direction is represented by a bar. The length of the bar indicates how frequently the wind blows from that direction (in %).
The colours indicate the averaged wind speed in km/h. Keep in mind that these are averaged values and dont indicate how prone your location can be to events like storms.

## Average cloud cover and sunrise/sunset times

In [200]:
#find the timezone of the location
tf = timezonefinder.TimezoneFinder()
timezone_str = tf.certain_timezone_at(lat=location.latitude, lng=location.longitude)

#define location infos for the astral package only using coordinates
location_info = LocationInfo(None, None, timezone_str, location.latitude, location.longitude)

#define two empty lists for sunrise and sunset times
sunrise_times, sunset_times = [], []

# append sunrise and sunset times for the 15th of every month of 2022. Automatically adjusted for Daylight Saving Time (DST)
for month in range(1, 13):
    date = datetime(2022, month, 15)
    
    s = sun(location_info.observer, date=date, tzinfo=timezone_str)
    sunrise_times.append(s['sunrise'].strftime('%H:%M'))
    sunset_times.append(s['sunset'].strftime('%H:%M'))



print(sunrise_times)
    

['08:24', '08:00', '07:21', '07:34', '06:59', '06:46', '06:59', '07:25', '07:51', '08:18', '07:50', '08:18']


In [201]:
# Create a graph with cloud cover values plus sunrise and sunset times

# Get rid of the coding of the month/hour combination used for grouping before. 
month_values = avg_tcc['group'] // 100
hour_values = avg_tcc['group'] % 100

# Reshape the data to match the format expected by Plotly
data_reshaped = avg_tcc.values.reshape((12, 24)).T  # Use -1 to automatically infer the size

fig = go.Figure()

fig.add_trace(go.Heatmap(
    z=data_reshaped,
    x=list(range(12)),
    y=list(range(24)),
    xgap = 5,
    colorscale='gray_r',
    colorbar=dict(title="Cloud Cover"),
))

# Set x-axis tickvals and ticktext for each month
fig.update_xaxes(
    tickvals=list(range(len(month_values))),
    ticktext=[f"{month_name}" for month_name in ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]],
    tickmode='array',  # Use 'array' for custom tickvals and ticktext
    tickangle=-45,  # Rotate tick labels for better readability
)

# Set axis labels and title
fig.update_layout(
    title='Monthly hourly mean cloud cover with sunrise and sunset times',
    yaxis_title='Hour of the day',
    xaxis_title='Month',
)


# Set x-axis tickvals and ticktext for each day of the month


# Add a line for sunset times
fig.add_trace(go.Scatter(
    x=list(range(12)),
    y=[float('{:.2f}'.format(int(h) + int(m) / 60)) for h, m in [time.split(':') for time in sunset_times]],
    mode='lines',
    line=dict(color='rgb(150,0,255)', width=2),
    name='Sunset'
))

# Add a line for sunrise times
fig.add_trace(go.Scatter(
    x=list(range(12)),
    y=[float('{:.2f}'.format(int(h) + int(m) / 60)) for h, m in [time.split(':') for time in sunrise_times]],
    mode='lines',
    line=dict(color='rgb(255,65,0)', width=2),
    
    name='Sunrise'
))


# Update layout to show custom line in legend
fig.update_layout(
    showlegend=True,
    legend=dict(x=1.02, y=1.15),
    yaxis=dict(
        dtick=2,)
)


# Show the plot

fig.show()

### JUNK CODE FOR NOT WORKING CLODCOVER PLOT

In [194]:
# Create a graph with cloud cover values plus sunrise and sunset times

# Get rid of the coding of the month/hour combination used for grouping before. 
month_values = avg_tcc['group'] // 100
hour_values = avg_tcc['group'] % 100

# Reshape the data to match the format expected by Plotly
tcc_reshaped = avg_tcc.values.reshape((12, 24)).T  # Use -1 to automatically infer the size

fig = make_subplots(specs=[[{"secondary_y": True}]])


fig.add_trace(go.Heatmap(
    z=tcc_reshaped,
    x=list(range(12)),
    y=list(range(24)),
    xgap = 5,
    colorscale='gray_r',
    colorbar=dict(title="Cloud Cover"),
    hovertemplate='%{x}, %{y}.00: %{z:.2f} % <extra></extra>'

),
    secondary_y=False
    )

# Set x-axis tickvals and ticktext for each month
fig.update_xaxes(
    tickvals=list(range(len(month_values))),
    ticktext=[f"{month_name}" for month_name in ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]],
    tickmode='array',  # Use 'array' for custom tickvals and ticktext
    tickangle=-45,  # Rotate tick labels for better readability
)


# Set axis labels and title
fig.update_layout(
    title='Monthly hourly mean cloud cover with sunrise and sunset times',
    yaxis_title='Hour of the day',
    xaxis_title='Month',
)

# Add a line for sunset times
fig.add_trace(go.Scatter(
    x=list(range(12)),
    y=sunset_times,
    mode='lines',
    line=dict(color='rgb(150,0,255)', width=2),
    name='Sunset'
),
              secondary_y=True)

# Add a line for sunrise times
fig.add_trace(go.Scatter(
    x=list(range(12)),
    y=,
    mode='lines',
    line=dict(color='rgb(255,65,0)', width=2),
    
    name='Sunrise'
))


# Update layout to show custom line in legend
fig.update_layout(
    showlegend=True,
    legend=dict(x=1.02, y=1.15),
    yaxis=dict(
        dtick=2,)
)


# Show the plot

fig.show()

This plot shows average hourly cloud cover for each month. Hourly changes of a month describe the cloud cover cycle of an average day for the associated month, while changes between months highlight the average seasonal trends.     \
The two lines show sunrise and sunset times, adjusted for the timezone of the location as well as daylight saving times.  

In [198]:
# Define the location (replace with the latitude and longitude of your location)
city = LocationInfo("Kiel", "Germany", "London", 51.5, -0.116)

# Calculate the sunset times for each month
sunset_times = []
for month in range(1, 13):
    date = pd.Timestamp(year=2023, month=month, day=1)  # Use the desired year
    sunset_time = sun(city.observer, date=date, tzinfo=city.timezone).sunset
    sunset_times.append(sunset_time)

for month in range(1, 13):
    date = datetime(2022, month, 15)
    
    s = sun(location_info.observer, date=date, tzinfo=timezone_str)
    sunrise_times.append(s['sunrise'].strftime('%H:%M'))
    sunset_times.append(s['sunset'].strftime('%H:%M'))

# Create a DataFrame
df = pd.DataFrame({
    'Month': pd.date_range(start='2023-01-01', end='2023-12-01', freq='MS').strftime('%B'),  # Use the desired year
    'Sunset Time': sunset_times
})

# Plot with Plotly
fig = px.line(df, x='Month', y='Sunset Time', title='Variation of Sunset Times')
fig.update_xaxes(title='Month')
fig.update_yaxes(title='Sunset Time')
fig.show()

ZoneInfoNotFoundError: 'No time zone found with key London'