## simple notebook to get ERA5 data for Kelmarsh wind farm

We often need to fill gaps in on-site measurement records. Reanalysis datasets such as MERRA-2 and ERA5 give us a data source that can be used to build gap-filling models. This is a simplified example: a professional analyst would convert this notebook into a function that accepts the lat/long of a site, or would write a loop to process multiple sites, and would set the date range for the data programmatically.  

This notebook gets ERA5 data for the Kelmarsh wind farm in the UK at lat 52.401461, long -0.943105
using information available from <br>
CDS https://cds.climate.copernicus.eu/how-to-api


This code will not work until you sign up for a CDS account and follow the instructions on the CDS page above to get your own API key.

Did you get your own api key yet?

### Identify surrounding 4 grid points

In [1]:
import math # to use math functions like radians, atan2
import numpy as np
import polars as pl # to use polars dataframes for our data
import polars.selectors as cs # to use column selectors in polars

# Widen the polars print output so that wide tables are not truncated
pl.Config.set_tbl_cols(100)  # Maximum number of DataFrame columns shown when printing
pl.Config.set_tbl_width_chars(200)  # Maximum overall width of a printed table, in characters

import cdsapi # to get the ERA5 data

from calendar import monthrange # to get the number of days in a month
from pathlib import Path # to work with file paths
import time # for time.sleep() to wait for the API to respond
import xarray as xr # to read the netCDF files




def get_surrounding_grid_points(lat, lon, interval=0.25):
    """Return the 3x3 block of grid points centred on the node nearest a location.

    Args:
        lat: Latitude of the location, in decimal degrees.
        lon: Longitude of the location, in decimal degrees.
        interval: Grid spacing in degrees, used for both latitude and longitude.

    Returns:
        A list of nine ``(latitude, longitude)`` tuples of plain Python floats,
        in ascending latitude order and, within each latitude, ascending
        longitude order.
    """
    # Snap to the nearest grid node. float() strips the NumPy scalar type so
    # that the points print as "(52.5, -1.0)" on every NumPy version instead of
    # "(np.float64(52.5), np.float64(-1.0))".
    nearest_lat = float(np.round(lat / interval)) * interval
    nearest_lon = float(np.round(lon / interval)) * interval

    # One step either side of the nearest node along each axis
    lat_points = [nearest_lat - interval, nearest_lat, nearest_lat + interval]
    lon_points = [nearest_lon - interval, nearest_lon, nearest_lon + interval]

    # Every latitude/longitude combination; the loop names are distinct from
    # the `lat`/`lon` parameters so the arguments are not shadowed.
    return [
        (grid_lat, grid_lon)
        for grid_lat in lat_points
        for grid_lon in lon_points
    ]

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points, in kilometres.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        The distance between the two points in kilometres.
    """
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude from degrees to radians
    lat1_rad = math.radians(lat1)
    lon1_rad = math.radians(lon1)
    lat2_rad = math.radians(lat2)
    lon2_rad = math.radians(lon2)

    # Compute differences
    dlat = lat2_rad - lat1_rad
    dlon = lon2_rad - lon1_rad

    # Haversine formula
    a = math.sin(dlat / 2)**2 + math.cos(lat1_rad) * math.cos(lat2_rad) * math.sin(dlon / 2)**2
    # Floating-point rounding can leave `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Distance in kilometers
    return R * c

def get_closest_grid_points(lat, lon, num_points=4):
    """Return the ``num_points`` candidate grid points nearest to a location.

    Candidates come from ``get_surrounding_grid_points`` and are ranked by
    their ``haversine`` distance from ``(lat, lon)``, nearest first.

    Args:
        lat: Latitude of the location, in decimal degrees.
        lon: Longitude of the location, in decimal degrees.
        num_points: How many of the nearest candidates to keep.

    Returns:
        A list of ``(latitude, longitude)`` tuples, nearest first.
    """
    candidates = get_surrounding_grid_points(lat, lon)

    def distance_to_site(candidate):
        # Great-circle distance from the requested location to one candidate
        return haversine(lat, lon, candidate[0], candidate[1])

    # sorted() is stable, so equidistant candidates keep their original order
    ranked = sorted(candidates, key=distance_to_site)
    return ranked[:num_points]

# Location of the Kelmarsh wind farm, in decimal degrees
lat = 52.40
lon = -0.943
# ERA5 data is on a 0.25 degree grid in both latitude and longitude;
# pick the grid points closest to the site
closest_points = get_closest_grid_points(lat, lon)


# Report each selected grid point together with its distance from the site
for point in closest_points:
    point_lat, point_lon = point
    distance = np.round(haversine(lat, lon, point_lat, point_lon), 3)
    print(f'For point {point}, distance is {distance} km from Kelmarsh at {lat}, {lon}')

For point (52.5, -1.0), distance is 11.771 km from Kelmarsh at 52.4, -0.943
For point (52.25, -1.0), distance is 17.123 km from Kelmarsh at 52.4, -0.943
For point (52.5, -0.75), distance is 17.167 km from Kelmarsh at 52.4, -0.943
For point (52.25, -0.75), distance is 21.219 km from Kelmarsh at 52.4, -0.943


### get the data from CDS

As noted above, you have to get an API key first from <br>
CDS https://cds.climate.copernicus.eu/how-to-api




In [2]:

def download_era5_data(year, month, closest_points, output_dir, max_retries=3):
    """Download one month of hourly ERA5 single-level data as a NetCDF file.

    The request covers every day of the month at all 24 hours, for the
    bounding box spanned by ``closest_points``. The file is saved as
    ``era5_single_levels_<year><month>.nc`` inside ``output_dir``.

    Args:
        year: Calendar year of the data to request.
        month: Calendar month (1-12) of the data to request.
        closest_points: Sequence of ``(latitude, longitude)`` pairs whose
            extremes define the requested area.
        output_dir: Directory for the downloaded file; created if missing.
        max_retries: Maximum number of download attempts.

    Returns:
        None. Progress and failures are reported with ``print``; if every
        attempt fails the function returns without raising.
    """
    client = cdsapi.Client()

    # monthrange gives (weekday of the first day, number of days in the month)
    _, days_in_month = monthrange(year, month)

    # Make sure there is somewhere to write the file
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Bounding box enclosing all of the selected grid points
    latitudes = [point[0] for point in closest_points]
    longitudes = [point[1] for point in closest_points]
    south, north = min(latitudes), max(latitudes)
    west, east = min(longitudes), max(longitudes)

    dataset = 'reanalysis-era5-single-levels'
    request = {
        'product_type': 'reanalysis',
        'format': 'netcdf',  # Options: 'grib' or 'netcdf'
        'variable': [
            '2m_temperature',
            '10m_u_component_of_wind',
            '10m_v_component_of_wind',
            'surface_pressure',
            '100m_u_component_of_wind',
            '100m_v_component_of_wind',
        ],
        'year': str(year),
        'month': f'{month:02d}',
        'day': [f'{day:02d}' for day in range(1, days_in_month + 1)],
        'time': [f'{hour:02d}:00' for hour in range(24)],  # '00:00' ... '23:00'
        'area': [north, west, south, east],  # North, West, South, East
    }
    target = output_dir / f'era5_single_levels_{year}{month:02d}.nc'

    # Try the download up to max_retries times, pausing between failures
    for attempt in range(max_retries):
        try:
            client.retrieve(dataset, request, target)
        except Exception as e:
            print(f'Failed to download data for {year}-{month:02d} on attempt {attempt + 1}: {e}')
            if attempt >= max_retries - 1:
                print('Max retries reached. Moving to the next month.')
            else:
                print('Retrying...')
                time.sleep(5)  # Wait for 5 seconds before retrying
        else:
            print(f'Successfully downloaded data for {year}-{month:02d} into {output_dir}')
            break  # Stop retrying once the download has succeeded

# Example usage: fetch one file per calendar month of a single year into our directory
year = 2020
output_dir = Path('era5_netcdf_files')
for month in range(1, 12 + 1):  # months 1..12
    download_era5_data(year, month, closest_points, output_dir)

2024-12-17 19:15:52,127 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:15:52,128 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:15:52,128 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:15:52,846 INFO Request ID is 486f66a2-7621-4ab0-b61e-d4b44d57614c
2024-12-17

d2728c0e5919e2e50f16aa39bdfdb55d.nc:   0%|          | 0.00/161k [00:00<?, ?B/s]

Successfully downloaded data for 2020-01 into era5_netcdf_files


2024-12-17 19:16:10,543 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:16:10,544 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:16:10,544 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:16:11,465 INFO Request ID is 1b600917-7744-4f72-a31d-ca7e79f7257a
2024-12-17

c1bdc3cce7be0dffdad129e203d6f08.nc:   0%|          | 0.00/158k [00:00<?, ?B/s]

Successfully downloaded data for 2020-02 into era5_netcdf_files


2024-12-17 19:16:28,375 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:16:28,376 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:16:28,377 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:16:29,157 INFO Request ID is 9e50d47a-00c5-4e57-8380-79a6bc48fedd
2024-12-17

b375e45f388cf7de01cce01532d50de8.nc:   0%|          | 0.00/161k [00:00<?, ?B/s]

Successfully downloaded data for 2020-03 into era5_netcdf_files


2024-12-17 19:16:37,093 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:16:37,094 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:16:37,094 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:16:37,799 INFO Request ID is 4103e1b2-e081-4a85-9d00-e02eab56b0e4
2024-12-17

a289662f1624b177659d2e6bebac123.nc:   0%|          | 0.00/160k [00:00<?, ?B/s]

Successfully downloaded data for 2020-04 into era5_netcdf_files


2024-12-17 19:16:54,051 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:16:54,051 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:16:54,052 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:16:54,698 INFO Request ID is a7637218-fd74-4cb2-ad5d-b11030c9bc3a
2024-12-17

2952f0a39b767628d152f78f57913e96.nc:   0%|          | 0.00/161k [00:00<?, ?B/s]

Successfully downloaded data for 2020-05 into era5_netcdf_files


2024-12-17 19:17:06,108 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:17:06,109 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:17:06,110 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:17:07,209 INFO Request ID is aca6e91f-b9fb-4f2d-b841-5974e0790e55
2024-12-17

8ac66563eb5814d8f1c1bdf76fb003c9.nc:   0%|          | 0.00/160k [00:00<?, ?B/s]

Successfully downloaded data for 2020-06 into era5_netcdf_files


2024-12-17 19:17:15,205 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:17:15,206 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:17:15,206 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:17:16,133 INFO Request ID is a58a062c-10c5-49bc-a08e-d147f0b70796
2024-12-17

9199b409382cae63fb5e06df71dc422.nc:   0%|          | 0.00/161k [00:00<?, ?B/s]

Successfully downloaded data for 2020-07 into era5_netcdf_files


2024-12-17 19:17:27,648 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:17:27,649 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:17:27,649 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:17:28,612 INFO Request ID is e11babc2-05f0-446c-93c9-a1d402a6dbcc
2024-12-17

5ac2ae2e71402e9580609ac1a4dffbc3.nc:   0%|          | 0.00/162k [00:00<?, ?B/s]

Successfully downloaded data for 2020-08 into era5_netcdf_files


2024-12-17 19:17:50,154 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:17:50,154 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:17:50,155 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:17:51,448 INFO Request ID is 643cd7fd-b961-43b3-bbd4-7cee7d7b831b
2024-12-17

5f1493708bcba60d147ce8de8eb78e4f.nc:   0%|          | 0.00/159k [00:00<?, ?B/s]

Successfully downloaded data for 2020-09 into era5_netcdf_files


2024-12-17 19:18:08,613 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:18:08,614 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:18:08,614 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:18:09,553 INFO Request ID is 700bf67c-a7b2-4f08-8f0c-1210562fcc1e
2024-12-17

754984865d49c3793c0fbf254442a7a4.nc:   0%|          | 0.00/161k [00:00<?, ?B/s]

Successfully downloaded data for 2020-10 into era5_netcdf_files


2024-12-17 19:18:22,084 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:18:22,084 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:18:22,085 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:18:23,554 INFO Request ID is 959572e1-5f26-49b2-94b7-12bb3bb4441e
2024-12-17

9fdb62ae2e74a08664f2daca6fb4ab13.nc:   0%|          | 0.00/159k [00:00<?, ?B/s]

Successfully downloaded data for 2020-11 into era5_netcdf_files


2024-12-17 19:18:35,002 INFO [2024-09-28T00:00:00] **Welcome to the New Climate Data Store (CDS)!** This new system is in its early days of full operations and still undergoing enhancements and fine tuning. Some disruptions are to be expected. Your 
[feedback](https://jira.ecmwf.int/plugins/servlet/desk/portal/1/create/202) is key to improve the user experience on the new CDS for the benefit of everyone. Thank you.
2024-12-17 19:18:35,003 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2024-12-17 19:18:35,003 INFO [2024-09-16T00:00:00] Remember that you need to have an ECMWF account to use the new CDS. **Your old CDS credentials will not work in new CDS!**
[Forum announcement](https://forum.ecmwf.int/t/final-validated-era5-product-to-differ-from-era5t-in-july-2024/6685)
for details and watch it for further updates on this.
2024-12-17 19:18:35,739 INFO Request ID is 66e5b8d8-7df5-4622-97fe-3eafb47019d4
2024-12-17

cc3e78c47b45078d46c08402a80d25cd.nc:   0%|          | 0.00/162k [00:00<?, ?B/s]

Successfully downloaded data for 2020-12 into era5_netcdf_files


### combine nc files into polars df

The data is in NetCDF format, so we read the .nc files and combine them into a single polars DataFrame.


In [20]:

def load_nc_files_to_polars_df(nc_files):
    """Read NetCDF files and stack their contents into one polars DataFrame.

    Each file is opened with xarray, flattened to a pandas DataFrame with the
    index levels turned into ordinary columns, and converted to polars.

    Args:
        nc_files: Iterable of paths to NetCDF files.

    Returns:
        A polars DataFrame holding the rows of every file, concatenated in the
        order the files were given.

    Raises:
        ValueError: If ``nc_files`` yields no files.
    """
    frames = []
    for nc_file in nc_files:
        # The context manager closes the file handle once the data has been
        # copied into the in-memory pandas DataFrame.
        with xr.open_dataset(nc_file) as ds:
            pandas_df = ds.to_dataframe().reset_index()

        frames.append(pl.from_pandas(pandas_df))

    # Checked after the loop so that one-shot iterables (generators) work too
    if not frames:
        raise ValueError('No NetCDF files were provided, so there is nothing to load.')

    return pl.concat(frames)

# Example usage: load all .nc files written by the download step.
# The download cell saves into 'era5_netcdf_files', so read from that same directory.
output_dir = Path('era5_netcdf_files')
nc_files = sorted(output_dir.glob('*.nc'))  # sorted for a reproducible load order

# Load the NetCDF files into a Polars DataFrame, then clean up
polars_df = (load_nc_files_to_polars_df(nc_files) # returns a polars dataframe
             .drop(['expver', 'number']) # drops the columns 'expver' and 'number' which didn't have data
             # Rename to the IEC 61400-25-2 naming convention.
             # Date, TimeStamp, latitude, longitude are not in the table of accepted abbreviations in section 4
             # of IEC 61400-25-2, so use the section 7.2.4.2 name for TimeStamp, appending UTC to avoid confusion
             # with local time; latitude and longitude already match the table 45 names and are left unchanged.
             .rename({'valid_time':'TimeStamp_UTC',
                      'u10':'HorWdU_Alt10m', 'v10':'HorWdV_Alt10m', # some signals marked with 10m meaning 10 minutes,
                      'u100':'HorWdU_Alt100m', 'v100':'HorWdV_Alt100m', # so add Alt so 10m is altitude AGL above ground level
                      't2m':'EnvTmp_Alt2m', 'sp':'EnvPres_Alt0m'})
             .with_columns(pl.col('TimeStamp_UTC').dt.cast_time_unit('ms')) # put ts in ms, needs to be consistent later for joins
             .sort(['TimeStamp_UTC', 'latitude', 'longitude']))

# consider variables used and if it is reasonable to store as float32 instead of float64 as it takes up
# half the space in RAM and you can deal with larger datasets
# the command below selects columns that are type float (32 or 64) and casts them to float32
polars_df = polars_df.cast({cs.float():pl.Float32})


In [21]:
# Review the summary statistics: row and null counts for completeness, value
# ranges for reasonableness, and the timestamp column for the period covered
print(polars_df.describe())

shape: (9, 10)
┌────────────┬─────────────────────┬──────────┬───────────┬──────────────┬───────────────┬───────────────┬───────────────┬────────────────┬────────────────┐
│ statistic  ┆ TimeStamp_UTC       ┆ latitude ┆ longitude ┆ EnvTmp_Alt2m ┆ HorWdU_Alt10m ┆ HorWdV_Alt10m ┆ EnvPres_Alt0m ┆ HorWdU_Alt100m ┆ HorWdV_Alt100m │
│ ---        ┆ ---                 ┆ ---      ┆ ---       ┆ ---          ┆ ---           ┆ ---           ┆ ---           ┆ ---            ┆ ---            │
│ str        ┆ str                 ┆ f64      ┆ f64       ┆ f64          ┆ f64           ┆ f64           ┆ f64           ┆ f64            ┆ f64            │
╞════════════╪═════════════════════╪══════════╪═══════════╪══════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╪════════════════╡
│ count      ┆ 35136               ┆ 35136.0  ┆ 35136.0   ┆ 35136.0      ┆ 35136.0       ┆ 35136.0       ┆ 35136.0       ┆ 35136.0        ┆ 35136.0        │
│ null_count ┆ 0                   ┆ 0.0   

In [19]:
# Preview the first rows to check the column names and dtypes
print(polars_df.head())

shape: (5, 10)
┌─────────────────────┬──────────┬───────────┬──────────────┬───────────────┬───────────────┬───────────────┬────────────────┬────────────────┬─────────────────────┐
│ TimeStamp_UTC       ┆ latitude ┆ longitude ┆ EnvTmp_Alt2m ┆ HorWdU_Alt10m ┆ HorWdV_Alt10m ┆ EnvPres_Alt0m ┆ HorWdU_Alt100m ┆ HorWdV_Alt100m ┆ TimeStamp           │
│ ---                 ┆ ---      ┆ ---       ┆ ---          ┆ ---           ┆ ---           ┆ ---           ┆ ---            ┆ ---            ┆ ---                 │
│ datetime[ns]        ┆ f32      ┆ f32       ┆ f32          ┆ f32           ┆ f32           ┆ f32           ┆ f32            ┆ f32            ┆ datetime[ms]        │
╞═════════════════════╪══════════╪═══════════╪══════════════╪═══════════════╪═══════════════╪═══════════════╪════════════════╪════════════════╪═════════════════════╡
│ 2020-01-01 00:00:00 ┆ 52.25    ┆ -1.0      ┆ 279.213928   ┆ -3.089966     ┆ 1.042038      ┆ 101720.429688 ┆ -5.053909      ┆ 2.368256       ┆ 2020-01-01 

In [6]:
# Save the cleaned table to an 'output' folder under the current working
# directory, creating the folder first if it does not exist yet
cwd = Path.cwd()
output_dir = cwd / 'output'
output_dir.mkdir(parents=True, exist_ok=True)

# Same data in two formats: CSV and parquet
csv_path = output_dir / 'era5_data.csv'
parquet_path = output_dir / 'era5_data.parquet'
polars_df.write_csv(csv_path, datetime_format='%Y-%m-%d %H:%M:%S')
polars_df.write_parquet(parquet_path)