In [9]:
# We need to install a couple of libraries in order to run this code.
# Please open a terminal and install these libraries:
# pip install cdsapi 
# pip install xarray netcdf4 
# pip install dask 
import os
from getpass import getpass

# Location of the CDS API client configuration file in the user's home directory.
home_dir = os.path.expanduser("~")
cdsapirc_path = os.path.join(home_dir, ".cdsapirc")

# SECURITY: credentials must never be hard-coded in a notebook. The key is
# taken from the CDSAPI_KEY environment variable or, failing that, prompted
# for interactively so that it is never stored in the notebook itself.
# Any key that was previously committed here should be treated as leaked
# and regenerated on the CDS profile page.
#
# An existing ~/.cdsapirc is left untouched so a working configuration is
# not silently overwritten every time this cell is re-run.
if not os.path.exists(cdsapirc_path):
    # NOTE(review): confirm that this default endpoint is still the current one.
    cds_url = os.environ.get("CDSAPI_URL", "https://cds.climate.copernicus.eu/api/v2")
    cds_key = os.environ.get("CDSAPI_KEY") or getpass("CDS API key (UID:API-KEY): ")

    with open(cdsapirc_path, "w") as f:
        f.write(f"url: {cds_url}\n")
        f.write(f"key: {cds_key}\n")

    # The file holds a secret: restrict it to the current user.
    os.chmod(cdsapirc_path, 0o600)

In [10]:
import cdsapi
import numpy as np
import pandas as pd
import csv
import xarray as xr
import dask

# Client for the Copernicus Climate Data Store (CDS); used by the
# c.retrieve(...) calls in the download loop.
c = cdsapi.Client()

# Define the latitude/longitude boundaries for each state.
# Each value is a bounding box in the order
#   [min latitude, min longitude, max latitude, max longitude]
# i.e. [south, west, north, east]; values appear to be decimal degrees,
# with negative longitudes for the western hemisphere.
# NOTE(review): there is no 'Alaska' entry (49 states are listed) - confirm
# whether that omission is intentional.
states = {
    'Alabama': [30.137521, -88.473228, 34.984, -84.889103],
    'Arizona': [31.3325, -114.8183, 37.0042, -109.045223],
    'Arkansas': [33.004106, -94.617919, 36.4996, -89.644838],
    'California': [32.5121, -124.6509, 42.0095, -114.1315],
    'Colorado': [36.993076, -109.045223, 41.0037, -102.0416],
    'Connecticut': [40.9667, -73.727775, 42.0506, -71.799217],
    'Delaware': [38.451132, -75.78902, 39.839007, -75.048939],
    'Florida': [24.396308, -87.634643, 31.000888, -80.031362],
    'Georgia': [30.357851, -85.605165, 35.000659, -80.751429],
    'Hawaii': [18.910361, -178.334305, 28.402123, -154.806773],
    'Idaho': [41.988057, -117.2430, 49.001146, -111.0435],
    'Illinois': [36.970298, -91.513079, 42.508481, -87.019935],
    'Indiana': [37.7713, -88.0997, 41.761368, -84.784580],
    'Iowa': [40.375501, -96.639485, 43.501196, -90.140061],
    'Kansas': [36.993076, -102.051769, 40.003162, -94.588413],
    'Kentucky': [36.497129, -89.571509, 39.147732, -81.964970],
    'Louisiana': [28.8832, -94.043147, 33.0195, -88.817017],
    'Maine': [42.956123, -71.082397, 47.459853, -66.934570],
    'Maryland': [37.911717, -79.487651, 39.723043, -75.048939],
    'Massachusetts': [41.186328, -73.508142, 42.886589, -69.859741],
    'Michigan': [41.696118, -90.418135, 48.306063, -82.413474],
    'Minnesota': [43.499356, -97.239209, 49.384358, -89.491739],
    'Mississippi': [30.1476, -91.655009, 34.996052, -88.097889],
    'Missouri': [35.9953, -95.774704, 40.61364, -89.098843],
    'Montana': [44.3582, -116.049153, 49.00139, -104.039138],
    'Nebraska': [39.999998, -104.053514, 43.001708, -95.30829],
    'Nevada': [35.001857, -120.006473, 42.002207, -114.039648],
    'New Hampshire': [42.697039, -72.557247, 45.305476, -70.610620],
    'New Jersey': [38.788657, -75.563392, 41.357423, -73.902454],
    'New Mexico': [31.3323, -109.0493, 37.0004, -103.001964],
    'New York': [40.477248, -79.762152, 45.01585, -71.777491],
    'North Carolina': [33.752877, -84.321869, 36.588117, -75.400120],
    'North Dakota': [45.935072, -104.0496, 49.000692, -96.554507],
    'Ohio': [38.403202, -84.820309, 41.977523, -80.518693],
    'Oklahoma': [33.615833, -103.0026, 37.002312, -94.431392],
    'Oregon': [41.991794, -124.703541, 46.299099, -116.463262],
    'Pennsylvania': [39.7198, -80.5191, 42.514690, -74.689516],
    'Rhode Island': [41.146339, -71.862772, 42.018798, -71.120570],
    'South Carolina': [32.0346, -83.3539, 35.215402, -78.540047],
    'South Dakota': [42.4796, -104.057698, 45.94545, -96.441159],
    'Tennessee': [34.9829, -90.310298, 36.6781, -81.6469],
    'Texas': [25.837377, -106.645646, 36.500704, -93.507817],
    'Utah': [36.997966, -114.052962, 42.0035, -109.045223],
    'Vermont': [42.726853, -73.43774, 45.016659, -71.504509],
    'Virginia': [36.540738, -83.6753, 39.466012, -75.242423],
    'Washington': [45.543541, -124.848974, 49.002494, -116.916031],
    'West Virginia': [37.201483, -82.644740, 40.638800, -77.719519],
    'Wisconsin': [42.491983, -92.888114, 47.302499, -86.2496],
    'Wyoming': [40.994746, -111.056888, 45.005904, -104.05216]
}
# Weekly average temperatures, keyed first by state name and then by year
# string; each leaf is the weekly DataFrame built in the loop below.
state_temperatures = {state: {} for state in states}

# Define the years of interest
years = ["2016", "2017", "2018", "2019"]

# Select the Dask scheduler used for the chunked datasets opened below.
dask.config.set(scheduler='processes')

# Request parameters that are identical for every download; built once here
# instead of on every loop iteration.
all_months = [f"{month:02d}" for month in range(1, 13)]
all_days = [f"{day:02d}" for day in range(1, 32)]
six_hourly_times = ["00:00", "06:00", "12:00", "18:00"]

# Iterate through each state and its boundaries
for state, boundaries in states.items():
    # The `states` table stores each box as [south, west, north, east]
    # (min latitude first). Unpack in that order so that the `area` list
    # sent to the CDS really is [north, west, south, east]; unpacking it as
    # north-first sent the two latitudes swapped.
    south, west, north, east = boundaries

    # Iterate through each year of interest
    for year in years:
        nc_path = f"{state}_{year}.nc"

        # Request the data from the CDS and save it as "{state}_{year}.nc".
        c.retrieve(
            "reanalysis-era5-land",
            {
                "variable": "2m_temperature",
                "year": year,
                "month": all_months,
                "day": all_days,
                "time": six_hourly_times,
                "format": "netcdf",
                "area": [north, west, south, east],
            },
            nc_path
        )

        # Open the netCDF file with xarray + Dask. The context manager closes
        # the file handle once the values have been copied into pandas, so
        # handles do not pile up across the state/year downloads.
        with xr.open_dataset(nc_path, chunks={}) as ds:
            # Flatten to one row per (time, grid point), indexed by time.
            data = ds['t2m'].to_dataframe().reset_index().set_index('time')

        # Ensure the data is sorted by date.
        data = data.sort_index()

        # Resample to weekly frequency: mean over all grid points and time
        # steps falling in each week.
        # NOTE(review): 'W' bins end on Sunday and are labelled with that end
        # date, so the last bin of a year can be labelled with a date (and
        # therefore a month) in the following January.
        weekly_data = data.resample('W').mean()

        # Convert temperatures from Kelvin to Fahrenheit
        weekly_data['t2m'] = (weekly_data['t2m'] - 273.15) * 9/5 + 32

        # ISO week number and calendar month of each bin label, computed
        # with the vectorized index accessors instead of a per-row apply.
        weekly_data['week'] = weekly_data.index.isocalendar().week.astype('int64')
        weekly_data['month'] = weekly_data.index.month.astype('int64')

        # Save the weekly data to the state_temperatures dictionary.
        state_temperatures[state][year] = weekly_data

# Create the CSV file and write the headers.
with open('state_temperatures.csv', 'w', newline='') as csvfile:
    fieldnames = ['State', 'Year', 'Month', 'Week', 'AverageTemperature']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()

    # Write one row per state, year and week.
    # The loop variables are deliberately not called `years`: reusing that
    # name would overwrite the module-level list of years of interest.
    for state, frames_by_year in state_temperatures.items():
        for year, weekly_frame in frames_by_year.items():
            # Walk the needed columns directly rather than using iterrows():
            # iterrows() builds a single-dtype Series per row, which turns
            # the integer month/week into floats and writes them as "1.0".
            for month, week, temperature in zip(
                weekly_frame['month'], weekly_frame['week'], weekly_frame['t2m']
            ):
                writer.writerow({
                    'State': state,
                    'Year': year,
                    'Month': int(month),
                    'Week': int(week),
                    'AverageTemperature': float(temperature),
                })

2023-05-31 13:06:09,131 INFO Welcome to the CDS
2023-05-31 13:06:09,131 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2023-05-31 13:06:09,681 INFO Request is completed
2023-05-31 13:06:09,681 INFO Downloading https://download-0015-clone.copernicus-climate.eu/cache-compute-0015/cache/data7/adaptor.mars.internal-1685490975.5304685-21280-11-293e7e75-5054-4606-80f2-bf0f9011240d.nc to Alabama_2016.nc (4.9M)
2023-05-31 13:06:12,338 INFO Download rate 1.9M/s   
2023-05-31 13:06:14,062 INFO Welcome to the CDS
2023-05-31 13:06:14,064 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-land
2023-05-31 13:06:14,321 INFO Downloading https://download-0009-clone.copernicus-climate.eu/cache-compute-0009/cache/data5/adaptor.mars.internal-1685491809.3860183-9593-20-33c12c37-b396-40dc-900f-d95e90d2b22e.nc to Alabama_2017.nc (4.9M)
2023-05-31 13:06:18,729 INFO Download rate 1.1M/s   
2023-05-31 13:06:20,516 INFO Welc