# Abstract

Open-Meteo API calls to retrieve the following daily measurements at the coordinates of all South Carolina weather stations accessed in (1)... 

- Average Apparent Temperature (F)
- Average Temperature at 2 meters (F)
- Sunshine Duration (sec)
- Total Shortwave Radiation (MJ/m^2)
- Total Precipitation (mm)
- Total Precipitation Time (hrs)
- Total Evapotranspiration (mm)
- Weather Code (WMO)

In [None]:
pip install openmeteo-requests

In [None]:
pip install requests-cache retry-requests numpy pandas

In [2]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

In [1]:
%store -r stations_df
%store -r om_api

# API Calls: Daily Weather Measurements

# 2018 

There are 184 days in the 2018 date range (7/1/18 to 12/31/18).

There are 1391 sets of station coordinates. 

Should end with 255,944 rows.

In [29]:
list_of_2018_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2018-07-01",
        "end_date": "2018-12-31",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2018 = pd.DataFrame(data = daily_data)
            
            list_of_2018_dataframes.append(individual_df_2018)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2018 = pd.concat(list_of_2018_dataframes, axis = 0, ignore_index = True)

In [30]:
df_2018

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2018-07-01 05:00:00,34.136584,-82.396848,61.0,80.070641,87.661308,41894.300781,3.000000,4.0,21.360001,4.607244
1,2018-07-02 05:00:00,34.136584,-82.396848,51.0,80.640656,88.680756,46569.902344,0.300000,3.0,21.500000,4.642812
2,2018-07-03 05:00:00,34.136584,-82.396848,61.0,80.584404,89.844307,46800.000000,1.900000,5.0,22.549999,4.768721
3,2018-07-04 05:00:00,34.136584,-82.396848,55.0,80.674400,86.568260,46800.000000,2.600000,9.0,23.860001,5.555318
4,2018-07-05 05:00:00,34.136584,-82.396848,53.0,80.306908,87.647400,40072.718750,2.100000,11.0,22.200001,4.757941
...,...,...,...,...,...,...,...,...,...,...,...
255939,2018-12-27 05:00:00,34.116680,-80.169610,55.0,48.371601,43.125034,6654.927734,1.100000,2.0,3.700000,0.868593
255940,2018-12-28 05:00:00,34.116680,-80.169610,63.0,64.372849,63.267872,698.788147,23.900002,16.0,3.170000,0.828855
255941,2018-12-29 05:00:00,34.116680,-80.169610,51.0,58.661591,57.369099,21236.679688,0.300000,1.0,7.190000,1.238698
255942,2018-12-30 05:00:00,34.116680,-80.169610,55.0,56.456600,56.656277,5486.902832,3.900000,5.0,4.020000,0.732361


### Final 2018 Data Frame

In [31]:
%store df_2018

Stored 'df_2018' (DataFrame)


# 2019

There are 365 days in the 2019 date range (1/1/19 to 12/31/19).

There are 1391 sets of station coordinates. 

Should end with 507,715 rows.

In [32]:
list_of_2019_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2019-01-01",
        "end_date": "2019-12-31",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2019 = pd.DataFrame(data = daily_data)
            
            list_of_2019_dataframes.append(individual_df_2019)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2019 = pd.concat(list_of_2019_dataframes, axis = 0, ignore_index = True)

In [33]:
df_2019

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2019-01-01 05:00:00,34.136584,-82.396848,55.0,61.961895,62.603054,21790.306641,3.800000,12.0,6.32,1.088321
1,2019-01-02 05:00:00,34.136584,-82.396848,61.0,55.234402,55.283508,19542.789062,8.200001,10.0,6.43,1.026528
2,2019-01-03 05:00:00,34.136584,-82.396848,61.0,53.828144,53.649441,0.000000,9.399999,19.0,3.11,0.550976
3,2019-01-04 05:00:00,34.136584,-82.396848,63.0,55.684399,52.857800,2341.489990,25.900002,17.0,1.88,0.633580
4,2019-01-05 05:00:00,34.136584,-82.396848,3.0,50.104397,43.145077,32261.658203,0.000000,0.0,12.10,2.277871
...,...,...,...,...,...,...,...,...,...,...,...
507710,2019-12-27 05:00:00,34.116680,-80.169610,51.0,57.386597,57.678593,7081.608398,0.400000,4.0,4.44,0.801568
507711,2019-12-28 05:00:00,34.116680,-80.169610,53.0,61.477844,62.403259,27044.839844,1.400000,5.0,8.83,1.464592
507712,2019-12-29 05:00:00,34.116680,-80.169610,51.0,65.992851,66.873985,7448.800781,2.500000,11.0,4.54,0.998632
507713,2019-12-30 05:00:00,34.116680,-80.169610,55.0,64.222847,60.096100,16863.421875,1.600000,5.0,6.37,2.286707


### Final 2019 Data Frame

In [34]:
%store df_2019

Stored 'df_2019' (DataFrame)


# 2020

There are 365 days in the 2020 date range (1/1/20 to 12/31/20).

There are 1391 sets of station coordinates. 

Should end with 509,106 rows.

In [35]:
list_of_2020_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2020-01-01",
        "end_date": "2020-12-31",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2020 = pd.DataFrame(data = daily_data)
            
            list_of_2020_dataframes.append(individual_df_2020)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2020 = pd.concat(list_of_2020_dataframes, axis = 0, ignore_index = True)

In [36]:
df_2020

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2020-01-01 05:00:00,34.136584,-82.396848,1.0,45.431896,38.124744,32208.197266,0.000000,0.0,11.74,2.164433
1,2020-01-02 05:00:00,34.136584,-82.396848,61.0,45.731899,41.887112,0.000000,9.400000,12.0,2.55,0.576333
2,2020-01-03 05:00:00,34.136584,-82.396848,65.0,56.888157,56.882107,3555.427979,28.600000,18.0,2.92,0.486068
3,2020-01-04 05:00:00,34.136584,-82.396848,61.0,53.903152,48.807217,29132.552734,4.600000,10.0,9.01,1.958391
4,2020-01-05 05:00:00,34.136584,-82.396848,0.0,42.998154,35.668446,32260.296875,0.000000,0.0,12.48,2.024268
...,...,...,...,...,...,...,...,...,...,...,...
509101,2020-12-27 05:00:00,34.116680,-80.169610,1.0,37.084095,30.616323,31995.583984,0.000000,0.0,12.27,1.497812
509102,2020-12-28 05:00:00,34.116680,-80.169610,1.0,45.221603,38.950520,31913.667969,0.000000,0.0,11.91,1.957834
509103,2020-12-29 05:00:00,34.116680,-80.169610,1.0,46.931606,41.209785,32036.193359,0.000000,0.0,11.68,1.753830
509104,2020-12-30 05:00:00,34.116680,-80.169610,51.0,41.966595,35.992867,2051.678467,1.500000,9.0,3.19,0.751038


### Final 2020 Data Frame

In [37]:
%store df_2020

Stored 'df_2020' (DataFrame)


# 2021

There are 365 days in the 2021 date range (1/1/21 to 12/31/21).

There are 1391 sets of station coordinates. 

Should end with 507,715 rows.

In [38]:
list_of_2021_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2021-01-01",
        "end_date": "2021-12-31",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2021 = pd.DataFrame(data = daily_data)
            
            list_of_2021_dataframes.append(individual_df_2021)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2021 = pd.concat(list_of_2021_dataframes, axis = 0, ignore_index = True)

In [39]:
df_2021

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2021-01-01 05:00:00,34.136584,-82.396848,61.0,57.806900,57.518314,0.000000,15.1,17.0,2.21,0.383980
1,2021-01-02 05:00:00,34.136584,-82.396848,53.0,58.913147,58.710938,13162.907227,2.2,4.0,7.19,1.185900
2,2021-01-03 05:00:00,34.136584,-82.396848,51.0,49.268147,43.689457,32179.769531,0.1,1.0,12.15,1.754881
3,2021-01-04 05:00:00,34.136584,-82.396848,1.0,43.770649,38.798576,32254.884766,0.0,0.0,12.32,1.638202
4,2021-01-05 05:00:00,34.136584,-82.396848,53.0,48.533146,42.746639,32053.800781,0.5,1.0,10.73,1.846038
...,...,...,...,...,...,...,...,...,...,...,...
507710,2021-12-27 05:00:00,34.116680,-80.169610,3.0,62.497849,60.834930,31907.689453,0.0,0.0,9.44,2.042057
507711,2021-12-28 05:00:00,34.116680,-80.169610,1.0,66.634094,64.932846,31948.117188,0.0,0.0,10.51,2.477924
507712,2021-12-29 05:00:00,34.116680,-80.169610,61.0,67.725349,66.929489,17106.019531,3.0,7.0,6.35,1.805849
507713,2021-12-30 05:00:00,34.116680,-80.169610,63.0,67.830345,68.986305,388.228912,8.4,5.0,2.89,0.786320


### Final 2021 Data Frame

In [40]:
%store df_2021

Stored 'df_2021' (DataFrame)


# 2022

There are 365 days in the 2022 date range (1/1/22 to 12/31/22).

There are 1391 sets of station coordinates. 

Should end with 507,715 rows.

In [41]:
list_of_2022_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2022-01-01",
        "end_date": "2022-12-31",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2022 = pd.DataFrame(data = daily_data)
            
            list_of_2022_dataframes.append(individual_df_2022)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2022 = pd.concat(list_of_2022_dataframes, axis = 0, ignore_index = True)

In [42]:
df_2022

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2022-01-01 05:00:00,34.136584,-82.396848,65.0,69.105644,70.160042,28261.746094,16.599998,11.0,8.04,1.847120
1,2022-01-02 05:00:00,34.136584,-82.396848,63.0,63.281902,63.680222,0.000000,20.500000,19.0,2.40,0.700097
2,2022-01-03 05:00:00,34.136584,-82.396848,63.0,47.528141,41.497627,25237.679688,20.400000,11.0,10.72,1.451815
3,2022-01-04 05:00:00,34.136584,-82.396848,3.0,38.648151,31.894783,32255.849609,0.000000,0.0,12.57,1.659615
4,2022-01-05 05:00:00,34.136584,-82.396848,51.0,45.570652,40.554169,28815.099609,0.600000,2.0,9.66,1.398301
...,...,...,...,...,...,...,...,...,...,...,...
507710,2022-12-27 05:00:00,34.116680,-80.169610,3.0,36.150352,29.765182,31852.566406,0.000000,0.0,11.05,1.381710
507711,2022-12-28 05:00:00,34.116680,-80.169610,0.0,39.769104,33.510372,31972.189453,0.000000,0.0,12.15,1.608355
507712,2022-12-29 05:00:00,34.116680,-80.169610,0.0,44.302845,39.408916,32010.080078,0.000000,0.0,11.58,1.583548
507713,2022-12-30 05:00:00,34.116680,-80.169610,3.0,53.070343,50.436115,13972.616211,0.000000,0.0,6.39,1.168030


### Final 2022 Data Frame

In [43]:
%store df_2022

Stored 'df_2022' (DataFrame)


# 2023

There are 182 days in the 2023 date range (1/1/23 to 07/01/23).

There are 1391 sets of station coordinates. 

Should end with 253,162 rows.

In [45]:
list_of_2023_dataframes = []

for index, row in stations_df.iterrows():
    
    latitude = row['latitude']
    longitude = row['longitude']
    
    url = "https://customer-archive-api.open-meteo.com/v1/archive"
    
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": "2023-01-01",
        "end_date": "2023-07-01",
        "daily": ["weather_code", "temperature_2m_mean", "apparent_temperature_mean", "sunshine_duration", "precipitation_sum", "precipitation_hours","shortwave_radiation_sum", "et0_fao_evapotranspiration"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "timezone": "America/New_York",
        "apikey": om_api
    }
    
    try:
        responses = openmeteo.weather_api(url, params=params)
        
        for response in responses: 
            daily = response.Daily()
            daily_weather_code = daily.Variables(0).ValuesAsNumpy()
            daily_temperature_2m_mean = daily.Variables(1).ValuesAsNumpy()
            daily_apparent_temperature_mean = daily.Variables(2).ValuesAsNumpy()
            daily_sunshine_duration = daily.Variables(3).ValuesAsNumpy()
            daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
            daily_precipitation_hours = daily.Variables(5).ValuesAsNumpy()
            daily_shortwave_radiation_sum = daily.Variables(6).ValuesAsNumpy()
            daily_et0_fao_evapotranspiration = daily.Variables(7).ValuesAsNumpy()
            
            daily_data = {
                "date": pd.date_range(
                    start = pd.to_datetime(daily.Time(), unit = "s"),
                    end = pd.to_datetime(daily.TimeEnd(), unit = "s"),
                    freq = pd.Timedelta(seconds = daily.Interval()),
                    inclusive = "left"
                ),
                "latitude": params['latitude'],
                "longitude": params['longitude']
            }
            
            daily_data["Weather Code"] = daily_weather_code
            daily_data["Average Temperature (2m)"] = daily_temperature_2m_mean
            daily_data["Average Temperature"] = daily_apparent_temperature_mean
            daily_data["Sunshine Duration (sec)"] = daily_sunshine_duration
            daily_data["Total Precipitation (mm)"] = daily_precipitation_sum
            daily_data["Total Precipitation Time (hrs)"] = daily_precipitation_hours
            daily_data["Total Shortwave Radiation"] = daily_shortwave_radiation_sum
            daily_data["Evapotranspiration (mm)"] = daily_et0_fao_evapotranspiration
            
            individual_df_2023 = pd.DataFrame(data = daily_data)
            
            list_of_2023_dataframes.append(individual_df_2023)
            
    except Exception as e:
        print(f"Error processing data for Latitude={latitude}, Longitude={longitude}: {e}")
        
df_2023 = pd.concat(list_of_2023_dataframes, axis = 0, ignore_index = True)

In [46]:
df_2023

Unnamed: 0,date,latitude,longitude,Weather Code,Average Temperature (2m),Average Temperature,Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation,Evapotranspiration (mm)
0,2023-01-01 05:00:00,34.136584,-82.396848,3.0,53.404408,52.237949,6608.301758,0.000000,0.0,4.820000,0.778179
1,2023-01-02 05:00:00,34.136584,-82.396848,2.0,55.166901,54.420609,32195.529297,0.000000,0.0,9.930000,1.498788
2,2023-01-03 05:00:00,34.136584,-82.396848,63.0,58.943150,58.996288,6988.876953,19.400000,6.0,4.620000,0.699919
3,2023-01-04 05:00:00,34.136584,-82.396848,65.0,61.294399,62.130630,7200.000000,57.799995,16.0,2.850000,0.493260
4,2023-01-05 05:00:00,34.136584,-82.396848,0.0,52.226898,46.572941,32262.767578,0.000000,0.0,12.210000,2.400721
...,...,...,...,...,...,...,...,...,...,...,...
253157,2023-06-27 05:00:00,34.116680,-80.169610,65.0,79.871605,82.551491,47052.375000,16.300001,3.0,26.430000,6.364467
253158,2023-06-28 05:00:00,34.116680,-80.169610,1.0,80.392845,83.361023,47151.582031,0.000000,0.0,27.620001,6.413352
253159,2023-06-29 05:00:00,34.116680,-80.169610,2.0,80.981606,84.895317,46800.000000,0.000000,0.0,25.879999,5.930440
253160,2023-06-30 05:00:00,34.116680,-80.169610,55.0,81.030342,86.170250,36383.578125,1.700000,4.0,17.040001,4.238773


### Final 2023 Data Frame

In [47]:
%store df_2023

Stored 'df_2023' (DataFrame)


# Editing Data Frames

### Combining Individual Data Frames

In [3]:
%store -r df_2018
%store -r df_2019
%store -r df_2020
%store -r df_2021
%store -r df_2022
%store -r df_2023

In [4]:
weather_list = [df_2018, df_2019, df_2020, df_2021, df_2022, df_2023]
weather = pd.concat(weather_list)

### Drop Duplicate Rows

In [7]:
weather.drop_duplicates(inplace=True)

### Fix 'date' Column

In [8]:
last_8_characters_set = list(weather['date'].astype(str).apply(lambda x: x[-8:]))
last_8_characters_df = pd.DataFrame(data = last_8_characters_set)

In [9]:
last_8_characters_df[0].unique()

array(['05:00:00'], dtype=object)

In [10]:
weather['date'] = weather['date'].astype(str).apply(lambda x: x[:-8])

In [12]:
weather['date'] = weather['date'].str.strip()

### Rename Columns

In [14]:
newColNames = {
    'date': 'Date',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'Weather Code': 'Weather Code (WMO)',
    'Average Temperature (2m)': 'Avg. Temperature at 2m (F)',
    'Average Temperature': 'Avg. Apparent Temperature (F)',
    'Total Shortwave Radiation': 'Total Shortwave Radiation (MJ/m2)'
}

In [15]:
weather.rename(columns = newColNames, inplace = True)
weather

Unnamed: 0,Date,Latitude,Longitude,Weather Code (WMO),Avg. Temperature at 2m (F),Avg. Apparent Temperature (F),Sunshine Duration (sec),Total Precipitation (mm),Total Precipitation Time (hrs),Total Shortwave Radiation (MJ/m2),Evapotranspiration (mm)
0,2018-07-01,34.136584,-82.396848,61.0,80.070641,87.661308,41894.300781,3.000000,4.0,21.360001,4.607244
1,2018-07-02,34.136584,-82.396848,51.0,80.640656,88.680756,46569.902344,0.300000,3.0,21.500000,4.642812
2,2018-07-03,34.136584,-82.396848,61.0,80.584404,89.844307,46800.000000,1.900000,5.0,22.549999,4.768721
3,2018-07-04,34.136584,-82.396848,55.0,80.674400,86.568260,46800.000000,2.600000,9.0,23.860001,5.555318
4,2018-07-05,34.136584,-82.396848,53.0,80.306908,87.647400,40072.718750,2.100000,11.0,22.200001,4.757941
...,...,...,...,...,...,...,...,...,...,...,...
253157,2023-06-27,34.116680,-80.169610,65.0,79.871605,82.551491,47052.375000,16.300001,3.0,26.430000,6.364467
253158,2023-06-28,34.116680,-80.169610,1.0,80.392845,83.361023,47151.582031,0.000000,0.0,27.620001,6.413352
253159,2023-06-29,34.116680,-80.169610,2.0,80.981606,84.895317,46800.000000,0.000000,0.0,25.879999,5.930440
253160,2023-06-30,34.116680,-80.169610,55.0,81.030342,86.170250,36383.578125,1.700000,4.0,17.040001,4.238773


# Final Data Frame

In [16]:
%store weather

Stored 'weather' (DataFrame)
