In [11]:
# Import the packages this project needs

# Open-Meteo is an open-source API for weather data
import openmeteo_requests

import os
import glob
import requests_cache
import pandas as pd
from retry_requests import retry

In [12]:

# Open-Meteo client built on an HTTP session that caches responses (cache name
# ".cache", entries expire after 3600 s) and retries failed calls (5 retries,
# backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition. The position of each name inside "hourly" and "daily" is
# the index passed to Variables(i) further down, so keep the two in sync.
url = "https://api.open-meteo.com/v1/forecast"
params = {
    "latitude": 39.6295,
    "longitude": -79.9559,
    "hourly": ["temperature_2m", "precipitation"],
    "daily": ["temperature_2m_max", "temperature_2m_min", "precipitation_sum"],
    "temperature_unit": "fahrenheit",
    "wind_speed_unit": "mph",
    "precipitation_unit": "inch",
    "timezone": "America/New_York",
    "past_days": 92,
    "forecast_days": 16,
}
responses = openmeteo.weather_api(url, params=params)


def _utc_time_axis(series_block):
    """Build the timestamp column for an hourly or daily response block.

    The range starts at ``Time()``, stops before ``TimeEnd()`` (left-inclusive)
    and advances in steps of ``Interval()`` seconds. Both epoch values are
    parsed with ``utc=True``.

    NOTE(review): the result is UTC-based even though the request asks for
    "America/New_York" -- confirm this is what downstream joins on calendar
    dates expect.
    """
    first_stamp = pd.to_datetime(series_block.Time(), unit="s", utc=True)
    last_stamp = pd.to_datetime(series_block.TimeEnd(), unit="s", utc=True)
    step = pd.Timedelta(seconds=series_block.Interval())
    return pd.date_range(start=first_stamp, end=last_stamp, freq=step, inclusive="left")


# Only one location is requested, so only the first response is used.
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Hourly series: index 0 -> temperature_2m, index 1 -> precipitation.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(1).ValuesAsNumpy()

hourly_data = {
    "date": _utc_time_axis(hourly),
    "temperature_2m": hourly_temperature_2m,
    "precipitation": hourly_precipitation,
}
hourly_dataframe = pd.DataFrame(hourly_data)
print(hourly_dataframe)

# Daily series: index 0 -> temperature_2m_max, 1 -> temperature_2m_min,
# 2 -> precipitation_sum.
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()

daily_data = {
    "date": _utc_time_axis(daily),
    "temperature_2m_max": daily_temperature_2m_max,
    "temperature_2m_min": daily_temperature_2m_min,
    "precipitation_sum": daily_precipitation_sum,
}
daily_dataframe = pd.DataFrame(daily_data)
daily_dataframe

Coordinates 39.641422271728516°N -79.95689392089844°E
Elevation 271.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s
                          date  temperature_2m  precipitation
0    2024-07-22 04:00:00+00:00       65.684303            0.0
1    2024-07-22 05:00:00+00:00       65.594299            0.0
2    2024-07-22 06:00:00+00:00       66.314301            0.0
3    2024-07-22 07:00:00+00:00       67.034302            0.0
4    2024-07-22 08:00:00+00:00       68.114304            0.0
...                        ...             ...            ...
2587 2024-11-06 23:00:00+00:00       51.374302            0.0
2588 2024-11-07 00:00:00+00:00       47.324303            0.0
2589 2024-11-07 01:00:00+00:00       45.344299            0.0
2590 2024-11-07 02:00:00+00:00       44.534302            0.0
2591 2024-11-07 03:00:00+00:00       43.814301            0.0

[2592 rows x 3 columns]


Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum
0,2024-07-22 04:00:00+00:00,80.714302,63.974300,0.078740
1,2024-07-23 04:00:00+00:00,84.134300,61.634300,0.007874
2,2024-07-24 04:00:00+00:00,85.394302,65.594299,0.263780
3,2024-07-25 04:00:00+00:00,85.034302,63.254299,0.291339
4,2024-07-26 04:00:00+00:00,83.774300,58.034298,0.000000
...,...,...,...,...
103,2024-11-02 04:00:00+00:00,75.044296,52.994301,0.047244
104,2024-11-03 04:00:00+00:00,58.574303,40.754299,0.059055
105,2024-11-04 04:00:00+00:00,58.034298,35.084301,0.000000
106,2024-11-05 04:00:00+00:00,59.024300,39.584301,0.000000


In [13]:
# Record the current working directory in `path` and echo it; the Excel files
# loaded further down are looked up relative to this directory.
path = os.getcwd()
print(path)

c:\Users\ALL2429\source_repos\ML_Gas_Analysis


In [36]:
# Load the customer's daily burn report and the historical NOAA temperature sheet
# from the working directory stored in `path`.
# NOTE: the burn data has been randomized.
# The paths are built with os.path.join so the separator is right on every OS and
# no backslash sits inside a normal string literal ("\R" and "\H" are invalid
# escape sequences that newer Python versions warn about).
reported_burns = pd.read_excel(os.path.join(path, "Ridgway_Randomized_Data.xlsx"))
prev_weather = pd.read_excel(os.path.join(path, "HDD_WV.xlsx"))



In [37]:
# Preview the reported-burns DataFrame.
reported_burns

Unnamed: 0.1,Unnamed: 0,Date,Randomized Total Nom,Randomized Reported Burn
0,,2023-01-01,6287.430371,5624.479859
1,,2023-01-02,6031.587336,4954.683237
2,,2023-01-03,3321.416004,2093.128369
3,,2023-01-04,6382.194807,3131.200413
4,,2023-01-05,6595.220969,5008.043738
...,...,...,...,...
695,,2024-11-26,1151.347315,1467.682839
696,,2024-11-27,1187.827413,1514.185935
697,,2024-11-28,1181.022544,1505.511411
698,,2024-11-29,928.009777,1182.982761


In [38]:
# Remove the empty placeholder column that pandas labelled "Unnamed: N".
# NOTE(review): assumes the blank leading column is named "Unnamed: 0", as the
# preview of reported_burns shows -- confirm against the workbook.
# Dropping by label instead of by position keeps this cell idempotent: running
# it a second time removes nothing, whereas dropping columns[0] would go on to
# delete "Date" and then the data columns.
placeholder_columns = [
    column for column in reported_burns.columns
    if str(column).startswith("Unnamed:")
]
reported_burns = reported_burns.drop(columns=placeholder_columns)

In [39]:
# Preview the NOAA temperature sheet held in prev_weather.
prev_weather

Unnamed: 0,Date,NOAA Temp High,NOAA Temp Low,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,2023-01-01,59,49,,,,
1,2023-01-02,65,54,,,,
2,2023-01-03,68,59,,,,
3,2023-01-04,69,42,,,,
4,2023-01-05,51,39,,,,
...,...,...,...,...,...,...,...
695,2024-11-26,57,41,,,,
696,2024-11-27,58,36,,,,
697,2024-11-28,46,34,,,,
698,2024-11-29,49,32,,,,


In [48]:
# Keep only the first three columns of the sheet (positions 0, 1 and 2); the
# preview shows these as Date, NOAA Temp High and NOAA Temp Low, followed by
# "Unnamed: 3".."Unnamed: 6" columns that are empty in the displayed rows.
prev_weather = prev_weather.iloc[:, 0:3]

In [49]:
# Preview prev_weather again after trimming it to its first three columns.
prev_weather

Unnamed: 0,Date,NOAA Temp High,NOAA Temp Low
0,2023-01-01,59,49
1,2023-01-02,65,54
2,2023-01-03,68,59
3,2023-01-04,69,42
4,2023-01-05,51,39
...,...,...,...
695,2024-11-26,57,41
696,2024-11-27,58,36
697,2024-11-28,46,34
698,2024-11-29,49,32
