# **Imports**

In [22]:
import pandas as pd
from openpyxl import load_workbook
from dataclasses import dataclass
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# **Constants**

In [23]:
@dataclass(frozen=True)
class Pathes:
    path_turn_on_off_heating: str = '../main_datasets/processed_datasets/6. Плановые-Внеплановые отключения 01.10.2023-30.04.2023.csv'
    path_events_for_period_2023_params: str = '../main_datasets/processed_datasets/События за период_01.10.2023-31.12.2023____pivot_table.csv'
    path_events_for_period_2023: str = '../main_datasets/processed_datasets/События за период_01.10.2023-31.12.2023.xlsx____uploading_table.csv'
    path_events_for_period_2024: str = '../main_datasets/processed_datasets/События_за_период_01.01.2024-30.04.2024____uploading_table.csv'
    path_events_for_period_2024_params: str = '../main_datasets/processed_datasets/События_за_период_01.01.2024-30.04.2024____pivot_table.csv'
    path_to_timeline: str = '../variables/timeline'
    path_to_weather_dataset: str = '../main_datasets/exogens_params/weather_dataset.csv'

In [24]:
date_columns =  [
    'Дата создания во внешней системе', 'Дата закрытия', 'Дата и время завершения события во внешней системе',
    'Дата регистрации отключения', 'Планируемая дата отключения', 'Планируемая дата включения', 'Фактическая дата отключения'
    'Фактическая дата включения'
]

In [4]:
lat = 55.787715
lon = 37.775631
max_date_for_api = datetime(year=2024, month=6, day=18)
weather_params = ["temperature_2m", "relative_humidity_2m", "rain", "showers", "snowfall", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high"]

# **Functions**

In [5]:
def date_columns_to_date_format(df: pd.DataFrame) -> pd.DataFrame:
    for i in date_columns:
        if i in df.columns:
            df[i] = pd.to_datetime(df[i])
    return df

In [6]:
def load_dataset(path: str) -> pd.DataFrame:
    df = pd.read_csv(path)
    df = date_columns_to_date_format(df)
    return df

# **Loading tables**

In [7]:
turn_on_off_heating = load_dataset(Pathes.path_turn_on_off_heating)
events_for_period_2023 = load_dataset(Pathes.path_events_for_period_2023)
events_for_period_2024 = load_dataset(Pathes.path_events_for_period_2024)
events_for_period_2023_params = load_dataset(Pathes.path_events_for_period_2023_params)
events_for_period_2024_params = load_dataset(Pathes.path_events_for_period_2024_params)


In [11]:
usefull_events = events_for_period_2024_params['Названия строк'].to_list()

In [12]:
events_for_period_2023__only_usefull_events = events_for_period_2023.loc[events_for_period_2023['Наименование'].isin(usefull_events)]
events_for_period_2024__only_usefull_events = events_for_period_2024.loc[events_for_period_2024['Наименование'].isin(usefull_events)]

# **Get weather**

In [27]:
min1_2023 = events_for_period_2023__only_usefull_events['Дата создания во внешней системе'].min()
min2_2024 = events_for_period_2024__only_usefull_events['Дата создания во внешней системе'].min()
min_date = min(min1_2023, min2_2024)

In [28]:
max1_2023 = events_for_period_2023__only_usefull_events['Дата создания во внешней системе'].max()
max2_2024 = events_for_period_2024__only_usefull_events['Дата создания во внешней системе'].max()
max_date = max(max1_2023, max2_2024)

In [29]:
min_date

Timestamp('2023-01-06 00:58:29')

In [30]:
str(min_date)

'2023-01-06 00:58:29'

In [33]:
str(min_date + timedelta(days=10))[:10]

'2023-01-16'

In [40]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": lat,
	"longitude": lon,
	"hourly": 'temperature',
	"start_date": '2021-01-01',
	"end_date": '2021-01-02'
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")



Coordinates 55.8125°N 37.75°E
Elevation 143.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


In [41]:
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

                        date  temperature_2m
0  2021-01-01 00:00:00+00:00             NaN
1  2021-01-01 01:00:00+00:00             NaN
2  2021-01-01 02:00:00+00:00             NaN
3  2021-01-01 03:00:00+00:00             NaN
4  2021-01-01 04:00:00+00:00             NaN
5  2021-01-01 05:00:00+00:00             NaN
6  2021-01-01 06:00:00+00:00             NaN
7  2021-01-01 07:00:00+00:00             NaN
8  2021-01-01 08:00:00+00:00             NaN
9  2021-01-01 09:00:00+00:00             NaN
10 2021-01-01 10:00:00+00:00             NaN
11 2021-01-01 11:00:00+00:00             NaN
12 2021-01-01 12:00:00+00:00             NaN
13 2021-01-01 13:00:00+00:00             NaN
14 2021-01-01 14:00:00+00:00             NaN
15 2021-01-01 15:00:00+00:00             NaN
16 2021-01-01 16:00:00+00:00             NaN
17 2021-01-01 17:00:00+00:00             NaN
18 2021-01-01 18:00:00+00:00             NaN
19 2021-01-01 19:00:00+00:00             NaN
20 2021-01-01 20:00:00+00:00             NaN
21 2021-01

In [36]:
hourly_dataframe.to_csv(Pathes.path_to_weather_dataset, index=False)