In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
def temperature_extraction(path:str, years:list)->pd.DataFrame:
    """Read every temperature CSV of the requested years into a single frame.

    For each entry of ``years`` the directory ``<path>/<year>`` is listed and
    every regular file in it is parsed as a ``;``-separated, latin-1 encoded
    CSV whose column header sits on the ninth line (``header=8``).

    Parameters
    ----------
    path : str
        Directory that contains one sub-directory per year.
    years : list
        Years to load; each one is converted with ``str`` to build the
        sub-directory name.

    Returns
    -------
    pd.DataFrame
        Columns ``datetime``, ``temperature``, ``region``, ``state`` and
        ``city``. ``temperature`` is returned exactly as parsed from the file
        (no decimal conversion happens here). The per-file row labels are
        kept, so index values repeat across files.

    Raises
    ------
    ValueError
        If no file is found for any of the requested years.
    """
    separator = "-----------------------------------------------------------------"
    df_list = []

    for year in years:
        print(separator)
        print(f"Reading files from year {year}")
        print(separator)
        full_path = os.path.join(path, str(year))

        # Sorted so the row order of the result does not depend on the
        # arbitrary order in which os.listdir returns directory entries.
        file_list = sorted(
            file for file in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, file))
        )
        total_files = len(file_list)

        print(f"A total of {total_files} found | Initiating file extraction")
        print(separator)

        for n, file in enumerate(file_list, start=1):
            # Format the ratio directly as a percentage: rounding first and
            # multiplying by 100 afterwards prints float artefacts
            # (0.29 * 100 == 28.999999999999996).
            print(f"Reading file {n} of {total_files} | Progress: {n / total_files:.0%}                 ", end = "\r")
            file_path = os.path.join(full_path, file)

            df_extracted = pd.read_csv(file_path, encoding = 'latin-1', header = 8, sep = ";")

            # Date and hour live in separate text columns; join them before parsing.
            timestamp_text = df_extracted['Data'] + " " + df_extracted['Hora UTC']

            df_extracted = df_extracted.rename(columns = {"TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)": "temperature"})
            df_extracted = df_extracted.assign(
                datetime = pd.to_datetime(timestamp_text, format = "%Y/%m/%d %H%M %Z")
            )

            # The file name is split on "_" and fields 1, 2 and 4 are used as
            # region, state and city.
            # NOTE(review): field 3 is skipped - presumably a station code; confirm.
            file_name_list = file.split("_")

            # .assign builds a new frame, so no columns are written onto a
            # column-subset of the parsed data (avoids SettingWithCopyWarning).
            df_extracted = df_extracted[['datetime', 'temperature']].assign(
                region = file_name_list[1],
                state = file_name_list[2],
                city = file_name_list[4],
            )

            df_list.append(df_extracted)

    if not df_list:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(f"No temperature files found in {path!r} for years {list(years)}")

    df_raw = pd.concat(df_list)
    return df_raw


In [3]:
def temperature_processing(df_raw:pd.DataFrame)->pd.DataFrame:
    """Convert temperatures to float and average them per timestamp.

    Parameters
    ----------
    df_raw : pd.DataFrame
        Must contain the columns ``datetime`` and ``temperature``; any other
        column is ignored. ``temperature`` may hold text with a comma as the
        decimal separator. The frame is not modified.

    Returns
    -------
    pd.DataFrame
        One row per distinct ``datetime`` with the mean ``temperature`` of all
        input rows that share it.

    Raises
    ------
    ValueError
        If a temperature value cannot be converted to ``float``.
    """
    # Work on a copy: assigning the converted column onto df_raw itself
    # would silently change the caller's frame.
    df_temperature = df_raw[['datetime', 'temperature']].copy()

    # Swap the decimal comma for a dot so the text can be cast to float.
    # NOTE(review): every numeric value takes part in the mean - if the source
    # encodes missing readings with a sentinel number, filter it first; confirm.
    df_temperature['temperature'] = (
        df_temperature['temperature']
        .replace(",", ".", regex=True)
        .astype(float)
    )

    df_grouped = df_temperature.groupby('datetime').mean().reset_index()

    return df_grouped

In [4]:
def load_extraction(path:str)->pd.DataFrame:
    """Read every ``;``-separated CSV file in ``path`` into a single frame.

    Only regular files directly inside ``path`` are read; sub-directories are
    skipped.

    Parameters
    ----------
    path : str
        Directory holding the raw load files.

    Returns
    -------
    pd.DataFrame
        Row-wise concatenation of all files, in sorted file-name order. The
        per-file row labels are kept, so index values repeat across files.

    Raises
    ------
    ValueError
        If ``path`` contains no files.
    """
    separator = "-----------------------------------------------------------------"

    print(separator)
    print("Reading files")
    print(separator)

    # Sorted so the row order of the result does not depend on the arbitrary
    # order in which os.listdir returns directory entries.
    file_list = sorted(
        file for file in os.listdir(path)
        if os.path.isfile(os.path.join(path, file))
    )
    total_files = len(file_list)

    print(f"A total of {total_files} found | Initiating file extraction")
    print(separator)

    if not file_list:
        # pd.concat would raise an opaque "No objects to concatenate" here.
        raise ValueError(f"No load files found in {path!r}")

    df_list = [pd.read_csv(os.path.join(path, file), sep = ";") for file in file_list]
    df_raw = pd.concat(df_list)

    return df_raw
                
    

In [5]:
def load_processing(df_raw:pd.DataFrame)->pd.DataFrame:
    """Collapse per-subsystem load rows into one total load per timestamp.

    Parameters
    ----------
    df_raw : pd.DataFrame
        Must contain ``din_instante``, ``id_subsistema`` and
        ``val_cargaenergiahomwmed``; ``nom_subsistema`` is renamed when
        present but not used further.

    Returns
    -------
    pd.DataFrame
        Columns ``datetime`` (parsed with ``pd.to_datetime``) and
        ``total_load`` (row-wise sum over the pivoted subsystem columns).
    """
    column_aliases = {
        'din_instante': 'datetime',
        'nom_subsistema': 'sub',
        'val_cargaenergiahomwmed': 'carga_sub',
        'id_subsistema': 'id_sub',
    }
    renamed = df_raw.rename(columns = column_aliases)

    # One row per timestamp, one column per subsystem id. pivot_table uses its
    # default aggregation, so repeated (datetime, id_sub) pairs are averaged.
    wide = renamed.pivot_table(
        index = ["datetime"],
        columns = ["id_sub"],
        values = "carga_sub",
    ).reset_index()

    wide['datetime'] = pd.to_datetime(wide['datetime'])

    # The subsystem columns are identified by dtype: every float64/int64
    # column of the pivoted frame is summed into the total load.
    numeric_view = wide.select_dtypes(include=['float64', 'int64'])
    sub_markets = list(numeric_view.columns)
    wide['total_load'] = wide[sub_markets].sum(axis = 1)

    return wide.drop(columns = sub_markets)

In [6]:
# Input and output locations. These are hard-coded absolute paths; adjust them
# to the local checkout before running.
temperature_raw_path = "/mnt/e/github/load-forecast/01.database/raw/temperature"
temperature_processed_path = "/mnt/e/github/load-forecast/01.database/processed/temperature"

load_raw_path = "/mnt/e/github/load-forecast/01.database/raw/load"
load_processed_path = "/mnt/e/github/load-forecast/01.database/processed/load"

# Years whose temperature sub-directories are read.
years_list = [2022,2023]

#################################################################################
############################ TEMPERATURE PROCESSING #############################
#################################################################################

df_temperature_raw = temperature_extraction(path = temperature_raw_path,years = years_list)
df_temperature_processed = temperature_processing(df_raw = df_temperature_raw)

# Label the file with the earliest and latest requested year. Indexing
# years_list[1] fails for a single year and mislabels lists of three or more.
temperature_file_name = f'{min(years_list)}_{max(years_list)}_temperature_processed.csv'
temperature_file_path = os.path.join(temperature_processed_path, temperature_file_name)

# Create the output directory on first run instead of failing inside to_csv.
os.makedirs(temperature_processed_path, exist_ok = True)
df_temperature_processed.to_csv(temperature_file_path)

#################################################################################
############################### LOAD PROCESSING #################################
#################################################################################


df_load_raw = load_extraction(path = load_raw_path)
df_load_processed = load_processing(df_raw = df_load_raw)

# The year range in this file name is fixed text; it is not derived from the
# files that were actually read.
load_file_path = os.path.join(load_processed_path, '2021_2023_load_processed.csv')

os.makedirs(load_processed_path, exist_ok = True)
df_load_processed.to_csv(load_file_path)

-----------------------------------------------------------------
Reading files from year 2022
-----------------------------------------------------------------
A total of 567 found | Initiating file extraction
-----------------------------------------------------------------
-----------------------------------------------------------------       
Reading files from year 2023
-----------------------------------------------------------------
A total of 567 found | Initiating file extraction
-----------------------------------------------------------------
-----------------------------------------------------------------       
Reading files
-----------------------------------------------------------------
A total of 3 found | Initiating file extraction
-----------------------------------------------------------------
