In [2]:
import pandas as pd
import configparser

In [3]:
# Load the notebook configuration. ConfigParser.read() silently skips files
# it cannot open and returns the list of files it actually parsed, so check
# that list and fail with a clear message instead of hitting a confusing
# KeyError on the 'Paths' section further down.
config = configparser.ConfigParser()
if not config.read('config.ini'):
    raise FileNotFoundError(
        "Could not read 'config.ini' (looked relative to the current working directory)."
    )

# Input locations come from the [Paths] section; both files are read with
# ';' as the field delimiter.
raw_data_procedures = config['Paths']['raw_data_procedures']
df_raw_procedures = pd.read_csv(raw_data_procedures, delimiter=';')

raw_data_subgroups = config['Paths']['raw_data_subgroups']
df_raw_subgroups = pd.read_csv(raw_data_subgroups, delimiter=';')

In [4]:
def clean_raw_data(df, vars_str):
    """Strip the aggregate 'Total' column and 'Total' row from a raw table.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. It is not modified; a new frame is returned.
    vars_str : str
        Name of the identifier column; rows whose value in this column is
        the literal string 'Total' are removed.

    Returns
    -------
    pandas.DataFrame
        ``df`` without the 'Total' column (when present) and without the
        rows where ``df[vars_str] == 'Total'``.

    Raises
    ------
    KeyError
        If ``vars_str`` is not a column of ``df``.
    """
    # errors='ignore': a table that has no 'Total' column has nothing to
    # drop, which should not abort the cleaning step.
    without_total_column = df.drop(columns=['Total'], errors='ignore')

    keep_row = without_total_column[vars_str] != 'Total'
    return without_total_column[keep_row]
    
def clean_and_convert_to_time_series(df, vars_str):
    """Clean a wide raw table and reshape it into a long, date-indexed frame.

    The table is first passed through ``clean_raw_data``. Every remaining
    column other than ``vars_str`` is treated as a period label of the form
    ``'<Mon>/<YYYY>'`` using Portuguese month abbreviations (e.g.
    ``'Fev/2020'``) and is melted into rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide raw table with one identifier column and one column per period.
    vars_str : str
        Name of the identifier column, kept as a regular column in the result.

    Returns
    -------
    pandas.DataFrame
        Long frame indexed by ``'ds'`` (the parsed period, first day of the
        month) with columns ``vars_str`` and ``'y'`` (the cell values).

    Raises
    ------
    ValueError
        If a period label does not split into exactly two parts on ``'/'``
        or cannot be parsed as a month/year pair.
    """
    # Portuguese month abbreviations mapped straight to month numbers.
    # Parsing a number with %m avoids the locale-dependent %B directive
    # that parsing translated English month names would require.
    month_numbers = {
        'Jan': 1,
        'Fev': 2,
        'Mar': 3,
        'Abr': 4,
        'Mai': 5,
        'Jun': 6,
        'Jul': 7,
        'Ago': 8,
        'Set': 9,
        'Out': 10,
        'Nov': 11,
        'Dez': 12,
    }

    def parse_custom_date(date_str):
        """Convert one '<Mon>/<YYYY>' label to a Timestamp; pass 'Total' through."""
        if date_str == 'Total':
            return date_str

        month_str, year_str = date_str.split('/')
        month_number = month_numbers.get(month_str)
        if month_number is None:
            # Not a known abbreviation: fall back to interpreting the label
            # as a full month name, as the previous implementation did.
            return pd.to_datetime(f"{month_str}/{year_str}", format='%B/%Y')
        return pd.to_datetime(f"{month_number:02d}/{year_str}", format='%m/%Y')

    cleaned_df = clean_raw_data(df, vars_str)

    ts_df = pd.melt(cleaned_df, id_vars=[vars_str], var_name='ds', value_name='y')

    # After melting, each period label repeats once per identifier, so parse
    # every distinct label a single time and map the results back.
    parsed_labels = {label: parse_custom_date(label) for label in ts_df['ds'].unique()}
    ts_df['ds'] = ts_df['ds'].map(parsed_labels)

    return ts_df.set_index('ds')


In [5]:
# Reshape each raw table into its long, 'ds'-indexed form. The second
# argument names the identifier column of the respective table.
ts_proced = clean_and_convert_to_time_series(df_raw_procedures, 'Procedimento')
ts_subgroup = clean_and_convert_to_time_series(df_raw_subgroups, 'SubGrup proced')


In [6]:
# Give both datasets the same identifier column name, 'unique_id'.
ts_proced = ts_proced.rename({"Procedimento": "unique_id"}, axis="columns")
ts_subgroup = ts_subgroup.rename({"SubGrup proced": "unique_id"}, axis="columns")

In [7]:
# Order each dataset by series identifier and then by 'ds' ('ds' is the
# index level here; sort_values accepts index level names in `by`).
# Reassigning the sorted result instead of sorting in place keeps the cell
# free of hidden mutation of objects created in earlier cells.
ts_proced = ts_proced.sort_values(by=['unique_id', 'ds'])
ts_subgroup = ts_subgroup.sort_values(by=['unique_id', 'ds'])

In [8]:
# Save both time series datasets to the CSV paths given in the [Paths]
# section of the config. Both paths are resolved before anything is written,
# so a missing config key fails up front instead of after the first file has
# already been written.
output_csv_procedures = config['Paths']['output_csv_procedures']
output_csv_subgroups = config['Paths']['output_csv_subgroups']

# index=True writes the 'ds' index as the first column of each file.
ts_proced.to_csv(output_csv_procedures, sep=',', index=True)
ts_subgroup.to_csv(output_csv_subgroups, sep=',', index=True)

print("The combined time series dataset has been saved.")

The combined time series dataset has been saved.
