In [1]:
import warnings

import pandas as pd

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas deprecation and
# chained-assignment warnings; consider narrowing it by category or module.
warnings.filterwarnings("ignore")

In [2]:
def tratamento_consumo(
    sheet_name,
    caminho_arquivo="/mnt/x/dados/consumo_energia_eletrica/dados.xls",
):
    """Read one consumption sheet and return it in long (tidy) format.

    The sheet is read wide, transposed so that each spreadsheet column
    becomes a row, and melted so that every output row holds the value of a
    single (year, month, state) combination.

    NOTE(review): assumes that, after the 4 skipped rows and the header row,
    the sheet holds a year row, a month row, one row that is discarded, and
    then 27 state rows in the order of ``ufs`` below — confirm against the
    workbook.

    Parameters
    ----------
    sheet_name : int or str
        Sheet to read; forwarded to ``pandas.read_excel``.
    caminho_arquivo : str, optional
        Path of the workbook. Defaults to the path this notebook has always
        used, so existing single-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``ano``, ``mes`` (1-12), ``sigla_uf`` (two-letter state
        code) and ``consumo``, with a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        Raised by pandas when the transposed sheet does not have exactly
        29 columns (year, month and 27 states).
    """
    # Month abbreviations as written in the sheet -> month number.
    # Labels missing from this mapping become NaN in the output.
    meses = {
        "JAN": 1,
        "FEV": 2,
        "MAR": 3,
        "ABR": 4,
        "MAI": 5,
        "JUN": 6,
        "JUL": 7,
        "AGO": 8,
        "SET": 9,
        "OUT": 10,
        "NOV": 11,
        "DEZ": 12,
    }
    # State name -> UF code. The insertion order is significant: the codes
    # are applied to the transposed columns by position.
    ufs = {
        "Rondônia": "RO",
        "Acre": "AC",
        "Amazonas": "AM",
        "Roraima": "RR",
        "Pará": "PA",
        "Amapá": "AP",
        "Tocantins": "TO",
        "Maranhão": "MA",
        "Piauí": "PI",
        "Ceará": "CE",
        "Rio Grande do Norte": "RN",
        "Paraíba": "PB",
        "Pernambuco": "PE",
        "Alagoas": "AL",
        "Sergipe": "SE",
        "Bahia": "BA",
        "Minas Gerais": "MG",
        "Espírito Santo": "ES",
        "Rio de Janeiro": "RJ",
        "São Paulo": "SP",
        "Paraná": "PR",
        "Santa Catarina": "SC",
        "Rio Grande do Sul": "RS",
        "Mato Grosso do Sul": "MS",
        "Mato Grosso": "MT",
        "Goiás": "GO",
        "Distrito Federal": "DF",
    }

    df = pd.read_excel(
        caminho_arquivo,
        sheet_name,
        skiprows=4,
        skipfooter=1,
        usecols="A:IG",
    )

    # After transposing, "index" holds the original header labels and
    # column 2 is the discarded third sheet row; the first transposed row
    # is spreadsheet column A (the row labels), so it is dropped as well.
    df_transposta = df.T.reset_index().drop(columns=["index", 2]).iloc[1:]
    df_transposta.columns = ["ano", "mes", *ufs.values()]

    df_longo = df_transposta.melt(
        id_vars=["ano", "mes"],
        var_name="sigla_uf",
        value_name="consumo",
    )
    # Propagate each year to the following rows where it is blank. The
    # result is assigned back instead of using a chained
    # ``ffill(inplace=True)``, which does not update the frame under pandas
    # Copy-on-Write.
    df_longo["ano"] = df_longo["ano"].ffill()
    df_longo["mes"] = df_longo["mes"].map(meses)
    return df_longo


tipos_consumo = [
    "Total",
    "Cativo",
    "Residencial",
    "Industrial",
    "Comercial",
    "Outros",
]
# Zero-based position of the sheet read for the first entry of
# `tipos_consumo`; the following entries use the sheets right after it.
# NOTE(review): assumes these sheets are contiguous and in this order in
# the workbook — confirm.
PRIMEIRA_ABA_CONSUMO = 9

# Process one sheet per consumption type and tag each frame with its type.
# Every type gets its own freshly loaded frame; the former always-true
# `if i < 6` guard is gone, so a stale frame can never be relabelled and
# appended a second time.
dfs_consumo = [
    tratamento_consumo(PRIMEIRA_ABA_CONSUMO + i).assign(tipo_consumo=tipo)
    for i, tipo in enumerate(tipos_consumo)
]

# Stack all frames and fix the column order.
df_consumo = pd.concat(dfs_consumo)[
    ["ano", "mes", "sigla_uf", "tipo_consumo", "consumo"]
]

In [3]:
# Sanity check: (rows, columns) of the stacked consumption frame.
df_consumo.shape

(38880, 5)

In [3]:
def tratamento_consumidores(
    sheet_name,
    caminho_arquivo="/mnt/x/dados/consumo_energia_eletrica/dados.xls",
):
    """Read one consumer-count sheet and return it in long (tidy) format.

    Applies the same reshaping as ``tratamento_consumo``; only the name of
    the value column differs. The sheet is read wide, transposed so that
    each spreadsheet column becomes a row, and melted so that every output
    row holds the value of a single (year, month, state) combination.

    NOTE(review): assumes that, after the 4 skipped rows and the header row,
    the sheet holds a year row, a month row, one row that is discarded, and
    then 27 state rows in the order of ``ufs`` below — confirm against the
    workbook.

    Parameters
    ----------
    sheet_name : int or str
        Sheet to read; forwarded to ``pandas.read_excel``.
    caminho_arquivo : str, optional
        Path of the workbook. Defaults to the path this notebook has always
        used, so existing single-argument calls behave as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``ano``, ``mes`` (1-12), ``sigla_uf`` (two-letter state
        code) and ``numero_consumidores``, with a fresh ``RangeIndex``.

    Raises
    ------
    ValueError
        Raised by pandas when the transposed sheet does not have exactly
        29 columns (year, month and 27 states).
    """
    # Month abbreviations as written in the sheet -> month number.
    # Labels missing from this mapping become NaN in the output.
    meses = {
        "JAN": 1,
        "FEV": 2,
        "MAR": 3,
        "ABR": 4,
        "MAI": 5,
        "JUN": 6,
        "JUL": 7,
        "AGO": 8,
        "SET": 9,
        "OUT": 10,
        "NOV": 11,
        "DEZ": 12,
    }
    # State name -> UF code. The insertion order is significant: the codes
    # are applied to the transposed columns by position.
    ufs = {
        "Rondônia": "RO",
        "Acre": "AC",
        "Amazonas": "AM",
        "Roraima": "RR",
        "Pará": "PA",
        "Amapá": "AP",
        "Tocantins": "TO",
        "Maranhão": "MA",
        "Piauí": "PI",
        "Ceará": "CE",
        "Rio Grande do Norte": "RN",
        "Paraíba": "PB",
        "Pernambuco": "PE",
        "Alagoas": "AL",
        "Sergipe": "SE",
        "Bahia": "BA",
        "Minas Gerais": "MG",
        "Espírito Santo": "ES",
        "Rio de Janeiro": "RJ",
        "São Paulo": "SP",
        "Paraná": "PR",
        "Santa Catarina": "SC",
        "Rio Grande do Sul": "RS",
        "Mato Grosso do Sul": "MS",
        "Mato Grosso": "MT",
        "Goiás": "GO",
        "Distrito Federal": "DF",
    }

    df = pd.read_excel(
        caminho_arquivo,
        sheet_name,
        skiprows=4,
        skipfooter=1,
        usecols="A:IG",
    )

    # After transposing, "index" holds the original header labels and
    # column 2 is the discarded third sheet row; the first transposed row
    # is spreadsheet column A (the row labels), so it is dropped as well.
    df_transposta = df.T.reset_index().drop(columns=["index", 2]).iloc[1:]
    df_transposta.columns = ["ano", "mes", *ufs.values()]

    df_longo = df_transposta.melt(
        id_vars=["ano", "mes"],
        var_name="sigla_uf",
        value_name="numero_consumidores",
    )
    # Propagate each year to the following rows where it is blank. The
    # result is assigned back instead of using a chained
    # ``ffill(inplace=True)``, which does not update the frame under pandas
    # Copy-on-Write.
    df_longo["ano"] = df_longo["ano"].ffill()
    df_longo["mes"] = df_longo["mes"].map(meses)
    return df_longo


tipos_consumidores = ["Residencial", "Industrial", "Comercial", "Outros"]
# Zero-based position of the sheet read for the first entry of
# `tipos_consumidores`; the following entries use the sheets right after it.
# NOTE(review): assumes these sheets are contiguous and in this order in
# the workbook — confirm.
PRIMEIRA_ABA_CONSUMIDORES = 15

# Process one sheet per consumer type and tag each frame with its type.
# Every type gets its own freshly loaded frame; the former always-true
# `if i < 6` guard is gone, so a stale frame can never be relabelled and
# appended a second time.
dfs_consumidores = [
    tratamento_consumidores(PRIMEIRA_ABA_CONSUMIDORES + i).assign(
        tipo_consumo=tipo
    )
    for i, tipo in enumerate(tipos_consumidores)
]

# Stack all frames and fix the column order.
df_consumidores = pd.concat(dfs_consumidores)[
    ["ano", "mes", "sigla_uf", "tipo_consumo", "numero_consumidores"]
]

In [12]:
# Attach consumer counts to every consumption row. The left join keeps
# consumption types that have no consumer-count rows; their missing counts
# are stored as 0 below.
df_total = (
    df_consumo.merge(
        df_consumidores,
        how="left",
        on=["ano", "mes", "sigla_uf", "tipo_consumo"],
    )[
        [
            "ano",
            "mes",
            "sigla_uf",
            "tipo_consumo",
            "numero_consumidores",
            "consumo",
        ]
    ]
    .assign(
        # Raises if any consumption value is missing or non-numeric.
        consumo=lambda tabela: tabela["consumo"].astype(int),
        numero_consumidores=lambda tabela: (
            tabela["numero_consumidores"].fillna(0).astype(int)
        ),
    )
)

In [14]:
# Spot check: residential rows for Pernambuco (PE) in 2004.
filtro_pe_residencial_2004 = (
    df_total["ano"].eq(2004)
    & df_total["sigla_uf"].eq("PE")
    & df_total["tipo_consumo"].eq("Residencial")
)
df_total.loc[filtro_pe_residencial_2004]

Unnamed: 0,ano,mes,sigla_uf,tipo_consumo,numero_consumidores,consumo
15840,2004,1,PE,Residencial,2001833,228466
15841,2004,2,PE,Residencial,2004750,217764
15842,2004,3,PE,Residencial,2018407,225066
15843,2004,4,PE,Residencial,2026995,232170
15844,2004,5,PE,Residencial,1972048,210365
15845,2004,6,PE,Residencial,1963599,198030
15846,2004,7,PE,Residencial,1954839,192699
15847,2004,8,PE,Residencial,1949125,189991
15848,2004,9,PE,Residencial,1989788,224208
15849,2004,10,PE,Residencial,2022621,212547


In [46]:
# Persist the merged table as a comma-separated UTF-8 file without the
# index column.
caminho_saida = "/mnt/x/dados/consumo_energia_eletrica/consumo_energia.csv"
df_total.to_csv(caminho_saida, index=False, sep=",", encoding="utf-8")