In [3]:
import pandas as pd

How did each president’s cabinets perform relative to others? I would like total cabinet numbers per president and per year aggregated for all variables. Put these aggregate totals in one table, in a separate tab.

In [4]:
from extract import INTERIM_DIR

df = pd.read_parquet(INTERIM_DIR / "data-parquet-editada-final-1.parquet")

In [6]:
df = df.rename(columns={"category (bureaucracy or cabinet)": "bureaucracy_cabinet"})

df["bureaucracy_cabinet"] = df["bureaucracy_cabinet"].str.replace("Ministério", "cabinet", regex=False)
df.head(5)

Unnamed: 0,president,conc_parc,party,agency,bureaucracy_cabinet,year,category
0,Dilma Rousseff,0.339623,,ABGF - Agencia Brasileira Gestora de Fundos Ga...,Burocracia,2015,neutra
1,Dilma Rousseff,0.962264,,AEB – Agência Espacial Brasileira,Burocracia,2015,neutra
2,Dilma Rousseff,0.909091,,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,Burocracia,2015,neutra
3,Dilma Rousseff,0.955128,,AN – Arquivo Nacional,Burocracia,2015,neutra
4,Dilma Rousseff,0.996875,,ANA – Agência Nacional de Águas,Burocracia,2015,neutra


In [27]:
df_only_cabinet = df[df['bureaucracy_cabinet'] == 'cabinet']

df_aggregated = (
    df_only_cabinet
    .groupby("year")
    .agg(
        cabinet_count=('bureaucracy_cabinet', 'count'),
        conc_parc_mean=('conc_parc', 'mean')
    )
    .reset_index())

df_aggregated

Unnamed: 0,year,cabinet_count,conc_parc_mean
0,2015,19,0.763652
1,2016,36,0.772172
2,2017,33,0.752517
3,2018,17,0.735311
4,2019,18,0.742956
5,2020,19,0.732731
6,2021,19,0.775942
7,2022,19,0.749796
8,2023,36,0.774138
9,2024,42,0.776436


In [33]:
# Number of cabinets for each year
copy_df_aggregated = df_aggregated.copy()

for year in copy_df_aggregated["year"]:
    president = df[df["year"] == year].iloc[0]["president"]
    print(f"President in {year}: {president}")
    copy_df_aggregated.loc[copy_df_aggregated["year"] == year, 'president'] = president



# Calculando número médio de gabinetes por presidente
copy_df_aggregated = (
    copy_df_aggregated
    .groupby("president")
    .agg(
        mean_number_cabinets =("cabinet_count", lambda x: round(x.mean())),
        conc_parc_mean =("conc_parc_mean", lambda x: round(x.mean(), 2))
    )
    .reset_index()
)

copy_df_aggregated.columns = copy_df_aggregated.columns.str.capitalize()

dict_rename_columns = {
    "Mean_number_cabinets": "Mean Number of Cabinets",
    "Conc_parc_mean": "Mean % Conceded Partially",
}

copy_df_aggregated = copy_df_aggregated.rename(columns=dict_rename_columns)

display(copy_df_aggregated)

# Writing to excel
from extract import RAW_DIR

sheet_name = "Performance of Cabinets"

file_path = RAW_DIR / "Brazil-Aligned and Non-Aligned All Presidents(editada).xlsx"

with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    copy_df_aggregated.to_excel(writer, sheet_name=sheet_name, index=False)

President in 2015: Dilma Rousseff
President in 2016: Dilma Rousseff
President in 2017: Michel Temer
President in 2018: Michel Temer
President in 2019: Jair Bolsonaro
President in 2020: Jair Bolsonaro
President in 2021: Jair Bolsonaro
President in 2022: Jair Bolsonaro
President in 2023: Luiz Inácio Lula da Silva
President in 2024: Luiz Inácio Lula da Silva


Unnamed: 0,President,Mean Number of Cabinets,Mean % Conceded Partially
0,Dilma Rousseff,28,0.77
1,Jair Bolsonaro,19,0.75
2,Luiz Inácio Lula da Silva,39,0.78
3,Michel Temer,25,0.74


How did the cabinet as a whole perform compared to the bureaucracy? Per year, and all years?

In [57]:
df_aggregated = df.groupby(["year", "bureaucracy_cabinet"])["conc_parc"].mean().reset_index(name="mean_conc_parc")


# Converting dataframe back to format in excel
df_final_and_plot = df_aggregated
df_final_and_plot.columns = df_final_and_plot.columns.str.capitalize()
df_final_and_plot = df_final_and_plot.rename(columns={
    "Bureaucracy_cabinet": "Category (Bureaucracy or cabinet)",
    "Mean_conc_parc": "Mean % Conceded Partially",
    })

df_final_and_plot["Category (Bureaucracy or cabinet)"] = df_final_and_plot["Category (Bureaucracy or cabinet)"].str.replace("cabinet", "Ministério")

# Writing to excel
sheet_name = "Bureaucracy x Cabinet"

with pd.ExcelWriter(file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    df_final_and_plot.to_excel(writer, sheet_name=sheet_name, index=False)


Title is more than 31 characters. Some applications may not be able to read the file



In [52]:
import plotly.express as px

# Plot time series for 'conc_parc_mean' with lines for each bureaucracy_cabinet category
fig = px.line(
    df_final_and_plot,
    x='Year',
    y='Mean % Conceded Partially',
    color='Category (Bureaucracy or cabinet)',
    markers=True,
    title='Mean % Conceded Partially Over Years by Bureaucracy Cabinet',
)

fig.update_layout(
    xaxis=dict(dtick=1),  # Ensure each year is shown on the x-axis
    yaxis_title='Mean % Conceded Partially',
    xaxis_title='Year',
    title_x=0.5  # Center the title
)

fig.show()