In [1]:
import pandas as pd
from utils import write_to_excel

How did each president’s cabinets perform relative to others? I would like total cabinet numbers per president and per year aggregated for all variables. Put these aggregate totals in one table, in a separate tab.

In [2]:
from extract import INTERIM_DIR

# Load the edited dataset from the project's interim data directory
parquet_path = INTERIM_DIR / "data-parquet-editada-final-1.parquet"
df = pd.read_parquet(parquet_path)

Arquivo excel não encontrado


In [3]:

# Tidy the loaded frame in one chained pass:
#   - shorten the verbose category column name
#   - relabel "Ministério" as "cabinet" (literal substring replace, no regex)
#   - mark rows with a missing party as "no party"
df = df.rename(
    columns={"category (bureaucracy or cabinet)": "bureaucracy_cabinet"}
).assign(
    bureaucracy_cabinet=lambda frame: frame["bureaucracy_cabinet"].str.replace(
        "Ministério", "cabinet", regex=False
    ),
    party=lambda frame: frame["party"].fillna("no party"),
)

In [4]:
# Preview the first five rows of the cleaned frame
df.head(5)

Unnamed: 0,president,conc_parc,party,agency,bureaucracy_cabinet,year,category
0,Dilma Rousseff,0.339623,no party,ABGF - Agencia Brasileira Gestora de Fundos Ga...,Burocracia,2015,neutra
1,Dilma Rousseff,0.962264,no party,AEB – Agência Espacial Brasileira,Burocracia,2015,neutra
2,Dilma Rousseff,0.909091,no party,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,Burocracia,2015,neutra
3,Dilma Rousseff,0.955128,no party,AN – Arquivo Nacional,Burocracia,2015,neutra
4,Dilma Rousseff,0.996875,no party,ANA – Agência Nacional de Águas,Burocracia,2015,neutra


In [5]:
# Keep only the rows whose category is "cabinet"
df_only_cabinet = df.loc[df["bureaucracy_cabinet"].eq("cabinet")]

# Number of cabinet rows per year, as a two-column frame: year, count
df_agencies_year = (
    df_only_cabinet.groupby("year")["bureaucracy_cabinet"]
    .count()
    .rename("count")
    .reset_index()
)

In [6]:
# Work on a copy so df_agencies_year itself keeps its original columns
copy_df_agencies_year = df_agencies_year.copy()

# Year -> president lookup, built once instead of re-filtering df for every year.
# drop_duplicates keeps the first row of each year, so each year maps to the
# president found on its first row in df.
president_by_year = (
    df.drop_duplicates("year").set_index("year")["president"].to_dict()
)

# Adding president column again (vectorised; the column exists even if the
# frame has no rows)
copy_df_agencies_year["president"] = copy_df_agencies_year["year"].map(president_by_year)

# One log line per year, as a quick visual check of the mapping
for year, president in zip(copy_df_agencies_year["year"], copy_df_agencies_year["president"]):
    print(f"President in {year}: {president}")

# Formatting to add table back to excel
copy_df_agencies_year.columns = copy_df_agencies_year.columns.str.capitalize()
copy_df_agencies_year = copy_df_agencies_year.rename(columns={"Count": "Number of Cabinets"})

President in 2015: Dilma Rousseff
President in 2016: Dilma Rousseff
President in 2017: Michel Temer
President in 2018: Michel Temer
President in 2019: Jair Bolsonaro
President in 2020: Jair Bolsonaro
President in 2021: Jair Bolsonaro
President in 2022: Jair Bolsonaro
President in 2023: Luiz Inácio Lula da Silva
President in 2024: Luiz Inácio Lula da Silva


In [7]:
# Show the yearly cabinet counts together with the president of each year
copy_df_agencies_year

Unnamed: 0,Year,Number of Cabinets,President
0,2015,19,Dilma Rousseff
1,2016,36,Dilma Rousseff
2,2017,33,Michel Temer
3,2018,17,Michel Temer
4,2019,18,Jair Bolsonaro
5,2020,19,Jair Bolsonaro
6,2021,19,Jair Bolsonaro
7,2022,19,Jair Bolsonaro
8,2023,36,Luiz Inácio Lula da Silva
9,2024,42,Luiz Inácio Lula da Silva


How did different parties in the cabinet perform year to year and on the aggregate, by president and overall? (aggregate by year, by president, and total).

In [8]:
# Performance year by year
# Keep only cabinet rows that have a party. The mask is built from
# df_only_cabinet itself so it always matches the frame being filtered.
df_only_parties = df_only_cabinet.loc[df_only_cabinet["party"] != "no party"]

# Mean conc_parc for each party in each year
df_cabinet_parties = df_only_parties.groupby(["party", "year"])["conc_parc"].mean().reset_index()

# Adding presidents column back
# (if a year appears with several presidents in df, the last row wins)
df_cabinet_parties['president'] = df_cabinet_parties['year'].map(dict(zip(df["year"], df["president"])))

# Short president names for the chart labels.
# Keys must match the spelling used in the data exactly; names without an
# entry here are left unchanged.
replace_names_dict = {
    "Michel Temer": "Temer",
    "Jair Bolsonaro": "Bolsonaro",
    "Luiz Inácio Lula da Silva": "Lula",
}
df_cabinet_parties["president"] = df_cabinet_parties["president"].replace(replace_names_dict)

In [9]:
import altair as alt
import matplotlib.pyplot as plt
from matplotlib.colors import to_hex

# Palette: one tab20 colour per party, cycling through the palette again
# if there are more parties than colours
num_parties = df_cabinet_parties['party'].nunique()
colors = plt.get_cmap('tab20').colors

# Convert RGB to HEX format
party_colors_hex = [to_hex(colors[i % len(colors)]) for i in range(num_parties)]

# Map the parties (alphabetical order) to the palette
party_list = sorted(df_cabinet_parties['party'].unique())
color_scale = alt.Scale(domain=party_list, range=party_colors_hex)

# Grouped bar chart: one group per year, one bar per party inside the group
bars = (
    alt.Chart(df_cabinet_parties)
    .mark_bar()
    .encode(
        x=alt.X('year:O', title='Year', axis=alt.Axis(labelAngle=0)),
        xOffset='party:N',
        y=alt.Y('conc_parc:Q', title='Percentage of Conceded and Partially conceded responses'),
        color=alt.Color('party:N', title='Party', scale=color_scale),
        tooltip=['year', 'party', 'conc_parc', 'president'],
    )
    .properties(width=1200, height=400)
)

# President name above each year; one text mark per year, shifted upwards with dy
president_labels = (
    alt.Chart(df_cabinet_parties.drop_duplicates('year'))
    .mark_text(
        align='center',
        baseline='bottom',
        dy=-200,
        fontSize=14,
        fontWeight='bold',
    )
    .encode(
        x=alt.X('year:O', title=None, axis=alt.Axis(labels=False, ticks=False)),
        text='president:N',
    )
)

# Layer the annotations over the bars; each layer keeps its own x scale
chart = alt.layer(bars, president_labels).resolve_scale(x='independent')
chart

Processo do gráfico
- Retirei registros que não fossem do gabinete
- Retirei agências sem partido
- Agrupei valores por ano e por partido
- Adicionei os presidentes

In [10]:
# Performance by president

# Mean conc_parc of every party under every president
# (cabinet rows with a party only)
df_grouped = df_only_parties.groupby(["party", "president"], as_index=False)["conc_parc"].mean()
df_grouped.head()

Unnamed: 0,party,president,conc_parc
0,DEM,Michel Temer,0.853594
1,MDB,Luiz Inácio Lula da Silva,0.859393
2,MDB,Michel Temer,0.739098
3,PCdoB,Luiz Inácio Lula da Silva,0.888889
4,PDB,Luiz Inácio Lula da Silva,0.658537


How did aligned parties in the cabinet perform per year and in the aggregate?

Favor me dar as médias para as instituições por presidente.

In [11]:
# Independent copy of df, used as the source for the per-agency averages
copy_df = df.copy()

# Average conc_parc per (president, agency) pair, stored as "mean_conc_parc"
df_mean_agencies_president = (
    copy_df.groupby(["president", "agency"])["conc_parc"]
    .mean()
    .rename("mean_conc_parc")
    .reset_index()
)

In [12]:
from transform import prepare_columns_to_excel

# Preparing df for writing to excel.
# NOTE(review): prepare_columns_to_excel comes from the project's transform
# module and is not visible here; going by the displayed output it returns the
# frame with presentation-style headers (President, Agency,
# % Mean Conceded Partially) — confirm against its definition.
df_excel = prepare_columns_to_excel(df_mean_agencies_president)
df_excel

Unnamed: 0,President,Agency,% Mean Conceded Partially
0,Dilma Rousseff,ABGF - Agencia Brasileira Gestora de Fundos Ga...,0.336478
1,Dilma Rousseff,AEB – Agência Espacial Brasileira,0.942671
2,Dilma Rousseff,AGU – Advocacia-Geral da União,0.473912
3,Dilma Rousseff,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,0.871212
4,Dilma Rousseff,AN – Arquivo Nacional,0.887139
...,...,...,...
1190,Michel Temer,UNIR – Fundação Universidade Federal de Rondônia,0.774731
1191,Michel Temer,UNIRIO – Universidade Federal do Estado do Rio...,0.709259
1192,Michel Temer,UNIVASF – Fundação Universidade Federal do Val...,0.795824
1193,Michel Temer,UTFPR – Universidade Tecnológica Federal do Pa...,0.832208
