In [1]:
import pandas as pd
from utils import write_to_excel

How did each president’s cabinets perform relative to others? I would like total cabinet numbers per president and per year aggregated for all variables. Put these aggregate totals in one table, in a separate tab.

In [2]:
from extract import INTERIM_DIR
import os

print(os.getcwd())

df = pd.read_parquet(INTERIM_DIR / "data-parquet-editada-final-1.parquet")

c:\Users\daniel.almeida\Desktop\ptp\brazil-aligned


In [3]:
df = df.rename(columns={"category (bureaucracy or cabinet)": "bureaucracy_cabinet"})

# Changing column name
df["bureaucracy_cabinet"] = df["bureaucracy_cabinet"].str.replace("Ministério", "cabinet", regex=False)
# filling null values in party
df["party"] = df["party"].fillna("no party")

df.head(5)

Unnamed: 0,president,conc_parc,party,agency,bureaucracy_cabinet,year,category
0,Dilma Rousseff,0.339623,no party,ABGF - Agencia Brasileira Gestora de Fundos Ga...,Burocracia,2015,neutra
1,Dilma Rousseff,0.962264,no party,AEB – Agência Espacial Brasileira,Burocracia,2015,neutra
2,Dilma Rousseff,0.909091,no party,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,Burocracia,2015,neutra
3,Dilma Rousseff,0.955128,no party,AN – Arquivo Nacional,Burocracia,2015,neutra
4,Dilma Rousseff,0.996875,no party,ANA – Agência Nacional de Águas,Burocracia,2015,neutra


In [34]:
df_only_cabinet = df[df['bureaucracy_cabinet'] == 'cabinet']
df_aggregated = df_only_cabinet.groupby("year")['bureaucracy_cabinet'].count().reset_index(name='count')

In [35]:
# Number of cabinets for each year
copy_df_aggregated = df_aggregated.copy()

for year in copy_df_aggregated["year"]:
    president = df[df["year"] == year].iloc[0]["president"]
    print(f"President in {year}: {president}")
    copy_df_aggregated.loc[copy_df_aggregated["year"] == year, 'president'] = president

copy_df_aggregated.columns = copy_df_aggregated.columns.str.capitalize()
copy_df_aggregated = copy_df_aggregated.rename(columns={"Count": "Number of Cabinets",})

# sheet_name = "Number of Cabinets"

# Writing to excel tab
copy_df_aggregated

President in 2015: Dilma Rousseff
President in 2016: Dilma Rousseff
President in 2017: Michel Temer
President in 2018: Michel Temer
President in 2019: Jair Bolsonaro
President in 2020: Jair Bolsonaro
President in 2021: Jair Bolsonaro
President in 2022: Jair Bolsonaro
President in 2023: Luiz Inácio Lula da Silva
President in 2024: Luiz Inácio Lula da Silva


Unnamed: 0,Year,Number of Cabinets,President
0,2015,19,Dilma Rousseff
1,2016,36,Dilma Rousseff
2,2017,33,Michel Temer
3,2018,17,Michel Temer
4,2019,18,Jair Bolsonaro
5,2020,19,Jair Bolsonaro
6,2021,19,Jair Bolsonaro
7,2022,19,Jair Bolsonaro
8,2023,36,Luiz Inácio Lula da Silva
9,2024,42,Luiz Inácio Lula da Silva


How did different parties in the cabinet perform year to year and on the aggregate, by president and overall? (aggregate by year, by president, and total).

In [74]:
display(df.head(5))
copy_df = df.copy()

Unnamed: 0,president,conc_parc,party,agency,bureaucracy_cabinet,year,category
0,Dilma Rousseff,0.339623,no party,ABGF - Agencia Brasileira Gestora de Fundos Ga...,Burocracia,2015,neutra
1,Dilma Rousseff,0.962264,no party,AEB – Agência Espacial Brasileira,Burocracia,2015,neutra
2,Dilma Rousseff,0.909091,no party,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,Burocracia,2015,neutra
3,Dilma Rousseff,0.955128,no party,AN – Arquivo Nacional,Burocracia,2015,neutra
4,Dilma Rousseff,0.996875,no party,ANA – Agência Nacional de Águas,Burocracia,2015,neutra


In [75]:
# Performance year by year
# Separating only agencies that belong the cabinet and with party
df_only_cabinet = copy_df.loc[copy_df["bureaucracy_cabinet"] == 'cabinet']
df_only_parties = df_only_cabinet.loc[copy_df["party"] != "no party"]

# Aggregating by year
df_grouped = df_only_parties.groupby(["party", "year"])["conc_parc"].mean().reset_index()

# Adding presidents back
df_grouped['president'] = df_grouped['year'].map(dict(zip(copy_df["year"], copy_df["president"])))

# Reducing the names of the presidents
replace_names_dict = {
    "Michel Temer": "Temer",
    "Jair Bolsnaro": "Bolsonaro",
    "Luiz Inácio Lula da Silva": "Lula",
}

df_grouped["president"] = df_grouped["president"].replace(replace_names_dict)

df_grouped = df_grouped.rename(columns={"conc_parc": "% mean conceded partially"})

In [20]:
df_grouped

Unnamed: 0,party,year,% mean conceded partially,president
0,DEM,2017,0.853594,Temer
1,MDB,2017,0.739098,Temer
2,MDB,2024,0.859393,Lula
3,PCdoB,2024,0.888889,Lula
4,PDB,2024,0.658537,Lula
5,PDT,2024,0.796813,Lula
6,PMDB,2017,0.792453,Temer
7,PODE,2017,0.687688,Temer
8,PODE,2018,0.735311,Temer
9,PODE,2019,0.742956,Jair Bolsonaro


In [76]:
import altair as alt
# Bar chart with grouped bars
bars = alt.Chart(df_grouped).mark_bar().encode(
    x=alt.X('year:O', title='Year', axis=alt.Axis(labelAngle=0)),
    xOffset='party:N',  # Group bars within each year
    y=alt.Y('% mean conceded partially:Q', title='Mean % of Conceded and Partially conceded responses'),
    color=alt.Color('party:N', title='Party'),
    tooltip=['year', 'party', '% mean conceded partially', 'president']
).properties(
    width=1200,
    height=400
)

# Annotations for Presidents
president_labels = alt.Chart(df_grouped.drop_duplicates('year')).mark_text(
    align='center',
    baseline='bottom',
    dy=-200,
    fontSize=14,
    fontWeight='bold'
).encode(
    x=alt.X('year:O', title=None, axis=alt.Axis(labels=False, ticks=False)),
    text='president:N'
)

# Combine bar chart and annotations
chart = (bars + president_labels).resolve_scale(x='independent').properties(
    title="Mean performance of parties each year"
)
chart

In [77]:
# Save as an interactive HTML file
chart.save('visualization_performance_parties_years.html')

Processo do gráfico
- Retirei registros que não fossem do gabinete
- Retirei agências sem partido
- Agrupei valores por ano e por partido
- Adicionei os presidentes

In [71]:
# Performance by president
df_grouped = df_only_parties.groupby(["party", "president"])["conc_parc"].mean().reset_index()

df_grouped = df_grouped.rename(columns={"conc_parc": "% mean conceded partially"})

bars = alt.Chart(df_grouped).mark_bar().encode(
    x=alt.X('president:O', title='President', axis=alt.Axis(labelAngle=0)),
    xOffset='party:N',  # Group bars within each year
    y=alt.Y('% mean conceded partially:Q', title='Mean % of Conceded and Partially conceded responses'),
    color=alt.Color('party:N', title='Party'),
    tooltip=['party', '% mean conceded partially', 'president']
).properties(
    width=1200,
    height=400
)

chart = bars.properties(
    title="Mean performance of parties overall for each president"
)
chart

In [40]:
from extract import RAW_DIR

write_to_excel(df_grouped, RAW_DIR / "Brazil-Aligned and Non-Aligned All Presidents(editada).xlsx", "Performance Parties Overall")

In [72]:
chart.save('visualization_performance_parties_overall.html')

How did aligned parties in the cabinet perform per year and in the aggregate?

In [61]:
copy_df = df.copy()

# Separating only "alinhada" agencies
aligned_df = copy_df.loc[copy_df["category"] == "alinhada"]
# Separating only agencies in the cabinet
cabinet_df = aligned_df.loc[aligned_df["bureaucracy_cabinet"] == "cabinet"]

# Grouping by party and year
grouped_df = cabinet_df.groupby(["party", "year"])["conc_parc"].mean().reset_index()
# Adding president again
grouped_df["president"] = grouped_df["year"].map(dict(zip(copy_df["year"], copy_df["president"])))
# Renaming column with mean of conceded partially
grouped_df = grouped_df.rename(columns={"conc_parc": "% mean conceded partially"})

# Replacing presidents names
dict_replace_presidents = {
    "Michel Temer": "Temer",
    "Jair Bolsonaro": "Bolsonaro",
    "Dilma Rousseff": "Dilma",
    "Luiz Inácio Lula da Silva": "Lula",
}

grouped_df["president"] = grouped_df["president"].replace(dict_replace_presidents)
grouped_df

Unnamed: 0,party,year,% mean conceded partially,president
0,PMDB,2017,0.792453,Temer
1,PODE,2018,0.711148,Temer
2,PODE,2019,0.731033,Bolsonaro
3,PODE,2020,0.713518,Bolsonaro
4,PODE,2021,0.703096,Bolsonaro
5,PODE,2022,0.690396,Bolsonaro
6,PODE,2023,0.742703,Lula
7,PP,2017,0.883135,Temer
8,PPS,2017,0.648649,Temer
9,PRD,2024,0.739759,Lula


In [62]:
bars = alt.Chart(grouped_df).mark_bar().encode(
    x=alt.X('year:O', title='Year', axis=alt.Axis(labelAngle=0)),
    xOffset='party:N',  # Group bars within each year
    y=alt.Y('% mean conceded partially:Q', title='Mean % of Conceded and Partially conceded responses'),
    color=alt.Color('party:N', title='Party'),
    tooltip=['year', 'party', '% mean conceded partially', 'president']
).properties(
    width=1200,
    height=400
)

# Annotations for Presidents
president_labels = alt.Chart(grouped_df).mark_text(
    align='center',
    baseline='bottom',
    dy=-200,
    fontSize=14,
    fontWeight='bold'
).encode(
    x=alt.X('year:O', title=None, axis=alt.Axis(labels=False, ticks=False)),
    text='president:N'
)

# Combine bar chart and annotations
chart = (bars + president_labels).resolve_scale(x='independent').properties(
    title="Mean performance of parties aligned to the president each year"
)
chart

In [63]:
chart.save('visualization_performance_aligned_parties_years.html')

In [None]:
# Overall
grouped_df = cabinet_df.groupby(["party", "president"])["conc_parc"].mean().reset_index()

grouped_df = df_grouped.rename(columns={"conc_parc": "% mean conceded partially"})
grouped_df.head(5)

Unnamed: 0,party,president,% mean conceded partially
0,DEM,Michel Temer,0.853594
1,MDB,Luiz Inácio Lula da Silva,0.859393
2,MDB,Michel Temer,0.739098
3,PCdoB,Luiz Inácio Lula da Silva,0.888889
4,PDB,Luiz Inácio Lula da Silva,0.658537


In [82]:
grouped_df.columns = grouped_df.columns.str.capitalize()

write_to_excel(grouped_df, RAW_DIR / "Brazil-Aligned and Non-Aligned All Presidents(editada).xlsx", "Perf Aligned Parties Overall")



In [69]:
# Performance by president
grouped_df = df_only_parties.groupby(["party", "president"])["conc_parc"].mean().reset_index()

grouped_df = grouped_df.rename(columns={"conc_parc": "% mean conceded partially"})

bars = alt.Chart(grouped_df).mark_bar().encode(
    x=alt.X('president:O', title='President', axis=alt.Axis(labelAngle=0)),
    xOffset='party:N',  # Group bars within each year
    y=alt.Y('% mean conceded partially:Q', title='Mean % of Conceded and Partially conceded responses'),
    color=alt.Color('party:N', title='Party'),
    tooltip=['party', '% mean conceded partially', 'president']
).properties(
    width=1200,
    height=400
)

chart = bars.properties(
    title="Mean performance of parties aligned to the president overall"
)
chart

In [70]:
chart.save('visualization_performance_aligned_parties_overall.html')