In [None]:
import pandas as pd
from utils import print_equals
import scipy.stats as stats

from pathlib import Path

import openpyxl
from openpyxl import load_workbook

### Comparando média de conformidade das agências contra governos populistas e a favor de governos populistas antes, depois e durante o governo bolsonaro

In [4]:
from transform import INTERIM_DIR

final_df = pd.read_parquet(INTERIM_DIR / "data-parquet-final-1.parquet")

In [5]:
import plotly.express as px
import plotly.graph_objects as go

# Filtering "contra" and "alinhada" agencies
df_plot_1 = final_df.copy()
df_plot_1 = df_plot_1[df_plot_1['category'].isin(['contra', 'alinhada'])]

# Calculating mean of conformity by each year and category
df_plot_1 = df_plot_1.groupby(['year', 'category'])['conc_parc'].mean().reset_index()

# Getting the president of each year
df_presidents = final_df[['year', 'president']].drop_duplicates()
df_plot_1 = df_plot_1.merge(df_presidents, on='year', how='left')

# Criar gráfico
fig = px.line(
    df_plot_1, x='year', y='conc_parc',
    color='category',  # Apenas duas linhas para as categorys
    line_dash='category',
    markers=True, 
    hover_data={'year': True, 'conc_parc': True, 'president': True},
    labels={'conc_parc': 'Média de Conformidade', 'year': 'Ano', 'category': 'category'}
)

# Adicionar pontos específicos para os presidentes
for presidente in df_presidents['president'].unique():
    df_pres = df_plot_1[df_plot_1['president'] == presidente]
    fig.add_trace(go.Scatter(
        x=df_pres['year'], y=df_pres['conc_parc'],
        mode='markers',
        marker=dict(size=10, symbol='circle'),
        name=presidente
    ))


fig.update_layout(
    title='Média de Conformidade das Agências "Contra" e "Alinhadas" ao Longo dos Anos',
    xaxis_title='Ano',
    yaxis_title='Média de Conformidade',
    legend_title='Legenda',
    hovermode='x unified'
)

fig.show()


NameError: name 'final_df' is not defined

In [10]:
from plots import plot_grid_all_agencies

fig = plot_grid_all_agencies(final_df, width=1200)
fig.show()

O que mais queria é o seguinte: pega a média das alinhada e das contra só para todos os presidentes que não sejam Bolsonaro. Depois, pega a média para Bolsonaro. Depois compara - test of means (two-tailed T test) - para ver se tem uma diferença estatisticamente significante entre ex. CGU durante presidentes regulares  V. CGU durante Bolsonaro. Faça isso para cada agência, se puder, e depois para todos as agências (alinhada/contra) de forma agregada.

In [8]:
final_df.head(5)

Unnamed: 0,president,year,agency,conc_parc,category
0,Dilma Rousseff,2015,ABGF - Agencia Brasileira Gestora de Fundos Ga...,0.339623,neutra
1,Dilma Rousseff,2015,AEB – Agência Espacial Brasileira,0.962264,neutra
2,Dilma Rousseff,2015,AMAZUL - Amazônia Azul Tecnologias de Defesa S.A.,0.909091,neutra
3,Dilma Rousseff,2015,AN – Arquivo Nacional,0.955128,neutra
4,Dilma Rousseff,2015,ANA – Agência Nacional de Águas,0.996875,neutra


### Testing the filtering in president and in category

In [7]:
aligned_df = final_df[final_df["category"] == "alinhada"]
not_aligned_df = final_df[final_df["category"] == "contra"]

not_bolsonaro_aligned_df = aligned_df[aligned_df["president"] != "Jair Bolsonaro"]
bolsonaro_aligned_df = aligned_df[aligned_df["president"] == "Jair Bolsonaro"]
not_bolsonaro_not_aligned_df = not_aligned_df[not_aligned_df["president"] != "Jair Bolsonaro"]
bolsonaro_not_aligned_df = not_aligned_df[not_aligned_df["president"] == "Jair Bolsonaro"]

# Testing
print_equals(40)
print("Primeiras 5 linhas df só com empresas alinhadas:")
display(aligned_df.head(5))
print("Tamanho df só com empresas alinhadas:")
print(len(aligned_df.index))
print("Nomes dos presidentes:")
print(aligned_df["president"].unique())
print_equals(30)

print("Testando df sem bolsonaro:")
print("Primeiras 5 linhas:")
display(not_bolsonaro_aligned_df.head(5))
print_equals(20)
print("Quantidade de linhas:")
print(len(not_bolsonaro_aligned_df.index))
print("Nomes dos presidentes:")
print(not_bolsonaro_aligned_df["president"].unique())
print_equals(30)

print("Testando df com bolsonaro:")
print("Primeiras 5 linhas:")
display(bolsonaro_aligned_df.head(5))
print_equals(20)
print("Quantidade de linhas:")
print(len(bolsonaro_aligned_df.index))
print_equals(20)
print("Nomes dos presidentes:")
print(bolsonaro_aligned_df["president"].unique())
print_equals(30)

Primeiras 5 linhas df só com empresas alinhadas:


Unnamed: 0,president,year,agency,conc_parc,category
255,Dilma Rousseff,2015,CGU – Controladoria-Geral da União,0.684647,alinhada
256,Dilma Rousseff,2015,MAPA - Ministério da Agricultura e Pecuária,0.860215,alinhada
259,Dilma Rousseff,2015,MD – Ministério da Defesa,0.837748,alinhada
263,Dilma Rousseff,2015,MF - Ministério da Fazenda,0.716955,alinhada
265,Dilma Rousseff,2015,MJSP – Ministério da Justiça e Segurança Pública,0.837927,alinhada


Tamanho df só com empresas alinhadas:
58
Nomes dos presidentes:
['Dilma Rousseff' 'Michel Temer' 'Jair Bolsonaro'
 'Luiz Inácio Lula da Silva']
Testando df sem bolsonaro:
Primeiras 5 linhas:


Unnamed: 0,president,year,agency,conc_parc,category
255,Dilma Rousseff,2015,CGU – Controladoria-Geral da União,0.684647,alinhada
256,Dilma Rousseff,2015,MAPA - Ministério da Agricultura e Pecuária,0.860215,alinhada
259,Dilma Rousseff,2015,MD – Ministério da Defesa,0.837748,alinhada
263,Dilma Rousseff,2015,MF - Ministério da Fazenda,0.716955,alinhada
265,Dilma Rousseff,2015,MJSP – Ministério da Justiça e Segurança Pública,0.837927,alinhada


Quantidade de linhas:
37
Nomes dos presidentes:
['Dilma Rousseff' 'Michel Temer' 'Luiz Inácio Lula da Silva']
Testando df com bolsonaro:
Primeiras 5 linhas:


Unnamed: 0,president,year,agency,conc_parc,category
1559,Jair Bolsonaro,2019,MF - Ministério da Fazenda,0.489094,alinhada
1649,Jair Bolsonaro,2019,CGU – Controladoria-Geral da União,0.719971,alinhada
1650,Jair Bolsonaro,2019,MAPA - Ministério da Agricultura e Pecuária,0.857547,alinhada
1652,Jair Bolsonaro,2019,MD – Ministério da Defesa,0.657194,alinhada
1656,Jair Bolsonaro,2019,MJSP – Ministério da Justiça e Segurança Pública,0.68942,alinhada


Quantidade de linhas:
21
Nomes dos presidentes:
['Jair Bolsonaro']


In [None]:
# Defining functions

def two_tailed_t_test(dist_1: pd.Series, dist_2: pd.Series, alpha: float = 0.5) -> bool:
    """
    Function to evaluate the two-tailed t-test and print the results
    """
    _, p_value = stats.ttest_ind(dist_1, dist_2)

    if p_value < alpha:
        return True
    else:
        return False

def two_tailed_t_test_aligned_not_aligned_bolsonaro(
        not_bolsonaro_aligned_conceded_distribution: pd.Series,
        bolsonaro_aligned_conceded_distribution: pd.Series,
        not_bolsonaro_not_aligned_conceded_distribution: pd.Series,
        bolsonaro_not_aligned_conceded_distribution: pd.Series,
        alpha: float | list[float] = 0.5
    ):
    
    # two-tailed t-test to evaluate difference between aligned agencies
    print(f"Two-tailed t-tests with alpha = {alpha}")

    print("For the aligned agencies:")
    if two_tailed_t_test(not_bolsonaro_aligned_conceded_distribution, bolsonaro_aligned_conceded_distribution):
        print(f"There's a significant difference between the periods with and without Bolsonaro in the presidence")
    else:
        print(f"There's NOT a significant difference between the periods with and without Bolsonaro in the presidence")
    
    print("For the not-aligned agencies:")
    if two_tailed_t_test(not_bolsonaro_not_aligned_conceded_distribution, bolsonaro_not_aligned_conceded_distribution):
        print(f"There's a significant difference between the periods with and without Bolsonaro in the presidence")
    else:
        print(f"There's NOT a significant difference between the periods with and without Bolsonaro in the presidence")

def two_tailed_t_test_aligned_not_aligned_bolsonaro_every_agency(final_df: pd.DataFrame, alpha: float = 0.5):
    print(f"Two-tailed t-tests with alpha = {alpha}")
    
    not_neutral_agencies = final_df[final_df["category"] != "neutra"]

    rejected_null_h = {}
    for agency in not_neutral_agencies["agency"].unique():
        # filtering by the agency
        current_agency_df = final_df[final_df["agency"] == agency]
        
        # Filtering by Bolsonaro
        bolsonaro_df = current_agency_df[current_agency_df["president"] == "Jair Bolsonaro"]
        not_bolsonaro_df = current_agency_df[current_agency_df["president"] != "Jair Bolsonaro"]

        # Getting the desired column
        dist_bolsonaro = bolsonaro_df["conc_parc"]
        dist_not_bolsonaro = not_bolsonaro_df["conc_parc"]

        rejected_null_h[agency] = two_tailed_t_test(dist_bolsonaro, dist_not_bolsonaro, alpha)

    # Mapping the results of the tests to the dataframe
    not_neutral_agencies["changed_during_bolsonaro"] = not_neutral_agencies["agency"].map(rejected_null_h)
    return not_neutral_agencies

In [None]:
# Executing the computation the agencies aggregated
not_bolsonaro_aligned_conceded_distribution = not_bolsonaro_aligned_df["conc_parc"]
bolsonaro_aligned_conceded_distribution = bolsonaro_aligned_df["conc_parc"]
not_bolsonaro_not_aligned_conceded_distribution = not_bolsonaro_not_aligned_df["conc_parc"]
bolsonaro_not_aligned_conceded_distribution = bolsonaro_not_aligned_df["conc_parc"]

two_tailed_t_test_aligned_not_aligned_bolsonaro(
    not_bolsonaro_aligned_conceded_distribution,
    bolsonaro_aligned_conceded_distribution,
    not_bolsonaro_not_aligned_conceded_distribution,
    bolsonaro_not_aligned_conceded_distribution
)

two-tailed t-tests with alpha = 0.5
For the aligned agencies:
There's a significant difference between the periods with and without Bolsonaro in the presidence
For the not-aligned agencies:
There's NOT a significant difference between the periods with and without Bolsonaro in the presidence


In [None]:
# All agencies

num_not_neutral_agencies = len(final_df[final_df["category"] != "neutra"]["agency"].unique())
print(f"Nº of not neutral agencies: {num_not_neutral_agencies}")

# Executing the computation for each agency
all_agencies_df = two_tailed_t_test_aligned_not_aligned_bolsonaro_every_agency(final_df)

agencies_that_not_changed = list(all_agencies_df[~all_agencies_df["changed_during_bolsonaro"]]["agency"].unique())
agencies_that_changed = list(all_agencies_df[all_agencies_df["changed_during_bolsonaro"]]["agency"].unique())
num_agencies_that_not_changed = len(agencies_that_not_changed)
num_agencies_that_changed = len(agencies_that_changed)

print_equals(30)
print(f"Nº of agencies that changed during Bolsonaro presidence {num_agencies_that_changed}:")
for idx, agency in enumerate(agencies_that_changed):
    print(f"{idx + 1}. {agency}")
print_equals(20)
print(f"Nº of agencies that didn't changed during Bolsonaro presidence {num_agencies_that_not_changed}:")
for idx, agency in enumerate(agencies_that_not_changed):
    print(f"{idx + 1}. {agency}")

Nº of not neutral agencies: 13
Nº of agencies that changed during Bolsonaro presidence 11:
1. ANVISA – Agência Nacional de Vigilância Sanitária
2. FUNAI – Fundação Nacional dos Povos Indígenas
3. IBAMA – Instituto Brasileiro do Meio Ambiente e dos Recursos Naturais Renováveis
4. CGU – Controladoria-Geral da União
5. MAPA - Ministério da Agricultura e Pecuária
6. MD – Ministério da Defesa
7. MEC – Ministério da Educação
8. MF - Ministério da Fazenda
9. MMA – Ministério do Meio Ambiente e Mudança do Clima
10. MS – Ministério da Saúde
11. MDA - Ministério do Desenvolvimento Agrário e Agricultura Familiar
Nº of agencies that didn't changed during Bolsonaro presidence 2:
1. EBC – Empresa Brasil de Comunicação S.A.
2. MJSP – Ministério da Justiça e Segurança Pública


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  not_neutral_agencies["changed_during_bolsonaro"] = not_neutral_agencies["agency"].map(rejected_null_h)


How did each president’s cabinets perform relative to others? I would like total cabinet numbers per president and per year aggregated for all variables. Put these aggregate totals in one table, in a separate tab.