# 1) Imports

In [131]:
import pandas as pd
import numpy as np
import unidecode
import re

# 2) Coleta dos dados

## 2.1) Municípios potenciais

In [174]:
# Load the candidate-municipality table, then discard the stray index column
# ('Unnamed: 0') that was written into the CSV.
top_muns = pd.read_csv("../Dados/Tabela_final/municipios_potenciais.csv")
top_muns = top_muns.drop(columns=['Unnamed: 0'])

## 2.2) Dados de produção agrícola

In [164]:
def limpar_nome(nome):
    """Normalize a name so it can be used as a join/grouping key.

    The name is transliterated to ASCII (accents removed), any single-word
    parenthesised tag such as the state code "(CE)" is removed together with
    the whitespace in front of it, surrounding whitespace is trimmed, and the
    result is upper-cased.

    Multi-word parentheticals such as "(em grão)" are kept, because the
    pattern only matches one run of word characters between the parentheses.

    Parameters
    ----------
    nome : str
        Raw name, e.g. "Abaiara (CE)".

    Returns
    -------
    str
        Normalized name, e.g. "ABAIARA".
    """
    nome_sem_acentos = unidecode.unidecode(nome)  # Strip accents / transliterate
    # Drop "(XX)" together with the blank that precedes it; removing only the
    # parenthesised part would leave a trailing space ("ABAIARA ") that breaks
    # equality-based joins.
    nome_sem_codigo_estado = re.sub(r'\s*\(\w+\)', '', nome_sem_acentos)
    return nome_sem_codigo_estado.strip().upper()  # Trim and upper-case

def limpar_dataframe(df, valor, n_years=3, block_width=37):
    """Reshape a wide PAM table into one averaged row per municipality/product.

    The input has one 'Brasil e Município' column followed by repeated blocks
    of product columns, one block per year. Each block is stacked vertically,
    the placeholders '-' and '...' and missing values are replaced by 0, the
    table is melted to long format, and the value is averaged over the stacked
    blocks for every (MUNICIPIO, PRODUTO) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing a 'Brasil e Município' column.
    valor : str
        Name given to the value column of the result.
    n_years : int, default 3
        Number of column blocks to stack.
    block_width : int, default 37
        Column stride between consecutive blocks.

    Returns
    -------
    pandas.DataFrame
        Columns 'MUNICIPIO', 'PRODUTO' and `valor` (mean over the blocks).

    Notes
    -----
    * Zeros substituted for '-', '...' and NaN take part in the mean.
    * NOTE(review): block ``i`` is ``iloc[:, 1 + block_width*i : block_width*(i+1)]``,
      so columns at positions ``block_width*k`` (37 and 74 with the defaults)
      are never read. Confirm against the CSV layout that this is intended.
    * NOTE(review): municipalities are keyed by cleaned name only; if the
      cleaning removes the state code, homonymous municipalities from
      different states are averaged together. Verify with the data.
    """
    municipios = df['Brasil e Município'].apply(limpar_nome).tolist()

    blocos = []
    for i in range(n_years):
        # Copy the slice so renaming/inserting columns never touches `df`.
        bloco = df.iloc[:, 1 + block_width * i : block_width * (i + 1)].copy()
        # Remove only the ".<n>" suffix that pandas appends to duplicated
        # headers. str.rstrip('.1') strips a *set of characters*, which would
        # also eat legitimate trailing '1', '2' or '.' characters.
        bloco.columns = [re.sub(r'\.\d+$', '', str(coluna)) for coluna in bloco.columns]
        bloco.insert(0, 'MUNICIPIO', municipios)
        blocos.append(bloco)

    # Single concat instead of growing a frame inside the loop.
    new_df = pd.concat(blocos, ignore_index=True)

    new_df = new_df.replace(['-', '...'], 0)
    new_df = new_df.fillna(0)
    new_df = new_df.melt(id_vars=['MUNICIPIO'], var_name='PRODUTO', value_name=valor)
    new_df[valor] = new_df[valor].astype('int64')
    new_df = new_df[['MUNICIPIO', 'PRODUTO', valor]].groupby(['MUNICIPIO', 'PRODUTO']).mean().reset_index()
    new_df['PRODUTO'] = new_df['PRODUTO'].apply(limpar_nome)

    return new_df

# Read each semicolon-separated PAM table and run it through the cleaner in
# one step; the second argument names the resulting value column.
area_colhida = limpar_dataframe(
    pd.read_csv("../Dados/PAM/pam_area_colhida.csv", sep=';'), 'AREA_COLHIDA'
)
area_plantada = limpar_dataframe(
    pd.read_csv("../Dados/PAM/pam_area_plantada.csv", sep=';'), 'AREA_PLANTADA'
)
rend_medio = limpar_dataframe(
    pd.read_csv("../Dados/PAM/pam_rendimento_medio.csv", sep=';'), 'REND_MEDIO'
)
valor_producao = limpar_dataframe(
    pd.read_csv("../Dados/PAM/pam_valor_producao.csv", sep=';'), 'VALOR_PROD'
)

# Combine the four indicators on (MUNICIPIO, PRODUTO). Inner joins keep only
# the pairs that are present in every table.
agro = (
    area_plantada
    .merge(area_colhida, on=['MUNICIPIO', 'PRODUTO'], how='inner')
    .merge(rend_medio, on=['MUNICIPIO', 'PRODUTO'], how='inner')
    .merge(valor_producao, on=['MUNICIPIO', 'PRODUTO'], how='inner')
)

agro

Unnamed: 0,MUNICIPIO,PRODUTO,AREA_PLANTADA,AREA_COLHIDA,REND_MEDIO,VALOR_PROD
0,ABAIARA,ALFAFA FENADA,0.000000,0.000000,0.000000,0.000000
1,ABAIARA,ALGODAO ARBOREO (EM CAROCO),0.000000,0.000000,0.000000,0.000000
2,ABAIARA,ALGODAO HERBACEO (EM CAROCO),26.666667,26.666667,2796.333333,214.666667
3,ABAIARA,AMENDOIM (EM CASCA),15.000000,15.000000,978.000000,60.666667
4,ABAIARA,BANANA,26.666667,26.666667,18363.333333,786.333333
...,...,...,...,...,...,...
51907,ZABELE,TRIGO (EM GRAO),0.000000,0.000000,0.000000,0.000000
51908,ZABELE,TRITICALE (EM GRAO),0.000000,0.000000,0.000000,0.000000
51909,ZABELE,TUNGUE (FRUTO SECO),0.000000,0.000000,0.000000,0.000000
51910,ZABELE,URUCUM,0.000000,0.000000,0.000000,0.000000


In [175]:
# Display the candidate-municipality table loaded in section 2.1.
# NOTE(review): NOME keeps accents here (e.g. "ITAÍBA"), while MUNICIPIO in
# `agro` is accent-stripped — normalize NOME before joining the two tables.
top_muns

Unnamed: 0,NOME,UF,IDH,POP_TOT,POP_TOT_30KM,CLUSTER,AREA_IRRIGADA_POT_E,TRANSPORT_COST,REN_PIBPC_D,PANALF15
0,ITAPICURU,BA,0.486,35881.0,197766.2,1,23.347661,23766.055025,6.970836,19.390462
1,MATA GRANDE,AL,0.504,25555.2,129923.6,1,0.000000,24784.146031,4.465476,19.847595
2,MONTE SANTO,BA,0.506,53678.4,107356.8,1,156.631138,24661.745267,4.339055,15.978543
3,ITAÍBA,PE,0.510,26440.2,129160.2,1,0.000000,24766.124221,5.013908,19.760057
4,TUPANATINGA,PE,0.519,26445.0,135832.6,1,0.000000,24850.504059,4.498442,20.315929
...,...,...,...,...,...,...,...,...,...,...
62,RIBEIRA DO POMBAL,BA,0.601,52597.4,137844.2,1,5.193767,24140.726560,6.678153,13.721121
63,SANHARÓ,PE,0.603,25030.4,274896.2,1,0.000000,24794.121477,5.017081,19.950547
64,BOQUIRA,BA,0.603,22427.8,110383.8,1,0.000000,24795.945074,4.300096,12.868454
65,SIMÃO DIAS,SE,0.604,40522.2,120552.4,1,0.000000,23918.838452,8.691907,15.650402
