In [1]:
using Pkg
Pkg.activate(".")

[32m[1m  Activating[22m[39m project at `~/Documents/GitHub/MacroEconBrasil`


In [5]:
using Arrow
using DataFrames
using StatsBase
using Vizagrams
using StructArrays
using KernelDensity
using InverseFunctions
using PrettyNumbers

In [6]:
"""
    get_dist_renda(ano; bandwidth=0.15, idade_min=18)

Estimate the kernel density of `log1p(renda)` for the survey year `ano`.

The microdata are read from `microdados_pnadc_<ano>.feather` (relative to the
current working directory). Rows with a missing `renda` or `idade` are dropped,
and only rows with `idade >= idade_min` are kept.

# Keywords
- `bandwidth`: bandwidth forwarded to `kde` (default `0.15`).
- `idade_min`: minimum value of `idade` (inclusive) for a row to be kept
  (default `18`).

# Returns
A `StructArray` with one row per point of the KDE grid and the columns
`log_renda` (grid point), `density` (estimated density at that point),
`renda` (the grid point mapped back through the inverse of `log1p`) and
`ano` (the `ano` argument repeated on every row).
"""
function get_dist_renda(ano; bandwidth=0.15, idade_min=18)
    df = DataFrame(Arrow.Table("microdados_pnadc_$(ano).feather"))

    # Both columns are needed below: `renda` feeds the KDE and `idade` builds the
    # row mask. A `missing` inside the mask would make the row indexing fail.
    df = dropmissing(df, [:renda, :idade])
    df = df[df.idade .>= idade_min, :]

    # log1p (rather than log) so that a zero value maps to 0.
    log_renda = log1p.(df.renda)
    dist = kde(log_renda; bandwidth=bandwidth)

    # Map the KDE grid back to the original scale.
    renda = InverseFunctions.inverse(log1p).(dist.x)

    return StructArray(;
        log_renda=dist.x,
        density=dist.density,
        renda=renda,
        ano=fill(ano, length(dist.x)),
    )
end

get_dist_renda (generic function with 1 method)

In [7]:
# Stack the per-year density tables for 2012 through 2023 into a single DataFrame.
df = DataFrame(mapreduce(get_dist_renda, vcat, 2012:2023));

In [19]:
# Rows belonging to the 2023 density curve only.
df_2023 = filter(:ano => ==(2023), df);

In [51]:
# Values at which the x axis is labelled. The plotted x field is `log_renda`,
# so the tick positions are passed through log1p while the labels show the raw values.
ticks = [0, 10, 100, 1000, 5000, 20000, 300000]
tickstexts = pretty_number.(String, ticks)

# One density curve per year, coloured by `ano`.
plt = let
    xaxis_cfg = (title="Renda R\$", ticktexts=tickstexts)
    x_enc = (field=:log_renda, guide=(tickvalues=log1p.(ticks),))
    color_enc = (field=:ano, datatype=:o, colorscheme=:rainbow)

    # Draw one line per colour group, styled with that group's colour.
    curves = ∑(i=:color) do rows
        c = rows.color[1]
        S(:fillOpacity => 0.1, :fill => c, :stroke => c) * Line(rows.x, rows.y)
    end
    # Alternative graphic kept from the original notebook (not used):
    # ∑(i=:color,op=(x,y)->x + T(0,10)y) do rows
    #     S(:stroke=>rows.color[1],:strokeWidth=>2)Line(rows.x,rows.y)
    # end

    plot(
        df;
        figsize=(800, 200),
        config=(xaxis=xaxis_cfg,),
        x=x_enc,
        y=:density,
        color=color_enc,
        graphic=curves,
    )
end

draw(plt)

In [131]:
# Single KDE curve over `df.log_renda`.
# NOTE(review): as built in the visible cells, `df.log_renda` holds the KDE grid
# points of the per-year curves, not raw observations — confirm this is the
# intended input for a second KDE.
dist = kde(df.log_renda; bandwidth=0.15);
dd = StructArray(; x=dist.x, y=dist.density);

# Inverse of log1p; only needed for the alternative tick definition
#   ticks = ilog1p.([0,2.5,5,7.5,10,12.5,15])
# optionally followed by round.(ticks;digits=0).
ilog1p = InverseFunctions.inverse(log1p)

# Values at which the x axis is labelled (positions are log1p of these values).
ticks = [0, 10, 100, 1000, 5000, 20000, 300000]
tickstexts = pretty_number.(String, ticks)

plt = let
    xaxis_cfg = (title="Renda R\$", ticktexts=tickstexts)
    x_enc = (field=:x, guide=(tickvalues=log1p.(ticks),))
    curve = S(:strokeWidth => 2) * Line()

    plot(
        dd;
        figsize=(800, 200),
        config=(xaxis=xaxis_cfg,),
        x=x_enc,
        y=:y,
        graphic=curve,
    )
end

draw(plt)

In [132]:
# Histogram of `df.renda` over hand-picked, unevenly spaced bin edges
# (an automatic alternative would be `fit(Histogram, df.renda, nbins=10)`).
# NOTE(review): as built in the visible cells, `df.renda` holds back-transformed
# KDE grid points rather than raw observations — confirm this is intended.
h = fit(
    Histogram,
    df.renda,
    [
        0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000,
        5000, 6000, 7000, 8000, 9000, 10000,
        12000, 14000, 16000, 20000, 30000,
        50000, 100000, 200000, 300000,
    ],
);

# Bin edges as stored in the fitted histogram.
edges = h.edges[1]

# Midpoint of every bin: average of each pair of consecutive edges.
bin_centers = [(lo + hi) / 2 for (lo, hi) in zip(edges, Iterators.drop(edges, 1))]

# Table of (bin centre, bin count) used by the plot in the next cell.
data = StructArray(; x=bin_centers, h=h.weights);

In [133]:
# Line plot of the bin counts `h` against the bin centres `x` from `data`.
hist = let
    # Both fields use datatype `:q` (presumably "quantitative" — confirm in the Vizagrams docs).
    enc = (x=(field=:x, datatype=:q), y=(field=:h, datatype=:q))
    Plot(; data=data, encodings=enc, graphic=Line())
end

draw(hist)