# Step 1: Replicate the Saez paper with Best-Kleven data

### Set up the iteration procedure

In [None]:
# Load libraries
using Parameters
using Plots
using Trapz
using NumericalIntegration
using SpecialFunctions
using Interpolations
using TimerOutputs
using JLD2
using Peaks
using DataFrames
using Statistics

In [None]:
# Add libraries
# using Pkg
# Pkg.add("Peaks")

In [None]:
# Primitives (data) container. Instances are populated from JLD2 files
# further down in this notebook (Saez data and Best-Kleven data).
# Fields carry no type annotations, so they accept any value; the loops in
# this notebook index `n` and `fn` as vectors and use `ntop` as a length.
@with_kw struct Prims_struct
    z            # Income
    n            # Ability grid (used as the integration variable)
    fn           # PDF of ability, evaluated on the grid `n`
    ntop         # Number of bins (length of the tax-rate vector τ)
end;


In [None]:
# Model parameters, collected in a keyword-constructed struct.
# `k` defaults to 1/e and is stored as Float64: 1/e is generally not an
# integer (e.g. e = 0.3), and an Int64 field makes the constructor throw an
# InexactError for such elasticities.
@with_kw struct Params_struct
    γ ::Int64            # SWF inequality aversion parameter
    e ::Float64          # Elasticity ε = 1/k
    k ::Float64 = 1/e    # Parametrizes elasticity (k = 1/ε); derived from `e` unless given
                         # TODO: decide whether a separate field is necessary
    R ::Int64            # Exogenous per-person revenue requirement
end;


In [None]:
"""
    inner_loop(prims, params, τ, T0, λ, SWF, Tax)

Perform one update of the tax-rate vector `τ` for a given intercept `T0`.

# Arguments
- `prims`: primitives; reads `prims.n` (integration grid) and `prims.fn`.
- `params`: parameters; reads `params.e`, `params.k` and `params.γ`.
- `τ`: current tax-rate vector on the grid `prims.n`.
- `T0`: intercept subtracted from the utility profile.
- `λ`, `SWF`, `Tax`: values from the previous pass; they are only copied so
  that the caller can compare old and new values.

# Returns
The tuple `(τ_new, λ, λ0, SWF, SWF0, Tax, Tax0)`, where the `0`-suffixed
entries are copies of the values passed in and the others are freshly computed.
"""
function inner_loop(prims, params, τ, T0, λ, SWF, Tax)
    n, fn = prims.n, prims.fn
    e, k, γ = params.e, params.k, params.γ

    # Snapshot the incoming values so the caller can measure the change
    λ_prev = copy(λ)
    SWF_prev = copy(SWF)
    Tax_prev = copy(Tax)

    # Utility profile: cumulative integral over the grid, shifted by -T0
    expo = 1 + e
    u = -T0 .+ cumul_integrate(n, ((1 .- τ) .^ expo) / expo)

    # u^(-γ), reused for the MVPF and for the tax formula
    g = u .^ (-γ)

    # MVPF
    λ_next = trapz(n, g .* fn)

    # RHS of the optimal tax formula
    rhs = (cumul_integrate(n, (g / λ_next .- 1) .* fn) ./ (n .* fn)) * k

    # Updated tax rate implied by τ/(1-τ) = rhs
    τ_next = rhs ./ (1 .+ rhs)

    # Income, consumption and tax liability under the updated rate
    z_next = n .* (1 .- τ_next) .^ e
    c = u .+ n .* ((z_next ./ n) .^ (k + 1)) / (k + 1)
    T = z_next - c

    # Government aggregates
    Tax_next = trapz(n, T .* fn)
    # NOTE(review): digamma is applied to utility as the welfare transform;
    # confirm this is the intended social welfare function.
    SWF_next = trapz(n, digamma.(u) .* fn)

    return τ_next, λ_next, λ_prev, SWF_next, SWF_prev, Tax_next, Tax_prev
end

In [None]:
"""
    outer_loop(prims, params; tax_tol=1, inner_tol=0.0001,
               max_outer_iter=typemax(Int), max_inner_iter=typemax(Int))

Solve for the tax-rate vector and intercept `T0` that meet the revenue
requirement `params.R`.

The outer loop shifts `T0` by the current revenue gap `dTax = Tax - params.R`
until `abs(dTax) <= tax_tol`. For each `T0`, the inner loop calls `inner_loop`
repeatedly until λ, SWF and Tax each change by at most `inner_tol` between
passes. The tax-rate vector is carried over between outer iterations.

# Arguments
- `prims`: primitives; `prims.ntop` sets the length of the tax-rate vector.
- `params`: parameters; `params.R` is the revenue requirement.

# Keywords
- `tax_tol`: stopping threshold for the absolute revenue gap (default `1`).
- `inner_tol`: stopping threshold for the inner loop (default `0.0001`).
- `max_outer_iter`, `max_inner_iter`: optional iteration caps. The defaults
  impose no practical limit. Hitting the inner cap simply ends that inner
  loop; hitting the outer cap triggers a warning.

# Returns
`(τ, T0, dTax)`: the tax-rate vector, the final intercept, and the remaining
revenue gap. A warning is logged if the returned gap is not finite or is
still above `tax_tol`.
"""
function outer_loop(prims, params; tax_tol=1, inner_tol=0.0001,
                    max_outer_iter=typemax(Int), max_inner_iter=typemax(Int))
    # Iterate over these variables of interest
    τ = fill(0.2, prims.ntop) # Tax rate
    T0 = -10000.0 # Negative income tax

    # Iteration variables. T0 is shifted by dTax at the top of every outer
    # iteration, so the first solve actually uses T0 = -20000.
    dTax = 10000.0
    outer_iter = 0

    # Iterate through the inner optimization loop
    while abs(dTax) > tax_tol && outer_iter < max_outer_iter
        # Update outer loop variables
        outer_iter += 1
        T0 = T0 - dTax # Adjust this if necessary

        # Create inner loop variables; each old/new pair starts one apart so
        # the inner loop runs at least once
        λ, λ0 = 1.0, 0.0     # Marginal value of public funds
        SWF, SWF0 = 1.0, 0.0 # SWF value
        Tax, Tax0 = 1.0, 0.0 # Tax revenue

        # Iterate through the inner loop
        inner_iter = 0
        while inner_iter < max_inner_iter &&
              (abs(λ0 - λ) > inner_tol ||
               abs(SWF0 - SWF) > inner_tol ||
               abs(Tax0 - Tax) > inner_tol)
            # Run the inner loop
            inner_iter += 1
            τ, λ, λ0, SWF, SWF0, Tax, Tax0 =
                inner_loop(prims, params, τ, T0, λ, SWF, Tax)
        end
        dTax = Tax - params.R
        println("Outer iter:", outer_iter, " dTax:", dTax, " Tax:", Tax)
        flush(stdout)
    end

    # Comparisons with NaN are false, so a NaN gap ends both loops as if they
    # had converged; make that case (and a capped run) visible to the caller.
    if !isfinite(dTax)
        @warn "outer_loop stopped with a non-finite revenue gap; the result is not a converged solution" outer_iter dTax
    elseif abs(dTax) > tax_tol
        @warn "outer_loop reached max_outer_iter before the revenue gap fell below tax_tol" outer_iter dTax
    end
    return τ, T0, dTax
end

### Test with Saez data

In [None]:
# Test with Saez data
# Read the stored Saez inputs from the JLD2 file
saez_jld = load("data/saez-data.jld2");

# Build the primitives and parameter structs from the loaded entries;
# the number of bins is the length of the ability grid
saez_prims = Prims_struct(
    z = saez_jld["z"],
    n = saez_jld["n"],
    fn = saez_jld["fn"],
    ntop = length(saez_jld["n"]),
);
saez_params = Params_struct(
    γ = saez_jld["γ"],
    e = saez_jld["e"],
    R = saez_jld["R"],
);

In [None]:
# Run with Saez data
# Returns the tax-rate vector, the final intercept T0, and the remaining
# revenue gap (Tax - R)
saez_τ, saez_T0, saez_dTax = outer_loop(saez_prims, saez_params);

In [None]:
# Plot the Saez tax-rate vector (no x values are passed to `plot`)
plot(saez_τ)

## Run with Best-Kleven data

### 1: $\gamma = 10$

In [None]:
# Run with Best-Kleven data
# Read the stored Best-Kleven primitives from the JLD2 file
bk_jld = load("data/zprimitives2.jld2");

# Build the primitives and parameter structs.
# γ and e come from the file; the revenue requirement is fixed at 4000 here.
# NOTE(review): this section is titled γ = 10 — confirm the file stores that value.
bk_prims = Prims_struct(
    z = bk_jld["z"],
    n = bk_jld["n"],
    fn = bk_jld["fn"],
    ntop = bk_jld["ntop"],
);
bk_params10 = Params_struct(
    γ = bk_jld["γ"],
    e = bk_jld["e"],
    R = 4000,
);

In [None]:
# Run with Best-Kleven data
# Returns the tax-rate vector, the final intercept T0, and the remaining
# revenue gap. No trailing semicolon, so the notebook displays the tuple.
bk_τ10, bk_T010, bk_dTax10 = outer_loop(bk_prims, bk_params10)

In [None]:
# Plot results with Best-Kleven data
# Choose the index range shown in the figures.
# (An alternative start point considered earlier was the first local maximum,
# found with `findmaxima`.)
bk_n010 = argmax(bk_τ10) # Index of the global max of the tax-rate vector
bk_nend = 978 # Taken from Best-Kleven replication package, possibly adjust later

In [None]:
# Plot the tax rates from the run above against income over the selected
# index range. Uses the `10`-suffixed results (`bk_τ10`, `bk_n010`); the
# unsuffixed names `bk_τ` and `bk_n0` are not assigned anywhere in the
# visible notebook and raise UndefVarError in a fresh session.
plot(bk_prims.z[bk_n010:bk_nend], bk_τ10[bk_n010:bk_nend],
     xlims = (0, 1e6), ylims = (0, 1))

#### Figure 1

Graph the MTR and the ATR on the same plot.

In [None]:
# Find the ATR
# Income increment of each bin; the first bin's increment is its own income level
bk_zdiff = diff(bk_prims.z);
pushfirst!(bk_zdiff, bk_prims.z[1]);
# Tax liability: the intercept plus the marginal rate accumulated over the
# increments. Uses the results of the run above (`bk_τ10`, `bk_T010`); the
# unsuffixed `bk_τ` and `bk_T0` are not assigned anywhere in the visible notebook.
bk_T = cumsum(bk_τ10 .* bk_zdiff) .+ bk_T010
# Average tax rate: liability divided by income
bk_ATR = bk_T ./ bk_prims.z;

In [None]:
# Plot (same axis)
# MTR and ATR against income over the selected index range. Uses `bk_τ10`
# and `bk_n010`; the unsuffixed `bk_τ` and `bk_n0` are not assigned anywhere
# in the visible notebook.
plot(bk_prims.z[bk_n010:bk_nend], bk_τ10[bk_n010:bk_nend],
     xlims = (0, 1e6), ylims = (-3, 1), right_margin=20Plots.mm,
     label = "MTR", legend =:topright, xlab = "Income", ylab = "Tax rate")
plot!(bk_prims.z[bk_n010:bk_nend], bk_ATR[bk_n010:bk_nend],
     label = "ATR", color = "red")

# savefig("figs/MTR_ATR_no_educ_same_axis")

In [None]:
# Plot (different axes)
# MTR on the left axis, ATR on a twin right axis, over the selected index
# range. Uses `bk_τ10` and `bk_n010`; the unsuffixed `bk_τ` and `bk_n0` are
# not assigned anywhere in the visible notebook.
plot(bk_prims.z[bk_n010:bk_nend], bk_τ10[bk_n010:bk_nend],
     xlims = (0, 1e6), ylims = (0, 1), right_margin=20Plots.mm,
     label = "MTR", legend =:topleft, xlab = "Income", ylab = "MTR")
plot!(twinx(),bk_prims.z[bk_n010:bk_nend], bk_ATR[bk_n010:bk_nend],
     xlims = (0, 1e6), ylims = (-3, 1), label = "ATR", 
     legend =:topright, color = "red", ylab = "ATR")

# savefig("figs/MTR_ATR_no_educ_diff_axis")

#### Figure 2

Graph $f_0(\omega_0 - E[\omega_0]), f_z(z - E[z]),$ and $f_T((z - T(z)) - E[z - T(z)])$ on the same plot.

In [None]:
# Function that smooths distributions
# (Same procedure as in Saez)
#
# Applies `niter` passes of a three-point weighted average (0.3, 0.4, 0.3) to
# the interior entries of `dist`; the first and last entries are never changed.
#
# Arguments:
#   dist  - array of values to smooth (not modified)
#   niter - number of smoothing passes; 0 returns an unsmoothed copy
# Returns a new array shaped like `dist`. The element type is promoted to a
# floating-point type, so integer input is accepted instead of failing when a
# fractional average is written back.
function smooth_dist(dist, niter)
    T = float(eltype(dist))
    # Two buffers that are swapped after every pass, instead of allocating a
    # fresh copy per pass. Both start as copies of `dist`, so the untouched
    # endpoints stay correct in whichever buffer is returned.
    prev = copyto!(similar(dist, T), dist)
    next = copy(prev)
    interior = (firstindex(prev) + 1):(lastindex(prev) - 1)
    for _ in 1:niter
        for j in interior
            next[j] = 0.3*prev[j-1] + 0.4*prev[j] + 0.3*prev[j+1]
        end
        # The buffer just written becomes the input of the next pass
        prev, next = next, prev
    end
    return prev
end;

In [None]:
# Load distributions from the original data
# Reads the Stata file into a DataFrame and converts every entry to Float64.
# The chained assignment also binds the same object to the global `dta`.
bk_dta = dta = Float64.(DataFrame(load("data/new_dta/psid/psidtaxsim_20mil.dta")));

In [None]:
# Smooth the income distribution
# so it's not much messier than the ability distribution
# (using the same number of iterations as for smoothing the ability distribution)
bk_hz = smooth_dist(bk_dta.hz, 500);

In [None]:
# Find the net-of-tax income at each grid point: income minus tax liability
# (`bk_T` is the liability computed in the ATR cell)
bk_Thz = bk_prims.z - bk_T;

In [None]:
# Adjust everything to be the region we care about
# Densities are sliced to the plotting range; the x variables are demeaned
# over the full grid first and then sliced. Uses `bk_n010`; the unsuffixed
# `bk_n0` is not assigned anywhere in the visible notebook.
fn_plot = bk_prims.fn[bk_n010:bk_nend];
hz_plot = bk_hz[bk_n010:bk_nend];
n_plot = (bk_prims.n .- mean(bk_prims.n))[bk_n010:bk_nend];
z_plot = (bk_prims.z .- mean(bk_prims.z))[bk_n010:bk_nend];
Tz_plot = (bk_Thz .- mean(bk_Thz))[bk_n010:bk_nend];

In [None]:
# Figure 2: the ability density and the smoothed income density, each plotted
# against its demeaned variable. The third series reuses the income density
# against demeaned net-of-tax income.
plot(n_plot, fn_plot,
     xlims = (-2.5e6, 1e6),
     right_margin=20Plots.mm, xlab = "n, z", ylab = "f(x)",
     label = "f(n - E[n])")
plot!(z_plot, hz_plot, 
     label = "f(z - E[z])")
# Label parentheses balanced to match the formula in the Figure 2 description
plot!(Tz_plot, hz_plot,
     label = "f((z - T(z)) - E[z - T(z)])")

# savefig("figs/fx_no_educ")

### 2: $\gamma = 1$

## Extra charts

From the 12/1/22 meeting: charts to compare the MTR and income distribution between these data and the Best-Kleven data.

### 1: Top right panel of Best-Kleven Figure 3

### 2: Income distributions

### 3: Hazard ratio

