<h1>Annual Change in Debt Redemption</h1>
Create a bar graph showing the change in the amount of debt redeemed from year to year and month to month 

In [1]:
using CSV, DataFrames, DataFramesMeta, Gadfly, XLSX

In [58]:
function clean_df(df) 
    df[:, :dollars] = replace(df[:, :dollars], "NaN" => missing) # replace "NaN" with missing
    df[:, :cents] = replace(df[:, :cents], "NaN" => missing) # replace "NaN" with missing
    df[:, :dollars] = replace(df[:, :dollars], missing => 0)  # replace missing with 0 
    df[:, :cents] = replace(df[:, :cents], missing => 0)  # replace missing with 0 
    df 
end 

function group_by_year(df) 
    gdf = @by(df, :year,
        :total_amt = sum(:dollars) + sum(:cents)  # calculate total amount of debt 
    )
    sort!(gdf) # sort by year 
    replace!(gdf.year, "missing" => "no year") # change missing values to string to allow for plotting
    return gdf 
end

function group_by_year_month(df)
    gdf = @by(df, [:year, :month],
        :total_amt = sum(:dollars) + sum(:cents)  # calculate total amount of debt 
    )

    println(first(gdf, 5)) 
    
    sort!(gdf) # sort by year 
    replace!(gdf.year, "missing" => "no year") # change missing values to string to allow for plotting
    replace!(gdf.month, "missing" => "no month") # change missing values to string to allow for plotting
    return gdf 
end

#plot debt redeemed per year saved as svg 
function plot_debt(df::DataFrame, state, type)
    set_default_plot_size(40cm, 22.5cm)

    if type == "year"
        p_debt_date = Gadfly.plot(
            df,
            x=:year,
            y=:total_amt,
            Geom.bar,
            Guide.xlabel("Year"),
            Guide.ylabel("Total Debt (in dollars)"),
            Guide.title(uppercase(state)*" Debt Redeemed Per Year"),
            Gadfly.Theme(background_color = "white")
        )
        img = SVG("results/debt_per_year/year/" * uppercase(state) * "_debt_redeemed_per_year.svg", 40cm, 22.5cm) 
        draw(img, p_debt_date)  
    elseif type == "year_month"
        p_debt_date = Gadfly.plot(
            df,
            x=:year_month,
            y=:total_amt,
            Geom.bar,
            Guide.xlabel("Year"),
            Guide.ylabel("Total Debt (in dollars)"),
            Guide.title(uppercase(state)*" Debt Redeemed Per Year/Month"),
            Gadfly.Theme(background_color = "white")
        )

        img = SVG("results/debt_per_year/year_month/" * uppercase(state) * "_debt_redeemed_per_year_month.svg", 40cm, 22.5cm) 
        draw(img, p_debt_date)  
    end

    println(state)

end 

plot_debt (generic function with 2 methods)

In [56]:
# import cd_info 
cd_info = DataFrame(CSV.File("data/cd_info.csv"))
# store total amount and years of all states in a new dataframe 
all_states = DataFrame([[], [], [], []], [:year, :month, :year_month, :total_amt])

# loop through cd_info per state 
for i in 1:nrow(cd_info)
    state_df = DataFrame() # create new dataframe for each state
    state_row = cd_info[i, :]
    state_excel = XLSX.readxlsx(state_row[:file_path])
    state_sheet = state_excel["Sheet1"]
    
    # get year columns --> merge years columns into one column 
    year_col_indexes = split(state_row[:year_col], ",")
    for year_col_index in year_col_indexes
        year_col_range = year_col_index*string(state_row[:first_row])*":"*year_col_index*string(state_row[:last_row])
        year_col = state_sheet[year_col_range]
        
        if nrow(state_df) == 0 # if state_df is empty, create year column 
            state_df.year = vec(year_col)
        else 
            state_df.year .= coalesce(state_df.year, vec(year_col)) # append year col to state dataframe
        end 
    end

    state_df.year = string.(state_df.year) # convert year column to string to allow for replacing missing values

    # get month columns --> merge month columns into one column
    month_col_indexes = split(state_row[:month_col], ",")
    for month_col_index in month_col_indexes
        month_col_range = month_col_index*string(state_row[:first_row])*":"*month_col_index*string(state_row[:last_row])
        month_col = state_sheet[month_col_range]

        if !("month" in names(state_df)) 
            state_df.month = vec(month_col)
        else
            state_df.month .= coalesce(state_df.month, vec(month_col)) # append month col to state dataframe
        end 
    end
    
    state_df.month = string.(state_df.month) # convert month column to string to allow for replacing missing values

    # add dollar amount column to states dataframe 
    dollar_indexes = split(state_row[:dollars_col], ",") 
    for dollar_index in dollar_indexes
        dollar_col_range = dollar_index*string(state_row[:first_row])*":"*dollar_index*string(state_row[:last_row])
        dollar_col = state_sheet[dollar_col_range]
        state_df.dollars = vec(dollar_col)
    end

    # add cents amount column to states dataframe 
    if !ismissing(state_row[:cents_col]) # handle excel spreadsheets with no cents column 
        cents_indexes = split(state_row[:cents_col], ",")
        for cents_index in cents_indexes
            cents_col_range = cents_index*string(state_row[:first_row])*":"*cents_index*string(state_row[:last_row])
            cents_col = state_sheet[cents_col_range]
            state_df.cents = vec(cents_col) ./ 100 # convert cents to decimal
        end 
    else 
        state_df.cents = zeros(nrow(state_df)) # create column of zeros to allow for summing
    end

    # add state label 
    state_df.state = fill(state_row[:state], nrow(state_df))

    state_df_clean = clean_df(state_df) # clean table to remove missing values
    state_gdf = group_by_year_month(state_df_clean) # group by year and month and sum debt 

    # create new column that merges year and month 
    state_gdf.year_month = string.(state_gdf[:, :year], "-", state_gdf[:, :month])

    #plot_debt(state_gdf, state_row[:state], "year_month") # plot debt redeemed per year saved as svg
    plot_debt(state_gdf, state_row[:state], "year") # plot debt redeemed per year saved as svg


    all_states = vcat(all_states, state_gdf) # append state dataframe to all_states dataframe

    println(first(state_gdf, 5)) 
end 


[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ 1790    10        42912.3
   2 │ 1790    11        43252.7
   3 │ 1790    12        32611.6
   4 │ 1791    1         34013.3
   5 │ 1791    2         53571.1
ct
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    10       42912.3   1790-10
   2 │ 1790    11       43252.7   1790-11
   3 │ 1790    12       32611.6   1790-12
   4 │ 1791    1        34013.3   1791-1
   5 │ 1791    10         889.94  1791-10


[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ 1790    12        2277.27
   2 │ 1791    3         6748.79
   3 │ 1791    4         1219.59
   4 │ 1791    5         3419.54
   5 │ 1791    6        20752.2
ga
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    12        2277.27  1790-12
   2 │ 1791    11        1677.04  1791-11
   3 │ 1791    12          68.43  1791-12
   4 │ 1791    3         6748.79  1791-3
   5 │ 1791    4         1219.59  1791-4
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ 1790    10     

md
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    10        70983.1  1790-10
   2 │ 1790    11        35381.5  1790-11
   3 │ 1790    12        33696.2  1790-12
   4 │ 1791    1         27870.5  1791-1
   5 │ 1791    10        30314.2  1791-10
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ 1791    2         4749.0
   2 │ 1791    4          455.0
   3 │ 1791    6         4891.05
   4 │ 1791    7         7870.49
   5 │ 1791    8         3025.34
nc
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────

[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt     [0m
     │[90m String [0m[90m String [0m[90m Float64       [0m
─────┼───────────────────────────────
   1 │ 1791    3           2.41234e5
   2 │ 1791    4       34835.3
   3 │ 1791    5       48316.8
   4 │ 1791    6        5128.72
   5 │ 1791    7       22771.5
nh
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt     [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64       [0m[90m String     [0m
─────┼───────────────────────────────────────────
   1 │ 1791    10        354.0        1791-10
   2 │ 1791    3           2.41234e5  1791-3
   3 │ 1791    4       34835.3        1791-4
   4 │ 1791    5       48316.8        1791-5
   5 │ 1791    6        5128.72       1791-6
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt     [0m
     │[90m String [0m[90m String [0m[90m Float64       [0m
─────┼───


nj
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1791    1          958.12  1791-1
   2 │ 1791    10         465.54  1791-10
   3 │ 1791    11        1597.16  1791-11
   4 │ 1791    12        1695.27  1791-12
   5 │ 1791    6           92.42  1791-6
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year    [0m[1m month   [0m[1m total_amt      [0m
     │[90m String  [0m[90m String  [0m[90m Float64        [0m
─────┼──────────────────────────────────
   1 │ 1790     10       273536.0
   2 │ missing  missing    3351.63
   3 │ 1790     11            1.01166e5
   4 │ 1790     12            1.68027e5
   5 │ 1791     1             1.17976e5


ny


[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt      [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64        [0m[90m String     [0m
─────┼────────────────────────────────────────────
   1 │ 1790    10      273536.0        1790-10
   2 │ 1790    11           1.01166e5  1790-11
   3 │ 1790    12           1.68027e5  1790-12
   4 │ 1791    1            1.17976e5  1791-1
   5 │ 1791    10       70064.7        1791-10
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year    [0m[1m month   [0m[1m total_amt [0m
     │[90m String  [0m[90m String  [0m[90m Float64   [0m
─────┼─────────────────────────────
   1 │ 1790     10         93011.9
   2 │ missing  missing    76527.9
   3 │ 1790     11         58579.4
   4 │ 1790     12         51864.4
   5 │ 1791     1          85074.4
pa
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m


[1m5×3 DataFrame[0m
[1m Row [0m│[1m year    [0m[1m month   [0m[1m total_amt [0m
     │[90m String  [0m[90m String  [0m[90m Float64   [0m
─────┼─────────────────────────────
   1 │ 1790     11        10212.1
   2 │ 1790     12        26651.2
   3 │ missing  missing   20572.4
   4 │ 1791     1         14123.0
   5 │ 1791     2          9183.74
ri
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    11       10212.1   1790-11
   2 │ 1790    12       26651.2   1790-12
   3 │ 1791    1        14123.0   1791-1
   4 │ 1791    10        4878.15  1791-10
   5 │ 1791    12         320.78  1791-12


[1m5×3 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m
─────┼───────────────────────────
   1 │ 1790    11       19376.1
   2 │ 1792    9         1540.68
   3 │ 1792    4          359.55
   4 │ 1790    12        8137.65
   5 │ 1790    9            0.0
sc
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    11       19376.1   1790-11
   2 │ 1790    12        8137.65  1790-12
   3 │ 1790    9            0.0   1790-9
   4 │ 1791    1         3449.71  1791-1
   5 │ 1791    10        5748.95  1791-10
[1m5×3 DataFrame[0m
[1m Row [0m│[1m year    [0m[1m month   [0m[1m total_amt [0m
     │[90m String  [0m[90m String  [0m[90m Float64   [0m
─────┼─────────────────────────────
   1 │ 1790     10

va
[1m5×4 DataFrame[0m
[1m Row [0m│[1m year   [0m[1m month  [0m[1m total_amt [0m[1m year_month [0m
     │[90m String [0m[90m String [0m[90m Float64   [0m[90m String     [0m
─────┼───────────────────────────────────────
   1 │ 1790    1        19429.3   1790-1
   2 │ 1790    10        9627.79  1790-10
   3 │ 1790    11       14460.1   1790-11
   4 │ 1790    12        5353.4   1790-12
   5 │ 1791    1        18072.7   1791-1


In [60]:
# plot united states 
sort!(all_states) # sort by year
plot_debt(all_states, "united states", "year_month") # plot debt redeemed per year saved as svg
plot_debt(all_states, "united states", "year") # plot debt redeemed per year saved as svg

united states


united states


<h1>Pre-1790</h1>

In [124]:
# import pre-1790 debt data 
# get unique dates 
# group by year - sum debt 
# plot debt redeemed per year saved as svg 

pre1790 = DataFrame(CSV.File("../data/agg_debt_grouped.csv"))
pre1790.year = pre1790[:, "date of the certificate | year"]

# fix cents column in agg_debt_grouped.csv 
pre1790[:, "amount | 90th"] = getindex.(split.(pre1790[:, "amount | 90th"], "."), 1)
pre1790[:, "amount | 90th"] = replace.(pre1790[:, "amount | 90th"], "/" => "")
pre1790.cents = parse.(Float64, pre1790[:, "amount | 90th"]) ./ 100

pre1790.total_amt = pre1790[:, "amount | dollars"] + pre1790[:, "cents"]
pre1790.dollars = pre1790[:, "amount | dollars"]

pre1790.cents = ifelse.(pre1790.cents .>= 100, 0, pre1790.cents)

#clean 
pre1790_clean = clean_df(pre1790)

#group by year and sum 
pre1790_clean.year = coalesce.(pre1790_clean.year, 0)
pre1790_clean.year = Int.(pre1790_clean.year)
pre1790_clean.year = string.(pre1790_clean.year)
pre1790_gdf = group_by_year(pre1790_clean)
pre1790_gdf[1, "year"] = "no year"
sort!(pre1790_gdf)

Row,year,total_amt
Unnamed: 0_level_1,String,Float64
1,1776,51300.0
2,1777,4945900.0
3,1778,8102200.0
4,1779,28193900.0
5,1780,14315000.0
6,1781,2177100.0
7,1783,104227.0
8,1784,1066260.0
9,1785,1534780.0
10,1786,1022510.0


In [125]:
#plot 
set_default_plot_size(16cm, 9cm)

p_debt_date = Gadfly.plot(
    pre1790_gdf,
    x=:year,
    y=:total_amt,
    Geom.bar,
    Guide.xlabel("Year"),
    Guide.ylabel("Amount (in dollars)"),
    Guide.title("Debt Certificate Total Per Year"),
    Gadfly.Theme(background_color = "white")
)

img = SVG("results/debt_per_year/pre1790_debt_certificate_amts_per_year.svg", 16cm, 9cm) 
draw(img, p_debt_date)  

false