In [1]:
using CSV
using DataFrames
using Serialization

In [4]:
# Raw origin -> destination flow table. The first column (`Origin`) holds the airport
# code of each row; every other column is named after a destination airport code.
df = CSV.File("top20_adjacency_mat.csv") |> DataFrame;

# Population assigned to each airport, keyed by the same codes that appear in `df`.
# NOTE(review): the source and reference year of these figures are not recorded here.
population_sizes = Dict{String,Int64}(
    "PEK" => 16_938_000,
    "LHR" => 9_800_000,
    "CGK" => 9_733_000,
    "DEL" => 16_787_941,
    "BOM" => 12_442_373,
    "ATL" => 4_661_000,
    "HND" => 36_975_000,
    "BKK" => 8_484_000,
    "CDG" => 10_514_000,
    "CAN" => 10_546_000,
    "PVG" => 20_948_000,
    "SIN" => 5_176_000,
    "LAX" => 12_197_000,
    "KUL" => 6_005_000,
    "CTU" => 7_570_000,
    "AMS" => 1_073_000,
    "GRU" => 11_310_000,
    "DEN" => 2_428_000,
    "HKG" => 7_066_000,
    "MNL" => 12_075_689,
);


In [None]:
# Inspect the outgoing flows of a single origin airport (AMS).
filter(:Origin => ==("AMS"), df)

In [19]:
# Build the per-capita migration-rate matrix from the yearly flows stored in `df`.
#
# Layout: entry [to_idx, from_idx] is the flow from the origin in row `from_idx` of `df`
# to the destination in data column `to_idx` of `df`, divided by the origin's population.
# The matrix therefore has one ROW per destination column and one COLUMN per origin row.
# It used to be allocated with those two dimensions swapped, which only worked because
# the input table is square.
# NOTE(review): column `from_idx` is paired with `population_sizes_vector[from_idx]`, so
# using this as a square migration matrix presumably requires the rows of `df` to be in
# the same airport order as its columns — confirm if the CSV ever changes.
new_adjacency_matrix = Array{Float64}(undef, size(df, 2) - 1, size(df, 1))
population_sizes_vector = Vector{Int64}(undef, size(df, 1))

for from_idx in axes(new_adjacency_matrix, 2)
    # The origin's population is constant along a column, so look it up once.
    origin_pop = population_sizes[df[from_idx, 1]]
    for to_idx in axes(new_adjacency_matrix, 1)
        # `to_idx + 1` skips the leading `Origin` label column of `df`.
        new_adjacency_matrix[to_idx, from_idx] = df[from_idx, to_idx + 1] / origin_pop
    end
    # Record the population size for the corresponding column of the migration matrix.
    population_sizes_vector[from_idx] = origin_pop
end

# The rates above are per year. We want them in units of the recovery time
# (typically 7 days, i.e. 1 week), so divide by the 52 weeks in a year.
new_adjacency_matrix ./= 52

# Persist both results together as a (matrix, populations) tuple.
serialize("cleaned_adjacency_matrix", (new_adjacency_matrix, population_sizes_vector))

In [16]:
# Display the airport-code => population lookup table to check its entries.
population_sizes

Dict{String, Int64} with 20 entries:
  "CGK" => 9733000
  "AMS" => 1073000
  "CTU" => 7570000
  "PVG" => 20948000
  "DEN" => 2428000
  "LHR" => 9800000
  "CAN" => 10546000
  "HND" => 36975000
  "ATL" => 4661000
  "SIN" => 5176000
  "HKG" => 7066000
  "MNL" => 12075689
  "BOM" => 12442373
  "BKK" => 8484000
  "CDG" => 10514000
  "KUL" => 6005000
  "LAX" => 12197000
  "DEL" => 16787941
  "GRU" => 11310000
  "PEK" => 16938000

In [17]:
# Display the populations in the order they were stored, i.e. the row order of `df`.
population_sizes_vector

20-element Vector{Int64}:
  1073000
  4661000
  8484000
 12442373
 10546000
 10514000
  9733000
  7570000
 16787941
  2428000
 11310000
  7066000
 36975000
  6005000
 12197000
  9800000
 12075689
 16938000
 20948000
  5176000

In [18]:
# Display the raw flow table loaded from the CSV.
df

Row,Origin,AMS,ATL,BKK,BOM,CAN,CDG,CGK,CTU,DEL,DEN,GRU,HKG,HND,KUL,LAX,LHR,MNL,PEK,PVG,SIN
Unnamed: 0_level_1,String3,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,AMS,0.0,149482.0,190450.0,89757.0,2552.79,179445.0,1161.55,42272.0,91847.4,1315.21,47736.9,202192.0,131.195,91241.8,101692.0,277285.0,73382.7,206660.0,170692.0,144559.0
2,ATL,140723.0,0.0,2856.1,1385.07,860.401,189862.0,251.249,200.288,1397.96,794270.0,55088.6,2245.84,37.3468,631.663,656070.0,171550.0,573.426,6488.61,2694.77,2662.23
3,BKK,195140.0,3266.64,0.0,354599.0,522445.0,220839.0,230269.0,42905.0,396046.0,287.333,1112.95,447604.0,821.049,312298.0,65145.0,294171.0,392220.0,290003.0,352779.0,574536.0
4,BOM,89757.0,1714.85,351024.0,0.0,6043.99,92799.0,449.739,3528.6,3415650.0,258.813,857.48,271658.0,278.04,102930.0,2557.32,256712.0,5690.02,8124.39,6710.38,427129.0
5,CAN,2764.57,943.002,491639.0,4882.2,0.0,88436.0,99434.0,865435.0,41800.0,100.229,170.751,0.0,462.087,218577.0,61028.0,2570.29,50302.0,1343160.0,163628.0,289948.0
6,CDG,181990.0,200136.0,193729.0,92850.0,88144.0,0.0,1117.0,1783.66,137856.0,1116.63,49692.9,144179.0,55.5344,83736.7,255414.0,311779.0,2537.42,287340.0,193404.0,162440.0
7,CGK,1186.78,282.891,230269.0,5578.65,99592.0,1416.91,0.0,9052.52,5423.99,1.11253,159.218,279656.0,413.434,253377.0,2315.65,1972.77,48336.0,38052.0,37296.0,533552.0
8,CTU,42272.0,256.337,42905.0,3528.6,865947.0,2579.71,9052.52,0.0,4251.76,1.5718,81.8455,230530.0,830.77,79664.0,1438.72,2894.34,5690.02,1889790.0,949444.0,65062.0
9,DEL,97984.4,1881.77,433770.0,3563370.0,41800.0,137856.0,1033.53,4249.91,0.0,520.992,1287.4,291645.0,683.035,107310.0,3584.14,281440.0,5690.02,68330.0,104763.0,302395.0
10,DEN,1308.64,831241.0,227.832,184.077,88.2505,1100.65,0.0,1.38833,323.894,0.0,230.178,301.502,0.0,59.1831,505605.0,96536.4,103.943,1093.19,479.171,316.269


In [23]:
# Read the (matrix, populations) tuple back from disk to confirm the round trip.
migration_matrix, popsiz = open(deserialize, "cleaned_adjacency_matrix")

([0.0 0.0005806082356047729 … 0.00015669937280592233 0.0005540071350760907; 0.002679083764248333 0.0 … 2.7439891290337976e-6 1.1827096922185234e-5; … ; 0.003059215714388128 1.1118315529434092e-5 … 0.0 0.0025055561532145404; 0.002590854407125959 1.0984055958609081e-5 … 0.0005722101866802045 0.0], [1073000, 4661000, 8484000, 12442373, 10546000, 10514000, 9733000, 7570000, 16787941, 2428000, 11310000, 7066000, 36975000, 6005000, 12197000, 9800000, 12075689, 16938000, 20948000, 5176000])

In [24]:
# Display the population vector that was read back from the serialized file.
popsiz

20-element Vector{Int64}:
  1073000
  4661000
  8484000
 12442373
 10546000
 10514000
  9733000
  7570000
 16787941
  2428000
 11310000
  7066000
 36975000
  6005000
 12197000
  9800000
 12075689
 16938000
 20948000
  5176000