### 1. Load imputed datasets for tourist proportions, calculate statistics from draws

In [None]:
library(data.table)
library(plyr)

# Load every imputed dataset, rescale the tourist proportions so they sum to
# one, summarise the imputation draws per location / visiting country / year,
# and write the comparison against the raw data to disk.
folder <- "/share/scratch/users/mgriswol/"
# NOTE(review): setwd() is kept only in case cells outside this one rely on
# the working directory; nothing in this cell depends on it any more.
setwd(folder)

# Read in only the files we want and append them together in one data.table.
# full.names = TRUE makes the read independent of the working directory, and
# rbindlist() already returns a data.table, so no extra wrapping is needed.
files <- list.files(folder, pattern = "^imputed.*csv$", full.names = TRUE)
if (length(files) == 0L) {
  stop("No files matching '^imputed.*csv$' found in ", folder, call. = FALSE)
}
impute <- rbindlist(lapply(files, fread))

# Remove some columns and transform data.
# NOTE(review): the stored values look like log proportions (e.g. -8.88 in
# the raw rows), hence the exp() -- confirm against the imputation step.
impute[, c("V1", "id", "location_id") := NULL]
impute[, tourist_proportions := exp(tourist_proportions)]

# Scale proportions to 1 within each location, year and dataset. Missing
# values are ignored in the denominator and stay missing in the result.
impute[,
       tourist_proportions :=
         tourist_proportions / sum(tourist_proportions, na.rm = TRUE),
       by = .(location_name, year_id, imputed)]

# Split data into raw data (imputed == 0) and estimates (imputed != 0).
# Row subsetting copies, so dropping `imputed` here leaves `impute` intact.
data <- impute[imputed == 0]
data[, imputed := NULL]

# Calculate estimates and uncertainty across the imputed draws: the mean plus
# the 5th and 95th percentiles. Aggregating directly yields one row per
# location / visiting country / year, so no separate collapse step is needed.
# The grouping columns are listed in the file's column order so the collapsed
# table keeps the same layout as before.
impute <- impute[imputed != 0,
                 .(mean = mean(tourist_proportions),
                   lower = quantile(tourist_proportions, 0.05),
                   upper = quantile(tourist_proportions, 0.95)),
                 by = .(year_id, location_name, visiting_country)]

# Compare the estimates to the original data; a full join keeps rows that
# appear in only one of the two tables.
results <- join(data, impute,
                by = c("location_name", "visiting_country", "year_id"),
                type = "full")

write.csv(results, "/snfs2/HOME/mgriswol/results.csv")

year_id,location_name,visiting_country,tourist_proportions,imputed
1995,Albania,Argentina,,0
1996,Albania,Argentina,,0
1997,Albania,Argentina,,0
1998,Albania,Argentina,,0
1999,Albania,Argentina,-8.877608,0
2000,Albania,Argentina,-8.363062,0
2001,Albania,Argentina,-8.586334,0
2002,Albania,Argentina,-8.464710,0
2003,Albania,Argentina,-8.676821,0
2004,Albania,Argentina,-8.970920,0


year_id,location_name,visiting_country,tourist_proportions,imputed
1995,Albania,Argentina,,0
1996,Albania,Argentina,,0
1997,Albania,Argentina,,0
1998,Albania,Argentina,,0
1999,Albania,Argentina,1.394774e-04,0
2000,Albania,Argentina,2.333288e-04,0
2001,Albania,Argentina,1.866390e-04,0
2002,Albania,Argentina,2.107769e-04,0
2003,Albania,Argentina,1.704923e-04,0
2004,Albania,Argentina,1.270512e-04,0


year_id,location_name,visiting_country,tourist_proportions,imputed
1995,Albania,Argentina,,0
1996,Albania,Argentina,,0
1997,Albania,Argentina,,0
1998,Albania,Argentina,,0
1999,Albania,Argentina,0.0001486215,0
2000,Albania,Argentina,0.0002363492,0
2001,Albania,Argentina,0.0001895976,0
2002,Albania,Argentina,0.0002156769,0
2003,Albania,Argentina,0.0001708080,0
2004,Albania,Argentina,0.0001279277,0


year_id,location_name,visiting_country,tourist_proportions
1995,Albania,Argentina,
1996,Albania,Argentina,
1997,Albania,Argentina,
1998,Albania,Argentina,
1999,Albania,Argentina,0.0001486215
2000,Albania,Argentina,0.0002363492
2001,Albania,Argentina,0.0001895976
2002,Albania,Argentina,0.0002156769
2003,Albania,Argentina,0.0001708080
2004,Albania,Argentina,0.0001279277


### 2. Combine with total tourists, duration of stay, and liters per capita to generate additive/subtractive adjustments.

In [26]:
# Display the rows of `results` whose `check` value is negative.
# NOTE(review): `check` is not created in the code visible here -- confirm
# its definition in the cell that adds it.
results[check < 0, ]

year_id,location_name,visiting_country,tourist_proportions,mean,lower,upper,check
2014,Albania,Bulgaria,7.406488e-03,7.406488e-03,7.406488e-03,7.406488e-03,-8.673617e-19
2003,Albania,Canada,3.671865e-03,3.671865e-03,3.671865e-03,3.671865e-03,-8.673617e-19
2004,Albania,China,1.842243e-03,1.842243e-03,1.842243e-03,1.842243e-03,-4.336809e-19
2012,Albania,China,8.905229e-04,8.905229e-04,8.905229e-04,8.905229e-04,-2.168404e-19
2003,Albania,Colombia,5.742898e-05,5.742898e-05,5.742898e-05,5.742898e-05,-6.776264e-21
2001,Albania,Croatia,3.875675e-03,3.875675e-03,3.875675e-03,3.875675e-03,-4.336809e-19
2009,Albania,Czech Republic,3.772296e-03,3.772296e-03,3.772296e-03,3.772296e-03,-4.336809e-19
2007,Albania,El Salvador,1.420311e-05,1.420311e-05,1.420311e-05,1.420311e-05,-3.388132e-21
2008,Albania,Ireland,1.865852e-03,1.865852e-03,1.865852e-03,1.865852e-03,-2.168404e-19
2013,Albania,Israel,9.312074e-04,9.312074e-04,9.312074e-04,9.312074e-04,-1.084202e-19
