The objective of this notebook is to map the samples for each individual in the dataset.

In [None]:
import Pkg

# Registered packages this notebook imports. They are brought in with `import`
# (not `using`), so every call below is written with its module prefix.
pkgs = [
    "Revise",
    "DataFrames",
    "StatsBase",
    "StatsPlots",
    "uCSV",
    "ProgressMeter",
    "Distances",
    "Clustering",
    "Colors",
    "MultivariateStats",
    "GeoMakie",
    "CairoMakie",
    "DataStructures"
]
# Pkg.add(pkgs)
for package_name in pkgs
    # Build the `import X` expression from a Symbol rather than parsing a string.
    @eval import $(Symbol(package_name))
end
import Mycelia

In [None]:
# The "data" folder sits in the parent of the current working directory.
data_dir = joinpath(pwd() |> dirname, "data")

In [None]:
# Full paths of everything under data/SRA, minus Jupyter's checkpoint folder.
sample_paths = filter(
    !contains(".ipynb_checkpoints"),
    readdir(joinpath(data_dir, "SRA"); join=true),
)

In [None]:
# Results are kept in a "results" subfolder of the data directory.
results_dir = joinpath(
    data_dir,
    "results",
)

In [None]:
# Metadata tables live in the "metadata" folder in the parent of the working directory.
metadata_dir = joinpath(pwd() |> dirname, "metadata")

In [None]:
# Read the tab-delimited exposome environmental metadata (header on the first line)
# into a DataFrame.
exposome_environmental_data = let tsv_path = joinpath(metadata_dir, "metadata_exposome.rds.tsv")
    parsed_table = uCSV.read(tsv_path; delim='\t', header=1, typedetectrows=300)
    DataFrames.DataFrame(parsed_table)
end

In [None]:
# Read the tab-delimited per-sample metadata table (header on the first line).
joint_sample_metadata = DataFrames.DataFrame(uCSV.read(
    joinpath(metadata_dir, "exposome/joint_sample_metadata.tsv"),
    delim='\t',
    header=1,
    typedetectrows=300
))

# The library name appears in two columns; downstream code keys on "Library Name",
# so confirm that the two really are interchangeable before going further.
# `isequal` is used instead of `==` because `==` returns `missing` (not a Bool) as
# soon as either column contains a `missing`, and an explicit error is raised rather
# than `@assert`, which is meant for internal invariants and may be disabled.
if !isequal(joint_sample_metadata[!, "Library Name"], joint_sample_metadata[!, "LibraryName"])
    error("joint_sample_metadata: columns \"Library Name\" and \"LibraryName\" do not match")
end

In [None]:
# Keep only the samples found in both tables, matching "Library Name" in the sample
# table against "samplenames" in the environmental table.
joint_metadata = DataFrames.innerjoin(
    joint_sample_metadata,
    exposome_environmental_data;
    on = "Library Name" => "samplenames",
)

In [None]:
# Split the joined metadata into one group per value of the "aownership" column.
metadata_by_owner = DataFrames.groupby(
    joint_metadata,
    "aownership",
)

In [None]:
# Convert one raw coordinate cell to `Float64`, or `missing` when it is not usable.
# Text that does not parse as a number becomes `missing`; cells that were already
# read as numbers (or as `missing`) are accepted as they are instead of erroring.
parse_coordinate(value::AbstractString) = something(tryparse(Float64, value), missing)
parse_coordinate(value::Real) = Float64(value)
parse_coordinate(::Missing) = missing

# Map each participant label to a table of its usable (latitude, longitude) rows.
# Every expected label is seeded with `missing` first, so the dictionary order follows
# this list rather than the order in which the groups happen to be visited.
participant_to_lat_longs = DataStructures.OrderedDict{Any, Any}(
    x => missing for x in ["P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "Others"]
)
for individual_metadata_table in metadata_by_owner
    # All rows of a group share the same owner label, so the first row is enough.
    participant_id = individual_metadata_table[1, "aownership"]
    df = individual_metadata_table[:, ["latitude", "longitude"]]
    for column in ("latitude", "longitude")
        df[!, column] = parse_coordinate.(df[!, column])
    end
    # Rows where either coordinate could not be parsed are discarded.
    participant_to_lat_longs[participant_id] = DataFrames.dropmissing(df)
end
participant_to_lat_longs

In [None]:
# One colour per participant entry. White and black are passed as seed colours and
# then dropped (`dropseed=true`), so they only steer the palette and are not returned.
colorscheme = let seed_colors = [Colors.RGB(1,1,1), Colors.RGB(0,0,0)]
    Colors.distinguishable_colors(length(participant_to_lat_longs), seed_colors; dropseed=true)
end

In [None]:
# https://docs.makie.org/stable/explanations/figure/#matching_figure_and_font_sizes_to_documents
# 1 in == 72 pt
# Let's say your desired output size is 5 x 4 inches and you should use a font size of 12 pt.
# You multiply 5 x 4 by 72 to get 360 x 288 pt.
# The size you need to set on your Figure depends on the pt_per_unit value you want to use.
# When making plots for publications, you should usually just save with pt_per_unit = 1.
# So in our example, we would use Figure(size = (360, 288)) and for text set fontsize = 12 to match the 12 pt requirement.
        

# f = Figure(resolution = (5, 4) .* 72)

# fig = GeoMakie.Figure(
#     size=dims .* 72
# )

# If you save as a bitmap, multiply your size in inches by your desired dpi to get the resolution in pixels.
# Then save with px_per_unit = 1 (that is the default anyway but can be changed).


# If saving as PNG, JPG, or another bitmap image format, multiply the desired final image size in inches by the desired DPI to get the image size in pixels.
# If saving as SVG, PDF, or another vector-graphics format, multiply the desired final image size in inches by 72 pt/in.
# Draw every participant's sampling locations on a world map and save it as a PNG.
# The figure is 5 x 4 inches at 300 DPI, i.e. 1500 x 1200 units, written out at one
# pixel per unit.
fig = GeoMakie.Figure(size = (5, 4) .* 300)

ga = GeoMakie.GeoAxis(
    fig[1, 1]; # any cell of the figure's layout
    title = "Exposome sampling locations by participant",
    dest = "+proj=wintri", # the CRS in which you want to plot
    coastlines = true # plot coastlines from Natural Earth, as a reference.
)
ga.xticklabelsvisible[] = false
ga.yticklabelsvisible[] = false

# Plot handles and legend labels are collected side by side, so the legend always
# lists exactly the participants that were drawn, in dictionary order, each with the
# colour at the same position in `colorscheme`.
legend_handles = Any[]
legend_labels = String[]
for ((participant_id, lat_longs), color) in zip(participant_to_lat_longs, colorscheme)
    # A participant with no rows in the metadata still holds the `missing` placeholder
    # and has nothing to plot.
    ismissing(lat_longs) && continue
    handle = GeoMakie.scatter!(
        ga,
        lat_longs[!, "longitude"],
        lat_longs[!, "latitude"];
        color = color,
    )
    push!(legend_handles, handle)
    push!(legend_labels, string(participant_id))
end
GeoMakie.Legend(fig[1, 2], legend_handles, legend_labels)

GeoMakie.save(
    "exposome-sampling-locations.png",
    fig,
    px_per_unit=1,
    )

# GeoMakie.save(
#     "test.pdf",
#     fig,
#     pt_per_unit=1,
#     )