## Digel et al. 2014 Soil Metanetwork --> SLNs ##

In [60]:
# load necessary Julia libraries
# These must be installed first, from the Julia REPL or a terminal session, with commands such as:
# using Pkg
# Pkg.add("CSV")
using CSV,DelimitedFiles,DataFrames,Random,Distributions,LinearAlgebra,Graphs,FilePathsBase

# Load the raw species-occurrence table and keep only the rows flagged as found on a plot
spp_occ_raw = CSV.read("SpeciesOccurrencesData.csv", DataFrame)
spp_occ_filtered = filter(:foundOnPlot => ==(1), spp_occ_raw)

# Names of the soil plots (sites)
plots = unique(spp_occ_filtered.plotid)
# Species IDs in ascending order
species_ids = sort(unique(spp_occ_filtered.species_id))

# Presence/absence table: one row per plot and one column per integer ID from 1 up to the
# largest observed species ID (the column names are the IDs); every cell starts at 0
spp_list_plots_matrix = zeros(Int, length(plots), maximum(species_ids))
spp_list_plots = DataFrame(spp_list_plots_matrix, Symbol.(axes(spp_list_plots_matrix, 2)))
spp_list_plots = hcat(DataFrame(plotid = plots), spp_list_plots)  # plot names as first column

# Lookup tables: plot name -> row number, species ID (as a string) -> column name
plot_lookup = Dict(name => idx for (idx, name) in enumerate(plots))
species_lookup = Dict(string(id) => Symbol(string(id)) for id in species_ids)

# Mark every observed (plot, species) occurrence with a 1
for (pid, sid) in zip(spp_occ_filtered.plotid, spp_occ_filtered.species_id)
    spp_list_plots[plot_lookup[pid], species_lookup[string(sid)]] = 1
end

"""
    get_present_species(spp_list_plots_df, plot_id)

Return the IDs of the species recorded as present on `plot_id`, as a `Vector{Int}`.

`spp_list_plots_df` is a presence/absence table with a `plotid` column and one further
column per species, each named after its integer species ID. A species counts as present
when the entry in the plot's row equals `1`. IDs are returned in column order.

# Throws
- `ArgumentError` if `plot_id` does not occur in the `plotid` column.
"""
function get_present_species(spp_list_plots_df, plot_id)
    # Look the plot up in the table that was passed in, not in a global of a similar name
    row_index = findfirst(==(plot_id), spp_list_plots_df.plotid)
    if row_index === nothing
        throw(ArgumentError("plot $(repr(plot_id)) not found in the `plotid` column"))
    end

    present = Int[]
    for name in propertynames(spp_list_plots_df)
        # Every column other than `plotid` is a species column named after its ID
        name === :plotid && continue
        if getproperty(spp_list_plots_df, name)[row_index] == 1
            push!(present, parse(Int, String(name)))
        end
    end
    return present
end

# Import the global list of feeding interactions and keep only the observed ones.
# In this dataset, for each interaction species 1 (spec1) is the prey/resource and
# species 2 (spec2) is the predator/consumer.
metaweb_raw = CSV.read("Metawebinteractions.csv", DataFrame)
metaweb_filtered = filter(:feeding_interaction => n -> n == 1, metaweb_raw)

# Per-plot interaction lists, keyed by plot name
plot_links = Dict{String, DataFrame}()

# For every plot, keep the metaweb interactions whose two species are both present there
for plot in eachrow(spp_list_plots)
    # Link table for this plot
    df = DataFrame(prey = Int[], predator = Int[])

    # Species present on this plot. A Set makes each membership test below a hash lookup
    # instead of a linear scan of a Vector for every candidate interaction.
    present_species = Set(get_present_species(spp_list_plots, plot.plotid))
    # Logged at debug level so that normal runs stay quiet
    @debug "Species present on plot" plotid = plot.plotid present_species

    # Check every potential interaction in the metaweb
    for row in eachrow(metaweb_filtered)
        sp1 = row.spec1
        sp2 = row.spec2

        # Keep the link only if both prey and predator occur on this plot
        if sp1 in present_species && sp2 in present_species
            push!(df, (prey = sp1, predator = sp2))
        end
    end

    # Store the resulting link table using the plot name as key
    plot_links[plot.plotid] = df
end

# Load the taxa info table ("NA" cells are read as missing) and rename its columns to
# match the speciesinfo format
species_master = CSV.read("SpeciesInfo.csv", DataFrame; missingstring = "NA")
rename!(species_master, :number => :sp_id, :species => :sp_name, :group => :guild)

# Build and write the adjacency matrix and the speciesinfo table for each plot
for (plot, links_df) in plot_links
    # Local species list (every species taking part in at least one link), ascending,
    # and the position of each species ID within that list
    local_species = sort(unique(vcat(links_df.prey, links_df.predator)))
    local_index = Dict(id => i for (i, id) in enumerate(local_species))
    S = length(local_species)  # local species count

    # Adjacency matrix: rows = consumers, cols = resources
    A = zeros(Int, S, S)
    for (prey, predator) in zip(links_df.prey, links_df.predator)
        A[local_index[predator], local_index[prey]] = 1
    end

    # Save the bare matrix as comma-separated text (no header or row names).
    # `writedlm` (DelimitedFiles, already loaded by this file) writes the integer matrix
    # directly, so no `Tables` wrapper around `A` is needed.
    writedlm("matrix_$plot.csv", A, ',')

    # Species info restricted to this plot's species, sorted by ID like the matrix axes
    local_info = semijoin(species_master, DataFrame(sp_id = local_species); on = :sp_id)
    sort!(local_info, :sp_id)

    # Rows of the info table only line up with the matrix axes when every local species
    # has exactly one row in species_master
    if nrow(local_info) != S
        @warn "$plot: $(nrow(local_info)) species info rows for $S species in the matrix"
    end

    # Check for NA/invalid taxa (a missing value in any column)
    missing_rows = filter(row -> any(ismissing, row), local_info)
    if nrow(missing_rows) > 0
        @warn "$plot includes $(nrow(missing_rows)) taxa with missing info: $(missing_rows.sp_id)"
    end

    # Output the species info file with its columns in the expected order
    select!(local_info, [:sp_name, :guild, :family, :sp_id])
    CSV.write("speciesinfo_$plot.csv", local_info)
end
