In [None]:
using JLD2
using CSV
using Images
using Distances 
using DataFrames

In [None]:
"""
    tuple2mat(pc)

Convert a collection of 2-D points into a `length(pc) × 2` `Float64` matrix.

Row `i` holds the first and second component of `pc[i]`; an empty collection yields a
`0 × 2` matrix.
"""
function tuple2mat(pc)
    npoints = length(pc)
    mat = zeros(Float64, npoints, 2)
    for (row, point) in enumerate(pc)
        mat[row, 1], mat[row, 2] = point[1], point[2]
    end
    return mat
end
"""
    sim2file(sim)

Return the path of the CSV file belonging to `sim`, where `sim[1]` is the simulation
id and `sim[2]` is the time point.

The directory comes from the global `datapath`; the file name is appended after a
backslash separator.
"""
function sim2file(sim)
    sim_id, sim_time = sim[1], sim[2]
    filename = string(
        "\\ID-", sim_id, "_", "time-", sim_time, "_From2ParamSweep_Data.csv"
    )
    return datapath * filename
end

In [None]:
"""
    avg_dist_PQ(P, Q)

Return the mean, taken over the points of `P`, of the Euclidean distance from each
point to its nearest point in `Q`.

`P` and `Q` may each be a matrix with one point per row, or a vector whose elements
are coordinate tuples/vectors.

Returns `NaN` when `P` is empty, and `Inf` when `Q` is empty but `P` is not.
"""
function avg_dist_PQ(P, Q)
    p = size(P, 1)
    q = size(Q, 1)
    total_distance = 0.0

    for i in 1:p
        a = _point_at(P, i)
        closest_dist = Inf
        for j in 1:q
            b = _point_at(Q, j)
            # Full Euclidean distance over every coordinate of the two points.
            closest_dist = min(closest_dist, sqrt(sum(abs2, a .- b)))
        end
        total_distance += closest_dist
    end

    # One nearest-neighbour distance was accumulated per point of P, so the average
    # is taken over p (the number of points in P), not over the size of Q.
    return total_distance / p
end

# Fetch the i-th point of a point set stored as the rows of a matrix.
_point_at(points::AbstractMatrix, i) = view(points, i, :)
# Fetch the i-th point of a point set stored as a vector of tuples/vectors.
_point_at(points::AbstractVector, i) = points[i]

In [None]:
# Directory containing the per-simulation CSV exports; read by `sim2file`.
datapath = "C:\\Users\\jaydh\\Documents\\dissertation\\29072022\\29072022"

# Simulation descriptors; each entry is indexed as sim[1] = id, sim[2] = time.
sims = load("sim_ids_and_times.jld2", "sims");

# Cell-type names matched against the `celltypes` column of each CSV.
distinct_cell_types = ["Tumour", "Macrophage", "Vessel", "Necrotic"]

# Empty, typed results table: one row is pushed per simulation snapshot.
non_topological_data_df = DataFrame(
    :id => Int64[],
    :time => Int64[],
    :labels => Int64[],
    :Mcount => Int64[],
    :Tcount => Int64[],
    :Ncount => Int64[],
    :MVdist => Float64[],
    :TVdist => Float64[],
    :NVdist => Float64[],
)

In [None]:
# Build one row of non-topological summary statistics per simulation snapshot and
# append it to `non_topological_data_df`.
for sim in sims

    id = sim[1]
    time = sim[2]

    # Current CSV.jl releases require an explicit sink type as the second argument.
    df = CSV.read(sim2file(sim), DataFrame)

    # Positions of every cell type as an n×2 matrix (column 1 = x, column 2 = y).
    cell_coordinates = Dict{String, Matrix{Float64}}()
    for cell_type in distinct_cell_types
        # Filter the dataframe for the current cell type
        filtered_df = df[df.celltypes .== cell_type, :]

        # Pair up the x and y coordinates and store them as a matrix
        coordinates = collect(zip(filtered_df.points_x, filtered_df.points_y))
        cell_coordinates[cell_type] = tuple2mat(coordinates)
    end
    M = cell_coordinates["Macrophage"]
    T = cell_coordinates["Tumour"]
    V = cell_coordinates["Vessel"]
    N = cell_coordinates["Necrotic"]

    #### Compute non-topological statistics:
    # cell counts, and the average distance from each cell to its closest blood vessel

    Mn = size(M, 1)
    Tn = size(T, 1)
    Nn = size(N, 1)

    MVdist = avg_dist_PQ(M, V)
    TVdist = avg_dist_PQ(T, V)
    NVdist = avg_dist_PQ(N, V)

    # Label the snapshot from the macrophage phenotypes: 1 when fewer than half of
    # the macrophages have a phenotype in [0, 0.5], otherwise 0. With no macrophages
    # the ratio is NaN, the comparison is false, and the label stays 0.
    macrophage_df = df[df.celltypes .== "Macrophage", :]
    M1_count = count(0.0 .<= macrophage_df[!, "phenotypes"] .<= 0.5)
    label = M1_count / Mn < 0.5 ? 1 : 0

    # Append the row; field names must match the columns of the results table.
    push!(non_topological_data_df, (id = id,
                        time = time,
                        labels = label,
                        Mcount = Mn,
                        Tcount = Tn,
                        Ncount = Nn,
                        MVdist = MVdist,
                        TVdist = TVdist,
                        NVdist = NVdist))

end
# Write the dataframe to a CSV file (uncomment the line below to save it)
#CSV.write("non_toological_dataframe.csv", non_topological_data_df)