In [1]:
// nuget references
#r "nuget: FSharp.Stats, 0.5.1-preview.1"
//#r "nuget: Plotly.NET, 4.2.0"
#r "nuget: Plotly.NET.Interactive, 4.2.1"
#r "nuget: FSharp.Data, 6.3.0"
#r "nuget: Cytoscape.NET, 0.2.0"
#r "nuget: Cytoscape.NET.Interactive, 0.2.0"

open FSharp.Stats
open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects
open FSharp.Data
open Cytoscape.NET
open System


//FSharp.Stats.ServiceLocator.setEnvironmentPathVariable (@"C:\Users\bvenn\source\repos\FSharp.Stats\lib")
//FSharp.Stats.Algebra.LinearAlgebra.Service()

//axis styling extension module
module Chart = 
    /// Creates a linear axis titled `name` that is mirrored on all sides,
    /// has inside ticks, no grid lines and a visible axis line.
    let myAxis name =
        LinearAxis.init(
            Title = Title.init name,
            Mirror = StyleParam.Mirror.All,
            Ticks = StyleParam.TickOptions.Inside,
            ShowGrid = false,
            ShowLine = true)

    /// Applies the `lightMirrored` template to `chart` and then sets the
    /// x axis to `myAxis x` and the y axis to `myAxis y`.
    let withAxisTitles x y chart = 
        let templated = chart |> Chart.withTemplate ChartTemplates.lightMirrored
        let withXAxisSet = templated |> Chart.withXAxis (myAxis x)
        withXAxisSet |> Chart.withYAxis (myAxis y)

/// Product category of an order. Every category string other than
/// "Beer", "Beverage" or "Coffee" is represented as `Other`.
type Category =
    | Beer
    | Beverage
    | Coffee
    | Other

    /// Converts a category string into a `Category` case.
    /// The comparison is exact (case-sensitive); unmatched input yields `Other`.
    static member FromString (s: string) =
        if s = "Beer" then Beer
        elif s = "Beverage" then Beverage
        elif s = "Coffee" then Coffee
        else Other

/// One logged order as read from a row of the data file.
type Order = 
    {
        DateTime    : System.DateTime
        Name        : string
        Gender      : char
        Product     : string
        Price       : float
        Department  : string
        Category    : Category
        Amount      : int
    }

    /// Builds an `Order` from its field values; the curried arguments are
    /// given in the same order as the record fields are declared.
    static member Create time (name: string) gender product price department category amount = 
        {
            DateTime = time; Name = name; Gender = gender; Product = product
            Price = price; Department = department; Category = category; Amount = amount
        }

/// All orders from the coffee data file, sorted ascending by their timestamp.
let data = 
    // Prefer the repository-relative data file so the notebook also runs on other machines;
    // fall back to the original absolute path when the relative one is not found.
    let dataPath = 
        let relativePath = @"..\data\coffeedata.txt"
        if System.IO.File.Exists relativePath then 
            relativePath
        else 
            @"C:\Users\bvenn\source\repos\brewing-discoveries-workshop\data\coffeedata.txt"
    let read =
        CsvFile
            .Load(dataPath)
            .Cache()
    // Parse timestamps culture-independently: with a null provider the current culture is used,
    // and the "/" in the format string then stands for that culture's date separator.
    let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
    read.Rows
    |> Seq.map (fun row -> 
        Order.Create
            (System.DateTime.ParseExact((row.GetColumn "DateTime"),"dd/MM/yyyy HH:mm:ss",invariantCulture))
            (row.GetColumn "Name")
            (row.GetColumn "Gender" |> char)
            (row.GetColumn "Product")
            (row.GetColumn "Price" |> float) 
            (row.GetColumn "Department")
            ((row.GetColumn "Category") |> Category.FromString)
            (row.GetColumn "Amount" |> int)
        )
    |> Array.ofSeq
    |> Array.sortBy (fun x -> x.DateTime)

/// Returns the hex color assigned to a department name.
/// Names without an entry in the table get the grey fallback "#8b8b8b".
let getDepartmentColor (department: string) = 
    let fallbackColor = "#8b8b8b"
    let departmentColors = 
        [
            "Breakroom Bandits", "#2b3ae9"
            "Genesis", "#f7da41"
            "We Tried", "#008b66"
            "No Lucks Given", "#987200"
            "Toon Squad", "#ff7f0e"
            "Rumor Spreaders", "#20b2aa"
            "Risky Biscuits", "#a230ed"
            "Recruitables", "#d21102"
            "Employees of the Moment", "#19d3f3"
            // NOTE(review): "Chargers" and "Kickstarters" share the same color - confirm this is intended
            "Chargers", "#dea57b"
            "Kickstarters", "#dea57b"
        ]
    departmentColors
    |> List.tryPick (fun (name,color) -> if name = department then Some color else None)
    |> Option.defaultValue fallbackColor


/// Lookup from person name to the color of that person's department.
let person2Color = 
    [| for order in data -> (order.Name, getDepartmentColor order.Department) |]
    |> Array.distinct
    |> Map.ofArray

Loading extensions from `C:\Users\bvenn\.nuget\packages\plotly.net.interactive\4.2.1\interactive-extensions\dotnet\Plotly.NET.Interactive.dll`

Loading extensions from `C:\Users\bvenn\.nuget\packages\cytoscape.net.interactive\0.2.0\interactive-extensions\dotnet\Cytoscape.NET.Interactive.dll`

In [2]:

// Small rendering check: a point chart built from the one-element list [1,2]
// (a single (1,2) tuple), with the raw text "Hello" attached as chart description.
Chart.Point([1,2])
|> Chart.withDescription [Giraffe.ViewEngine.HtmlElements.rawText "Hello"]

Some FSharp.Stats functionality requires [LAPACK](https://www.netlib.org/lapack/) routines. After the initial package download you can find them at `C:\Users\USERNAME\.nuget\packages\fsharp.stats\0.5.1-preview.1\netlib_LAPACK`. The prepared use cases do not need them, but if you want to load them anyway, the next two lines do the job.

In [3]:
//FSharp.Stats.ServiceLocator.setEnvironmentPathVariable (@"C:\Users\USERNAME\.nuget\packages\fsharp.stats\0.5.1-preview.1\netlib_LAPACK")
//FSharp.Stats.Algebra.LinearAlgebra.Service()

## Social network generation

The data allows the construction of a social network of drinking partners. In theory, drinking partners are likely to log their drinks within a short period of time. Of course, this assumption is prone to error because there are two logging devices in different buildings, and external factors (like the end of a lecture that many people attend) are likely to cause simultaneous thirst.

To start this analysis, we map over all orders and, for each order, isolate the orders that fall within a short time period (e.g. less than 1 minute before or after). From these orders we extract the user names, because that is the only thing we are currently interested in. To remove self-references, an additional filter step is required.


In [4]:
/// All ordered (person, partner) name pairs whose orders were logged less than one
/// minute apart. Each pair of such orders contributes one entry per direction;
/// pairs with identical names (self-references) are dropped.
let drinkingpartners = 
    // Relies on `data` being sorted ascending by DateTime (done where `data` is built):
    // all orders within the time window of an order then form one contiguous index range,
    // so a sliding window replaces the scan of the whole data set for every order.
    let window = System.TimeSpan.FromMinutes 1.
    let pairs = ResizeArray<string * string>()
    // index of the earliest order that is still inside the window of the current order
    let mutable first = 0
    // index one past the latest order that is inside the window of the current order
    let mutable last = 0
    for i in 0 .. data.Length - 1 do
        let x = data.[i]
        // drop orders that are a full minute (or more) older than x; stops at i at the latest
        while x.DateTime - data.[first].DateTime >= window do
            first <- first + 1
        // the window always contains x itself
        last <- max last (i + 1)
        // take in later orders that are less than a minute younger than x
        while last < data.Length && data.[last].DateTime - x.DateTime < window do
            last <- last + 1
        for j in first .. last - 1 do
            let drinkPartner = data.[j]
            if x.Name <> drinkPartner.Name then 
                pairs.Add((x.Name,drinkPartner.Name))
    pairs.ToArray()

drinkingpartners

index,value
,
,
,
,
,
,
,
,
,
,

Unnamed: 0,Unnamed: 1
Item1,Douglas Powell
Item2,Nicholas Thomas

Unnamed: 0,Unnamed: 1
Item1,Douglas Powell
Item2,Patrick Holmes

Unnamed: 0,Unnamed: 1
Item1,Douglas Powell
Item2,Muhammed Sullivan

Unnamed: 0,Unnamed: 1
Item1,Nicholas Thomas
Item2,Douglas Powell

Unnamed: 0,Unnamed: 1
Item1,Nicholas Thomas
Item2,Patrick Holmes

Unnamed: 0,Unnamed: 1
Item1,Nicholas Thomas
Item2,Muhammed Sullivan

Unnamed: 0,Unnamed: 1
Item1,Patrick Holmes
Item2,Douglas Powell

Unnamed: 0,Unnamed: 1
Item1,Patrick Holmes
Item2,Nicholas Thomas

Unnamed: 0,Unnamed: 1
Item1,Patrick Holmes
Item2,Muhammed Sullivan

Unnamed: 0,Unnamed: 1
Item1,Muhammed Sullivan
Item2,Douglas Powell

Unnamed: 0,Unnamed: 1
Item1,Muhammed Sullivan
Item2,Nicholas Thomas

Unnamed: 0,Unnamed: 1
Item1,Muhammed Sullivan
Item2,Patrick Holmes

Unnamed: 0,Unnamed: 1
Item1,Emma Roman
Item2,Eleanor Macdonald

Unnamed: 0,Unnamed: 1
Item1,Eleanor Macdonald
Item2,Emma Roman

Unnamed: 0,Unnamed: 1
Item1,Eleanor Macdonald
Item2,Hannah Walters

Unnamed: 0,Unnamed: 1
Item1,Hannah Walters
Item2,Eleanor Macdonald

Unnamed: 0,Unnamed: 1
Item1,Hannah Walters
Item2,Jasmine Sutton

Unnamed: 0,Unnamed: 1
Item1,Jasmine Sutton
Item2,Hannah Walters

Unnamed: 0,Unnamed: 1
Item1,Hugo Green
Item2,Abigail Payne

Unnamed: 0,Unnamed: 1
Item1,Hugo Green
Item2,Abigail Payne


From there it's an easy task to count how often each pair of people drank simultaneously.

In [5]:
/// Each distinct (person, partner) pair together with how often it occurs in `drinkingpartners`.
let partnerCounts = Array.countBy id drinkingpartners

partnerCounts

As discussed earlier, it is possible to become a drinking partner by chance. To reduce the probability of false positives, it is recommended to filter out sparse relationships. 
To do so, you could either set an arbitrary threshold (e.g. 3) or visualize the count distribution and make an educated guess about an appropriate threshold.

In [6]:
// Histogram over the count component (snd) of every (pair, count) entry in partnerCounts.
partnerCounts
|> Array.map snd
|> Chart.Histogram
|> Chart.withXAxisStyle "Total number of simultaneous drinks of two people"
|> Chart.withYAxisStyle "count of occurances"


It becomes apparent that most person-person relations have only a few simultaneous drinks (<5). A threshold of e.g. 8 seems appropriate. Another thing you may have noticed is that the histogram counts are always multiples of 2.
This is because not only `(Hugo Green, Abigail Payne)` has a drinking count of `21`, but also `(Abigail Payne, Hugo Green)`.
Because we are not interested in a directed network, where it matters who took a drink first, we can simply ignore half of the data: 

In [7]:
/// Pairs with at least 8 shared drinks, keeping only the first entry for each
/// unordered pair of names (so (a,b) and (b,a) are not both retained).
let filteredPartnerCounts = 
    let minimumSharedDrinks = 8
    // direction-independent key: the two names in ascending order
    let unorderedPair (name1: string,name2: string) = 
        if name1 <= name2 then (name1,name2) else (name2,name1)
    partnerCounts
    |> Array.filter (fun (_,sharedDrinkingCount) -> sharedDrinkingCount >= minimumSharedDrinks)
    |> Array.distinctBy (fun (names,_) -> unorderedPair names)

Create a chart that visualizes these counts.

In [8]:
// Bar chart of the filtered pairs: each pair is labelled "name1 - name2" and
// the entries are sorted by descending shared drinking count before plotting.
filteredPartnerCounts
|> Array.map (fun ((name1,name2),sharedDrinkingCount) -> $"{name1} - {name2}", sharedDrinkingCount)
|> Array.sortByDescending snd
|> Chart.Bar


In [9]:

/// Creates one Cytoscape node per distinct person name that occurs as source or
/// target in `input`. Each node is labelled with the name, gets weight 12 and is
/// colored via `person2Color` (the lookup throws if a name is not in that map).
let getCytoVertices (input: ((string*string)*int) []) = 
    input
    |> Seq.collect (fun ((s,t),_) -> [|s;t|])
    // de-duplicate on the names, before any node is created, so the result
    // does not depend on the equality semantics of the node objects
    |> Seq.distinct
    |> Seq.map (fun name ->
        let styling = [CyParam.label name; CyParam.weight 12; CyParam.color person2Color.[name]]
        Elements.node name styling
        )

/// Creates one Cytoscape edge per distinct ((source,target),count) entry.
/// Edge ids are "e" followed by the running index; the edge weight is the
/// natural logarithm of the count.
let getCytoEdges (input: seq<(string*string)*int>)= 
    Seq.distinct input
    |> Seq.indexed
    |> Seq.map (fun (index,((source,target),count)) -> 
        let edgeId = "e" + string index
        // alternative weighting that was tried: sqrt (float count / 2.)
        let styling = [CyParam.weight (log (float count))]
        Elements.edge edgeId source target styling
        )

// Nodes and edges of the social network, both derived from the filtered partner counts.
let goVertices = getCytoVertices filteredPartnerCounts
let goEdges = getCytoEdges filteredPartnerCounts


/// Builds a Cytoscape graph from the given vertices and edges, applies the
/// node and edge styles defined below and uses the cose layout.
let cytoGraph vertices edges = 
    // circular nodes showing their label; background color is mapped from the node's color attribute
    let nodeStyle = 
        [
            CyParam.shape "circle"
            CyParam.content =. CyParam.label
            CyParam.Text.Outline.color "#000000"
            CyParam.Text.Outline.width 1   
            CyParam.color "#FFFFFF"
            CyParam.Background.color =.CyParam.color //"grey"//
            CyParam.Border.color "#A00975"
        ]
    // grey bezier edges; line width is mapped from the edge's weight attribute
    let edgeStyle = 
        [
            CyParam.Line.color "grey" //"#3D1244"
            CyParam.Curve.style "bezier"
            CyParam.width =. CyParam.weight
        ]
    CyGraph.initEmpty ()
    |> CyGraph.withElements vertices
    |> CyGraph.withElements edges
    |> CyGraph.withStyle "node" nodeStyle
    |> CyGraph.withStyle "edge" edgeStyle
    |> CyGraph.withLayout (Layout.initCose id)   

cytoGraph goVertices goEdges


## Correlation network

Besides a social network we can also generate a day-based correlation network. Here we assign high correlation scores to a user-user pair, if their drinking behaviour is similar.

While it may seem that this analysis will show the same results as the previous network, the readout will be different.

The most common correlation measure is Pearson's correlation coefficient. It ranges from -1 to 1, where 0 indicates no correlation at all, 1 indicates a perfect positive correlation and -1 a perfect negative correlation of two collections.

To be able to calculate correlations between two people, you could encode days when someone logged a drink by 1 and 0 otherwise.

Task: Create a nested collection as float [] [], that for each person contains an array of 1. or 0..

```fsharp
[ //     | drank some kind of beverage on the second day since logging start
    [0.; 1.; 1.; ...] //Nicholas Powell
    [0.; 1.; 0.; ...] //Timo M.
    [1.; 0.; 1.; ...] //Chloe Perkins
]
```

By calling `Matrix.ofJaggedArray` or just `matrix` for short, you can convert this jagged array into a matrix.

In [10]:
/// Every person name that occurs in `data`, once each, in order of first appearance.
let allPersonNames = 
    [| for order in data -> order.Name |]
    |> Array.distinct

/// For every entry of `allPersonNames` (same index), the timestamp of the
/// first order in `data` that carries this name.
let firstTicks = 
    [|
        for name in allPersonNames ->
            let firstOrder = data |> Array.find (fun order -> order.Name = name)
            firstOrder.DateTime
    |]

/// Per person (outer index as in `allPersonNames`) one value per calendar date with orders:
/// 0. while the date is not after the person's first order timestamp,
/// 1. if the person logged an order on that date, and -1. otherwise.
let encodings = 
    data
    |> Array.groupBy (fun x -> x.DateTime.Date)
    |> Array.map (fun (date,orders) -> 
        // names of everybody who logged at least one order on this date;
        // built once per date instead of scanning the orders for every person
        let namesAtThisDate = 
            orders 
            |> Array.map (fun order -> order.Name)
            |> Set.ofArray
        allPersonNames 
        |> Array.mapi (fun nameIndex name -> 
            // NOTE(review): `date` is midnight while firstTicks holds the time of the first order,
            // so a person's first drinking day is encoded as 0. rather than 1. - confirm this is intended
            if date <= firstTicks.[nameIndex] then 
                0. 
            elif namesAtThisDate.Contains name then 
                1. 
            else 
                // float literal: no reliance on the implicit int -> float conversion
                -1.
        )
    )
    // from one array per date to one array per person
    |> JaggedArray.transpose
        
encodings


index,value
0,"[ 0, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, -1 ... (1627 more) ]"
1,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1 ... (1627 more) ]"
2,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1 ... (1627 more) ]"
3,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1 ... (1627 more) ]"
4,"[ 0, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ... (1627 more) ]"
5,"[ 0, 1, -1, -1, 1, 1, 1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
6,"[ 0, 1, -1, -1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
7,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1 ... (1627 more) ]"
8,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
9,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, -1 ... (1627 more) ]"


In [11]:
// Shows every person name next to that person's encoding array as a table.
Array.zip allPersonNames encodings
|> DisplayExtensions.DisplayTable

Item1,Item2
Justin Bennett,"[ 0, 1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, -1 ... (1627 more) ]"
Timo M.,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1 ... (1627 more) ]"
Nicholas Thomas,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1 ... (1627 more) ]"
Archie Nelson,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1 ... (1627 more) ]"
Benedikt V.,"[ 0, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ... (1627 more) ]"
Philip Reilly,"[ 0, 1, -1, -1, 1, 1, 1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
Hugo Green,"[ 0, 1, -1, -1, 1, 1, 1, 1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
Douglas Powell,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1 ... (1627 more) ]"
Muhammed Sullivan,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 ... (1627 more) ]"
Scott Woods,"[ 0, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, -1 ... (1627 more) ]"


The encoded matrix is already in a form that can be used to calculate a pairwise Pearson correlation matrix.

Use an appropriate function from the FSharp.Stats.Correlation module and visualize the resulting correlation matrix as heatmap.

In [12]:
// Displays the person names; the position of a name is the index used into `encodings`.
allPersonNames

In [19]:
// Picks two example persons by position and plots their encodings against each other.
// NOTE(review): the indices 4 and 15 are hard-coded and presume at least 16 persons in the data.
let nameA,exampleA = allPersonNames.[4],encodings.[4] 
let nameB,exampleB = allPersonNames.[15],encodings.[15]

// Point density of the (encoding A, encoding B) value pairs; the annotation explains the three codes.
Chart.PointDensity(exampleA,exampleB) 
|> Chart.withTitle $"Encoding pairs {nameA} vs {nameB}"
|> Chart.withXAxisStyle $"Encodings {nameA}"
|> Chart.withYAxisStyle $"Encodings {nameB}"
|> Chart.withAnnotation
    (Annotation.init(X=0.,Y=1.5,Text="0: not yet enroled<br>-1: not present that day<br>1: present that day",ShowArrow=false))
|> Chart.withTemplate ChartTemplates.lightMirrored

In [6]:
/// Row-wise Pearson correlation matrix of the encodings (one row per person).
let corrMat = 
    encodings
    |> matrix
    |> Correlation.Matrix.rowWisePearson

// Heatmap of the correlation matrix, with the person names on both axes.
Chart.Heatmap (Matrix.toJaggedArray corrMat,colNames = allPersonNames,rowNames=allPersonNames)


To get an intuition of what the correlation distribution looks like, you can create a histogram of the correlation coefficients. Note that you should exclude the self-references (the diagonal of the matrix, where the correlation is 1.).

In [68]:
// Histogram of all pairwise correlation coefficients.
// Self-references are removed by their position (the matrix diagonal) instead of by comparing
// with 1.: a diagonal value that is not exactly 1. after rounding is still dropped, and a
// genuine perfect correlation between two different persons is kept.
corrMat
|> Matrix.toJaggedArray
|> Array.mapi (fun rowIndex row -> 
    row
    |> Array.indexed
    |> Array.filter (fun (colIndex,_) -> colIndex <> rowIndex)
    |> Array.map snd
    )
|> Array.concat
|> Chart.Histogram
|> Chart.withAxisTitles "pearson correlation" "count"


In [8]:
/// Correlation coefficients above this value are turned into network edges.
let correlationThreshold = 
    //precomputed because of runtime and LAPACK dependency
    //Testing.RMT.compute 0.9 0.01 0.05 corrMat
    0.671875

/// One (person, person, correlation) entry for every cell above the diagonal of `corrMat`
/// (row index < column index) whose value exceeds `correlationThreshold`.
let edgelist : (string*string*float) list = 
    let correlations = corrMat |> Matrix.toJaggedArray
    [
        for r in 0 .. correlations.Length - 1 do
            for c in r + 1 .. correlations.[r].Length - 1 do
                let x = correlations.[r].[c]
                if x > correlationThreshold then 
                    yield (allPersonNames.[r],allPersonNames.[c],x)
    ]
    // the previous implementation prepended while walking the matrix row by row;
    // reversing keeps the same list order and therefore the same edge numbering
    |> List.rev

/// Both person names of every edge, in edge order; names occur once per edge they take part in.
let nodelist : string list = 
    edgelist
    |> List.collect (fun (s,t,_) -> [s;t])

/// One Cytoscape node per distinct name in `nodelist`, labelled with the name,
/// with weight 12 and colored via `person2Color`.
let csbCytoVertices = 
    nodelist
    // de-duplicate on the names, before any node is created, so the result
    // does not depend on the equality semantics of the node objects
    |> Seq.distinct
    |> Seq.map (fun name ->
        let styling = [CyParam.label name; CyParam.weight 12; CyParam.color person2Color.[name]]
        Elements.node name styling
        )

/// One Cytoscape edge per distinct entry of `edgelist`. Edge ids are "e" followed by
/// the running index; the edge weight is 3 divided by the absolute correlation.
let csbCytoEdges = 
    Seq.distinct edgelist
    |> Seq.indexed
    |> Seq.map (fun (index,(source,target,correlation)) -> 
        let edgeId = "e" + string index
        let styling = [CyParam.weight (3. / abs correlation)]
        Elements.edge edgeId source target styling
        )

// Render the correlation network with the same node/edge styling and cose layout
// as the social network: reuse `cytoGraph` (defined in the social network section)
// instead of repeating its style lists, and only set the canvas size here.
cytoGraph csbCytoVertices csbCytoEdges
|> CyGraph.withSize (1300,1000)


In [61]:
// First order in `data` logged by "Leia Patton" (Array.find throws if there is none).
let l = 
    data
    |> Array.find (fun x -> x.Name = "Leia Patton")

// All orders logged by "Victor Clark" on 22 May 2023.
let v = 
    // compare calendar dates directly instead of culture-formatted date strings;
    // the target date is built once, outside the filter
    let targetDate = System.DateTime(2023,5,22)
    data
    |> Array.filter (fun x -> 
        x.Name = "Victor Clark"
        && x.DateTime.Date = targetDate
        )

l,v