Duplicated cell for data import

In [1]:
#r "nuget: FSharp.Stats, 0.5.1-preview.1"
#r "nuget: Plotly.NET.Interactive, 4.2.1"
#r "nuget: Cytoscape.NET.Interactive, 0.2.0"


open FSharp.Stats
open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects
open FSharp.Data
open Cytoscape.NET
open System

// use a script to import data in all notebooks > 1
#load "import.fsx"
open Import
let orders = Import.orders

Loading extensions from `C:\Users\schne\.nuget\packages\plotly.net.interactive\4.2.1\interactive-extensions\dotnet\Plotly.NET.Interactive.dll`

Loading extensions from `C:\Users\schne\.nuget\packages\cytoscape.net.interactive\0.2.0\interactive-extensions\dotnet\Cytoscape.NET.Interactive.dll`

# Analysis I - basic stats

The first global analysis visualizes how many products of each category are logged on a work day (Mo-Fr). For this we need a function that filters the data accordingly, determines the number of logs per day, and creates a histogram.

In [2]:
/// Builds a histogram of the number of logs per work day (Mo-Fr) for `category`.
/// Only days that have at least one matching log contribute a value;
/// the resulting trace is labelled with `category.ToString()`.
let getHistogramOfCategory (category: Category) =
    let logsPerWorkDay =
        orders
        |> Array.filter (fun order ->
            let day = order.DateTime.DayOfWeek
            order.Category = category
            && not (day = System.DayOfWeek.Saturday || day = System.DayOfWeek.Sunday))
        // one count per distinct calendar day (keyed by its short date string)
        |> Array.countBy (fun order -> order.DateTime.ToShortDateString())
        |> Array.map snd
    // alternative with probability-normalised bars:
    //Chart.Histogram(logsPerWorkDay,Opacity=0.5,HistNorm=StyleParam.HistNorm.Probability)
    Chart.Histogram(logsPerWorkDay,Opacity=0.5)
    |> Chart.withTraceInfo (category.ToString())

// One semi-transparent histogram trace per product category.
let histoRawBeer = Category.Beer |> getHistogramOfCategory
let histoRawCoffe = Category.Coffee |> getHistogramOfCategory
let histoRawBeverage = Category.Beverage |> getHistogramOfCategory

// Draw the three traces on top of each other in a single chart.
[ histoRawBeer; histoRawCoffe; histoRawBeverage ]
|> Chart.combine
|> Chart.withLayoutStyle(BarMode=StyleParam.BarMode.Overlay)
|> Chart.withXAxisStyle "logs per work day"
|> Chart.withYAxisStyle "Frequency"

## Global logging heatmap

Let's investigate whether we can visually identify global trends if we create `logging traces` for each person.

The result should be a heatmap that shows the number of ticks per day, with one row per person.

Let's also add annotations for some significant dates of our choice.

In [3]:
/// Every distinct name that occurs in `orders`, in order of first appearance.
let allPersons =
    [| for order in orders -> order.Name |]
    |> Array.distinct

/// Per-person logging traces: after the transpose, each row belongs to one
/// entry of `allPersons` and each column to one calendar day found in `orders`;
/// a cell holds the number of orders that person logged on that day (as float).
let signal =
    orders
    |> Array.groupBy (fun order -> order.DateTime.Date)
    |> Array.map (fun (_, ordersOfDay) ->
        // one count per person, in the order of allPersons
        allPersons
        |> Array.map (fun name ->
            let ticks =
                ordersOfDay
                |> Array.sumBy (fun order -> if order.Name = name then 1 else 0)
            float ticks))
    // days x persons -> persons x days
    |> JaggedArray.transpose


// let filteredPersons,filteredSignal = 
//     Array.zip allPersons signal
//     |> Array.filter (fun (name,signal) -> 
//         let days = signal.Length
//         let ticking = signal |> Array.filter (fun x -> x > 0.) |> Array.length
//         float ticking / float days > 0.05
//         )
//     |> Array.unzip

// Heatmap of `signal`: rows are labelled with the person names and columns
// with the distinct short date strings of all orders (first-appearance order).
Chart.Heatmap(
    signal,
    colNames= (
        [| for order in orders -> order.DateTime.ToShortDateString() |]
        |> Array.distinct),
    rowNames=allPersons)
|> Chart.withSize(1400.,900.)
// |> Chart.withAnnotations [
//     Annotation.init(
//         Text = "Covid pandemic starts"
//         // X = System.DateTime.Parse("04/01/2020"),
//         // Y = 0.5,
//         // YRef = "paper"
//     )
// ]
