Duplicated cell for data import

In [13]:
#r "nuget: FSharp.Stats, 0.5.1-preview.1"
#r "nuget: Plotly.NET.Interactive, 4.2.1"
#r "nuget: Cytoscape.NET.Interactive, 0.2.0"


open FSharp.Stats
open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects
open FSharp.Data
open Cytoscape.NET
open System

// use a script to import data in all notebooks > 1
// The script is loaded from the notebook's working directory; `open Import`
// brings its definitions into scope (the cells below also rely on `Category`
// and `personColorMap`, which are not defined in this notebook - presumably
// they come from this script; verify against import.fsx).
#load "import.fsx"
open Import
// Local alias for the imported orders. The cells below use it as an array of
// records exposing at least `Category`, `DateTime` and `Name`.
let orders = Import.orders

# Analysis I - basic stats

The first global analysis visualizes how many products of each category are logged per work day (Mon-Fri). To do so, we need a function that filters the data accordingly, counts the logs for each day, and creates a histogram.

In [12]:
/// Builds a histogram of the number of logs per work day for one category.
/// Orders are restricted to the given category and to days other than
/// Saturday and Sunday, then counted per calendar day; the histogram trace
/// is named after the category.
let getHistogramOfCategory (category: Category) = 
    // true for every day of the week except Saturday and Sunday
    let isWorkDay (day: System.DayOfWeek) =
        not (day = System.DayOfWeek.Saturday || day = System.DayOfWeek.Sunday)

    // number of matching logs for each day that has at least one such log
    let logsPerDay =
        orders
        |> Array.filter (fun order -> order.Category = category && isWorkDay order.DateTime.DayOfWeek)
        |> Array.groupBy (fun order -> order.DateTime.ToShortDateString())
        |> Array.map (fun (_, logs) -> logs.Length)

    // Alternative kept from the original author (normalized histogram):
    //Chart.Histogram(logsPerDay,Opacity=0.5,HistNorm=StyleParam.HistNorm.Probability)
    Chart.Histogram(logsPerDay,Opacity=0.5)
    |> Chart.withTraceInfo (category.ToString())

// One daily-log histogram per category of interest.
let histoRawBeer = Category.Beer |> getHistogramOfCategory
let histoRawCoffe = Category.Coffee |> getHistogramOfCategory
let histoRawBeverage = Category.Beverage |> getHistogramOfCategory

// Draw the three histograms on top of each other in a single chart
// (each one is created with Opacity=0.5 in getHistogramOfCategory).
[ histoRawBeer; histoRawCoffe; histoRawBeverage ]
|> Chart.combine
|> Chart.withLayoutStyle(BarMode=StyleParam.BarMode.Overlay)
|> Chart.withXAxisStyle "logs per work day"
|> Chart.withYAxisStyle "Frequency"

In [18]:

/// For every person (keyed by name): the number of logs on each day on which
/// that person logged at least once, as floats. Days without any log of the
/// person do not appear in the person's array.
let dailyLogs = 
    orders
    |> Array.groupBy (fun order -> order.Name)
    |> Array.map (fun (name, personOrders) ->
        let logsPerDay =
            personOrders
            |> Array.groupBy (fun order -> order.DateTime.Date)
            |> Array.map (fun (_, logsOfDay) -> float logsOfDay.Length)
        name, logsPerDay)

// Two-panel overview of how regularly each person logs.
// Only persons with more than 10 logging days are included in either panel.
[
    // Left panel: one point per person, mean of the daily log counts (x)
    // against their standard deviation (y), colored via personColorMap.
    dailyLogs
    |> Array.filter (fun (_,data) -> data.Length > 10)
    |> Array.map (fun (name,data) -> 
        Chart.Point([Seq.mean data],[Seq.stDev data],Name=name,MarkerColor=Color.fromHex (personColorMap.[name]))
        )
    |> Chart.combine
    |> Chart.withXAxisStyle "mean(logs per day)"
    |> Chart.withYAxisStyle "stDev(logs per day)"


    // Right panel: one column per person showing the coefficient of variation
    // of the daily log counts, ordered from lowest to highest value.
    dailyLogs
    |> Array.filter (fun (_,data) -> data.Length > 10)
    |> Array.map (fun (name,data) -> 
        let cv = Seq.cv data
        // keep cv next to its chart so the charts can be sorted by it below
        cv,Chart.Column([cv],[name],MarkerColor=Color.fromHex personColorMap.[name],ShowLegend=false)
        )
    |> Array.sortBy fst
    |> Array.map snd
    |> Chart.combine
    // The coefficient of variation is stdev/mean (the label previously had the ratio inverted).
    |> Chart.withYAxisStyle "coefficient of variation (stdev/mean)"
]
|> Chart.Grid(1,2)
|> Chart.withMarginSize(Bottom=150)
|> Chart.withSize(1200,600)

In [22]:
/// Names of all persons that appear in the orders, each listed once,
/// in order of first appearance.
let allPersons = 
    orders
    |> Array.distinctBy (fun order -> order.Name)
    |> Array.map (fun order -> order.Name)

/// Per-person daily log counts: one row per entry of allPersons, one column
/// per distinct date in orders (dates in order of first appearance).
/// A person without logs on a day gets 0. for that day.
let signal = 
    orders
    |> Array.groupBy (fun order -> order.DateTime.Date)
    |> Array.map (fun (_, ordersOfDay) ->
        // one count per person for this day, in the order of allPersons
        allPersons
        |> Array.map (fun person ->
            ordersOfDay
            |> Array.sumBy (fun order -> if order.Name = person then 1. else 0.)))
    // built as day x person above; flip to person x day
    |> JaggedArray.transpose
    
/// Persons (and their daily signal rows) that logged at least once on more
/// than 5 % of all days; the two arrays are index-aligned.
let pcapersoncs,pcaSignal = 
    signal
    |> Array.zip allPersons
    |> Array.filter (fun (_, personSignal) ->
        // number of days on which this person has at least one log
        let activeDays =
            personSignal |> Array.sumBy (fun count -> if count > 0. then 1 else 0)
        float activeDays / float personSignal.Length > 0.05)
    |> Array.unzip

// Heatmap of the filtered signal: rows are the retained persons, columns are
// the distinct short date strings of all orders in order of first appearance.
Chart.Heatmap(pcaSignal,
              colNames=(orders |> Seq.map (fun order -> order.DateTime.ToShortDateString()) |> Seq.distinct),
              rowNames=pcapersoncs)
|> Chart.withSize(1400.,900.)