In [2]:
// nuget references
#r "nuget: FSharp.Stats, 0.5.1-preview.1"
#r "nuget: Plotly.NET.Interactive, 4.2.1"
#r "nuget: FSharp.Data, 6.3.0"
#r "nuget: Cytoscape.NET.Interactive, 0.2.0"

open FSharp.Stats
open Plotly.NET
open Plotly.NET.StyleParam
open Plotly.NET.LayoutObjects
open FSharp.Data
open Cytoscape.NET
open System


//FSharp.Stats.ServiceLocator.setEnvironmentPathVariable (@"C:\Users\bvenn\source\repos\FSharp.Stats\lib")
//FSharp.Stats.Algebra.LinearAlgebra.Service()

// Axis styling extension module
module Chart = 
    /// Builds a linear axis titled `name`: mirrored on all sides, ticks drawn inside,
    /// axis line shown and grid lines hidden.
    let myAxis name =
        LinearAxis.init(
            Title = Title.init name,
            Mirror = StyleParam.Mirror.All,
            Ticks = StyleParam.TickOptions.Inside,
            ShowGrid = false,
            ShowLine = true
        )

    /// Applies the `lightMirrored` template to `chart` and then sets its x and y axes
    /// to `myAxis x` and `myAxis y`.
    let withAxisTitles x y chart = 
        let templated = chart |> Chart.withTemplate ChartTemplates.lightMirrored
        let withX = templated |> Chart.withXAxis (myAxis x)
        withX |> Chart.withYAxis (myAxis y)

/// Product category of an order. Every category that is not explicitly
/// listed is represented by `Other`.
type Category =
    | Beer
    | Beverage
    | Coffee
    | Other

    /// Converts a category name to its case. The comparison is exact
    /// (case-sensitive); any other input, including null, yields `Other`.
    static member FromString (s: string) =
        if s = "Beer" then Beer
        elif s = "Beverage" then Beverage
        elif s = "Coffee" then Coffee
        else Other

/// One order record; `data` builds one of these per row of the input file,
/// reading the column that has the same name as each field.
type Order =
    {
        DateTime    : System.DateTime
        Name        : string
        Gender      : char
        Product     : string
        Price       : float
        Department  : string
        Category    : Category
        Amount      : int
    }

    /// Curried constructor: takes the field values in declaration order and
    /// returns the corresponding record.
    static member Create time (name: string) gender product price department category amount : Order =
        {
            Amount     = amount
            Category   = category
            Department = department
            Price      = price
            Product    = product
            Gender     = gender
            Name       = name
            DateTime   = time
        }

/// All orders of the coffee data file, parsed eagerly into an array.
/// Raises if the file cannot be loaded or a cell cannot be converted.
let data = 
    // Parse time stamps with the invariant culture. With a null provider the
    // current culture is used, and "/" in a custom format string stands for
    // that culture's date separator - so "dd/MM/yyyy" would fail to parse
    // on machines whose culture uses e.g. "." as date separator.
    let invariantCulture = System.Globalization.CultureInfo.InvariantCulture
    let read =
        CsvFile
            //.Load(@"..\data\coffeedata.txt")
            .Load(@"C:\Users\bvenn\source\repos\brewing-discoveries-workshop\data\coffeedata.txt")
            .Cache()
    read.Rows
    |> Seq.map (fun row -> 
        Order.Create
            (System.DateTime.ParseExact((row.GetColumn "DateTime"),"dd/MM/yyyy HH:mm:ss",invariantCulture))
            (row.GetColumn "Name")
            (row.GetColumn "Gender" |> char)
            (row.GetColumn "Product")
            (row.GetColumn "Price" |> float) 
            (row.GetColumn "Department")
            ((row.GetColumn "Category") |> Category.FromString)
            (row.GetColumn "Amount" |> int)
        )
    |> Array.ofSeq



## Statistical Testing

You can use statistical testing to determine whether or not two samples differ from each other. Several tests exist, each of which tests a different property or is suited for a different scenario. For more information on the tests provided by FSharp.Stats please check out the [documentation](https://fslab.org/FSharp.Stats/Testing.html) or the `FSharp.Stats.Testing` module. The result of a statistical test is in most cases a p value that describes how likely the observed outcome is to occur by chance. A common significance threshold is 0.05. If the p value of the test you performed is below this threshold, your probability of making a type I error (false positive) is below 5 %, which is sufficiently accurate in many cases.

The most popular hypothesis test is the t test; it is used to determine whether the means of two samples differ significantly. For further insights please visit the fslab [t test blog post](https://fslab.org/blog/posts/testing-t-test.html).

There are some conditions the samples should satisfy. They should be approximately normally distributed. If this condition cannot be satisfied you should use a nonparametric test, such as a Wilcoxon signed-rank test. If the variances of the samples you want to test differ a lot, you should use a Welch test instead. This t test alternative can be calculated by setting the `equal variances` parameter in `Testing.TTest.twoSample` to false.

Task:
Test whether a person drinks significantly more coffee per week than another. As you'll need a sufficient sample size, you can first plot the distribution of different people using histograms, box plots or violin plots.
Hint: You can assume the distributions of "Benedikt V." and "Michelle Porter" to be normally distributed.


In [3]:
/// Number of coffee orders per ISO week for the person called `name`.
/// Weeks without any coffee order of that person do not appear in the result.
let weeklyCoffeeCounts name =
    data
    |> Array.filter (fun x -> x.Category = Coffee && x.Name = name)
    // The task asks for consumption per week, so group by ISO week instead of
    // by month. The ISO week-numbering year is used as the second key part so
    // that days around New Year end up in the same group as the rest of their week.
    |> Array.groupBy (fun x ->
        System.Globalization.ISOWeek.GetYear(x.DateTime),
        System.Globalization.ISOWeek.GetWeekOfYear(x.DateTime))
    |> Array.map (fun (_, orders) -> float orders.Length)

// Weekly coffee counts of "Benedikt V."
let coffeTicksVenny = weeklyCoffeeCounts "Benedikt V."
    
coffeTicksVenny
|> Chart.Histogram


In [4]:
// Weekly coffee counts of "Michelle Porter"
let coffeTicksLuk = weeklyCoffeeCounts "Michelle Porter" //Nicholas Thomas

coffeTicksLuk
|> Chart.Histogram

In [5]:
// Two-sample t test on both count series; `false` switches off the
// equal-variances assumption.
let p =
    let sampleVenny = vector coffeTicksVenny
    let sampleLuk = vector coffeTicksLuk
    Testing.TTest.twoSample false sampleVenny sampleLuk

// One box plot per person, combined into a single chart whose title is the p value.
[
    Chart.BoxPlot(Y = coffeTicksVenny, Name = "Benedikt V.")
    Chart.BoxPlot(Y = coffeTicksLuk, Name = "Michelle Porter")
]
|> Chart.combine
|> Chart.withTitle (string p.PValue)


## Test II

Test if there is a statistically significant change in beer consumption in winter vs summer season.
Please note that people are joining and leaving the CSBar system. Does this even matter?



## Test III

Test if there is a statistically significant change in working day starts in winter vs summer season.
Please note that this test is not suited to drawing firm conclusions, because the first coffee does not necessarily correspond to the start of the working day. Additionally, there may be differences in the employees' drinking behaviour depending on the season.

Hint: You can transform the daily starting time to floats.



In [17]:


/// Time stamp of the earliest coffee order before 11:00 for every person and
/// calendar day on which that person ordered such a coffee.
let isolateFirstCoffee = 
    data
    // `&&` replaces the deprecated boolean `&` operator.
    |> Array.filter (fun order -> order.Category = Coffee && order.DateTime.Hour < 11)
    |> Array.groupBy (fun order -> order.Name)
    |> Array.collect (fun (_, personOrders) ->
        personOrders
        // DateTime.Date is a culture-independent key for the calendar day.
        |> Array.groupBy (fun order -> order.DateTime.Date)
        // Take the minimum explicitly instead of relying on the rows being
        // stored in chronological order.
        |> Array.map (fun (_, dayOrders) ->
            dayOrders
            |> Array.map (fun order -> order.DateTime)
            |> Array.min)
        )


// First-coffee times in the months April to September, expressed as decimal
// hours (hour + minute / 60; seconds are not taken into account).
let summer = 
    isolateFirstCoffee
    |> Array.choose (fun t ->
        if t.Month >= 4 && t.Month <= 9 then
            Some (float t.Hour + float t.Minute / 60.)
        else
            None)

// First-coffee times in the months October to March, expressed as decimal
// hours (hour + minute / 60; seconds are not taken into account).
let winter = 
    isolateFirstCoffee
    |> Array.choose (fun t ->
        if t.Month < 4 || t.Month > 9 then
            Some (float t.Hour + float t.Minute / 60.)
        else
            None)

// Overlay the two half-transparent histograms so both seasons can be compared directly.
[
    Chart.Histogram(summer, Name = "Summer", OffsetGroup = "A", Opacity = 0.5)
    Chart.Histogram(winter, Name = "Winter", OffsetGroup = "B", Opacity = 0.5)
]
|> Chart.combine
|> Chart.withLayoutStyle(BarMode = BarMode.Overlay)
