[this doc on github](https://github.com/dotnet/interactive/tree/master/samples/notebooks/fsharp/Samples)

This demonstrates the use of `Microsoft.Data.Analysis` data frames with F#. You can open this example online using [MyBinder](https://mybinder.org/v2/gh/dotnet/interactive/master?filepath=fsharp%2FSamples%2FDataFrame-Getting%20Started.ipynb).

### Referencing the package


First, get the package and open the namespaces:

In [None]:
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet5/nuget/v3/index.json" 
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json" 

#r "nuget:Microsoft.Data.Analysis,0.2.0"
#r "nuget: XPlot.Plotly.Interactive, 3.0.4"

open Microsoft.Data.Analysis

### Creating a data frame

Create 3 columns to hold values of types `DateTime`, `int`, and `string`

In [None]:
// Three columns, one per element type; each is given the name it will have in the data frame.
let dateTimes = PrimitiveDataFrameColumn<DateTime>("DateTimes") // Default length is 0.
let ints = PrimitiveDataFrameColumn<int>("Ints", 3L) // Makes a column of length 3. Filled with nulls initially.
let strings = StringDataFrameColumn("Strings", 3L) // Same length argument (3L) as the Ints column.

Add some datetimes

In [None]:
// dateTimes was created without a length, so append three values to match the
// length (3) given to the other two columns. The first two dates are identical.
dateTimes.Append(DateTime.Parse("2019/01/01"))
dateTimes.Append(DateTime.Parse("2019/01/01"))
dateTimes.Append(DateTime.Parse("2019/01/02"))

Create a `DataFrame` with 3 columns

In [None]:
// The list is annotated as `DataFrameColumn list` so the three differently-typed
// columns are passed as a single list of the common base type.
let df = DataFrame([dateTimes; ints; strings]: DataFrameColumn list)

### Adding better default formatting for data frames

Create a formatter for data frames and data frame rows.

In [None]:
// Registers "text/html" formatters for DataFrame and DataFrameRow so that both
// are written out as HTML tables.
// NOTE(review): the module name reads "DateFrameFormatter" (probably meant
// "DataFrameFormatter"); it is kept unchanged so existing references still resolve.
module DateFrameFormatter = 
    
    // Locally open the F# HTML DSL.
    open Html

    // Maximum number of data rows written for a single data frame.
    let maxRows = 20

    // Data frame formatter: a header row ("Index" plus one cell per column name)
    // followed by at most maxRows body rows. Returns false when it declines to
    // format (nested context), true once the table has been written.
    Formatter.Register<DataFrame>((fun (context: FormatContext) (df: DataFrame) (writer: TextWriter) ->

        // Don't generate nested tables
        if context.ContentThreshold < 1.0 then false else

        // Ask other formatters to reduce information generation
        context.ReduceContent(0.2) |> ignore

        // Number of rows actually rendered. The range below is inclusive, so it
        // must end at rowCount - 1 to emit exactly rowCount rows; for an empty
        // frame this is 0 .. -1, which yields nothing.
        let rowCount = min maxRows (int df.Rows.Count)

        table [] [
          thead [] [
            th [] [ str "Index" ]
            for c in df.Columns do
              th [] [ str c.Name]
          ]
          tbody [] [
            for i in 0 .. rowCount - 1 do
              tr [] [
                td [] [ embed context i ]
                // Rows is indexed with an int64, hence the conversion.
                for o in df.Rows.[int64 i] do
                  td [] [ embed context o ]
              ]
          ]
        ]
        |> writer.Write

        true
    ), mimeType = "text/html")
    
    // Data frame row formatter: a single-row table with one cell per value.
    // Returns false when it declines to format (nested context), true otherwise.
    Formatter.Register<DataFrameRow>((fun (context: FormatContext) (row: DataFrameRow) (writer: TextWriter) ->

        // Don't generate nested tables
        if context.ContentThreshold < 1.0 then false else

        // Ask other formatters to reduce information generation
        context.ReduceContent(0.2) |> ignore

        table [] [
          tbody [] [
            tr [] [
              for o in row do
                td [] [ embed context o ] 
            ]
          ]
        ]
        |> writer.Write

        true
    ), mimeType = "text/html")
    

Now view the data frame: 

In [None]:
df

### Modifying data frames

Change a value directly through df:

In [None]:
// Set the cell at row 0L, column 1. Column 1 is the "Ints" column, the second
// column in the list the data frame was built from.
df.[0L, 1] <- 10
df

We can also modify the values in the columns through indexers defined in `PrimitiveDataFrameColumn` and `StringDataFrameColumn`:

In [None]:
// Write to row 1L of each column through the column's own indexer. The value for
// the int column is wrapped in Nullable; the string column takes the string directly.
ints.[1L] <- Nullable 100
strings.[1L] <- "Foo!"
df

Check the data type

In [None]:
df.Info()

The `DataFrame` and the base `DataFrameColumn` class that all columns derive from expose a number of useful APIs: binary operations, computations, joins, merges, handling missing values and more.

In [None]:
// Add 5 to the "Ints" column in place. The call's return value is not needed
// here, so it is discarded explicitly rather than dropped implicitly.
df.["Ints"].Add(5, inPlace=true) |> ignore
df

In [None]:
// Column arithmetic: divide the ints column by 5, multiply the result by 100, and
// store the resulting column under the name "Ints".
// NOTE(review): presumably integer division, since the column holds ints — confirm.
df.["Ints"] <- (ints / 5) * 100
df

Let's `null` it up!

In [None]:
// Replace nulls in place: -1 for the "Ints" column, "Bar" for the "Strings" column.
// The return values are not needed here, so they are discarded explicitly.
df.["Ints"].FillNulls(-1, inPlace=true) |> ignore
df.["Strings"].FillNulls("Bar", inPlace=true) |> ignore
df

DataFrame exposes `Columns` property that we can enumerate over to access our columns. Here's how you can access the first row, though.

In [None]:
// Rows is indexed with an int64, hence 0L for the first row.
let row0 = df.Rows.[0L]
row0

In [None]:
row0

### Filtering and sorting data frames

Let's take a look at `Filter`, `Sort`, and `GroupBy`.

In [None]:
// Sort our dataframe using the Ints column
df.Sort("Ints", ascending=true)

In [None]:
// GroupBy
let grouped = df.GroupBy("DateTimes")
// Count of values in each group
grouped.Count()

In [None]:
// Sum of the "Ints" column within each group of `grouped`.
let intGroupSum = grouped.Sum("Ints")
intGroupSum

### Charting columns from data frames

In [None]:
open XPlot.Plotly
open System.Linq

In [None]:
#r "nuget:MathNet.Numerics"

In [None]:
open MathNet.Numerics.Distributions

In [None]:
// Parameters of the normal distribution sampled for the histogram.
let mean = 0.0
let stdDev = 0.1

let normalDist = new Normal(mean, stdDev)

In [None]:
// Take 1000 samples from the distribution into a double column, then plot them
// as a histogram with 30 bins along x.
let doubles = PrimitiveDataFrameColumn<double>("Normal Distribution", normalDist.Samples().Take(1000))
display(Chart.Plot(Graph.Histogram(x = doubles, nbinsx = 30)))