# DataFrame
### (explore data with Microsoft.Data.Analysis)

In [1]:
#r "nuget:Microsoft.Data.Analysis"
using Microsoft.AspNetCore.Html;
using Microsoft.Data.Analysis;

Installed package Microsoft.Data.Analysis version 0.4.0

#### Load object formatters from external script file Formatters.csx

In [2]:
static string BuildHideRowsScript(long uniqueId)
{
    var script = $"var allRows = document.querySelectorAll('#table_{uniqueId} tbody tr:nth-child(n)'); ";
    script += "for (let i = 0; i < allRows.length; i++) { allRows[i].style.display='none'; } ";
    return script;
}

static string BuildPageScript(long uniqueId, int size)
{
    var script = $"var page = parseInt(document.querySelector('#page_{uniqueId}').innerHTML) - 1; ";
    script += $"var pageRows = document.querySelectorAll(`#table_{uniqueId} tbody tr:nth-child(n + ${{page * {size} + 1 }})`); ";
    script += $"for (let j = 0; j < {size}; j++) {{ pageRows[j].style.display='table-row'; }} ";
    return script;
}

static string GotoPageIndex(long uniqueId, long page)
{
    var script = $"document.querySelector('#page_{uniqueId}').innerHTML = {page + 1}; ";
    return script;
}

static string UpdatePageIndex(long uniqueId, int step, long maxPage)
{
    var script = $"var page = parseInt(document.querySelector('#page_{uniqueId}').innerHTML) - 1; ";
    script += $"page = parseInt(page) + parseInt({step}); ";
    script += $"page = page < 0 ? 0 : page; ";
    script += $"page = page > {maxPage} ? {maxPage} : page; ";
    script += $"document.querySelector('#page_{uniqueId}').innerHTML = page + 1; ";
    return script;
}

In [3]:
Formatter.Register<DataFrame>((df, writer) =>
            {
                const int MAX = 10000;
                const int SIZE = 10;

                var uniqueId = DateTime.Now.Ticks;

                var header = new List<IHtmlContent>
                {
                    th(i("index"))
                };
                header.AddRange(df.Columns.Select(c => (IHtmlContent)th(c.Name)));
                
                if (df.Rows.Count > SIZE)
                {
                    var maxMessage = df.Rows.Count > MAX ? $" (showing a max of {MAX} rows)" : string.Empty;
                    var title = h3[style: "text-align: center;"]($"DataFrame - {df.Rows.Count} rows {maxMessage}");

                    // table body
                    var maxRows = Math.Min(MAX, df.Rows.Count);
                    var rows = new List<List<IHtmlContent>>();
                    for (var index = 0; index < Math.Min(MAX, df.Rows.Count); index++)
                    {
                        var cells = new List<IHtmlContent>
                    {
                        td(i((index)))
                    };
                        foreach (var obj in df.Rows[index])
                        {
                            cells.Add(td(obj));
                        }
                        rows.Add(cells);
                    }

                    //navigator      
                    var footer = new List<IHtmlContent>();
                    BuildHideRowsScript(uniqueId);

                    var paginateScriptFirst = BuildHideRowsScript(uniqueId) + GotoPageIndex(uniqueId, 0) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptFirst]("⏮"));

                    var paginateScriptPrevTen = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, -10, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrevTen]("⏪"));

                    var paginateScriptPrev = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, -1, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrev]("◀️"));

                    footer.Add(b[style: "margin: 2px;"]("Page"));
                    footer.Add(b[id: $"page_{uniqueId}", style: "margin: 2px;"]("1"));

                    var paginateScriptNext = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, 1, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNext]("▶️"));

                    var paginateScriptNextTen = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, 10, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNextTen]("⏩"));

                    var paginateScriptLast = BuildHideRowsScript(uniqueId) + GotoPageIndex(uniqueId, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptLast]("⏭️"));

                    //table
                    var t = table[id: $"table_{uniqueId}"](
                        caption(title),
                        thead(tr(header)),
                        tbody(rows.Select(r => tr[style: "display: none"](r))),
                        tfoot(tr(td[colspan: df.Columns.Count + 1, style: "text-align: center;"](footer)))
                    );
                    writer.Write(t);

                    //show first page
                    writer.Write($"<script>{BuildPageScript(uniqueId, SIZE)}</script>");
                }
                else 
                {
                    var rows = new List<List<IHtmlContent>>();
                    for (var index = 0; index < df.Rows.Count; index++)
                    {
                        var cells = new List<IHtmlContent>
                        {
                            td(i((index)))
                        };
                        foreach (var obj in df.Rows[index])
                        {
                            cells.Add(td(obj));
                        }
                        rows.Add(cells);
                    }

                    //table
                    var t = table[id: $"table_{uniqueId}"](
                        thead(tr(header)),
                        tbody(rows.Select(r => tr(r)))
                    );
                    writer.Write(t);
                }
            }, "text/html");

In [5]:
Formatter.Register<SingleDataFrameColumn>((dataFrameRows, writer) =>
            {
                const int MAX = 10000;
                const int SIZE = 10;

                var uniqueId = DateTime.Now.Ticks;
                
                var header = new List<IHtmlContent>
                {
                    th(i("index"))
                };
                header.Add((IHtmlContent)th(dataFrameRows.Name));
              
                if (dataFrameRows.Length > SIZE)
                {
                    var maxMessage = dataFrameRows.Length > MAX ? $" (showing a max of {MAX} rows)" : string.Empty;
                    var title = h3[style: "text-align: center;"]($"DataFrame - {dataFrameRows.Length} rows {maxMessage}");

                    // table body
                    var maxRows = Math.Min(MAX, dataFrameRows.Length);
                    var rows = new List<List<IHtmlContent>>();
                    for (var index = 0; index < Math.Min(MAX, dataFrameRows.Length); index++)
                    {
                        var cells = new List<IHtmlContent>
                    {
                        td(i((index)))
                    };
                        cells.Add(td(dataFrameRows[index]));
                        rows.Add(cells);
                    }

                    //navigator      
                    var footer = new List<IHtmlContent>();
                    BuildHideRowsScript(uniqueId);

                    var paginateScriptFirst = BuildHideRowsScript(uniqueId) + GotoPageIndex(uniqueId, 0) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptFirst]("⏮"));

                    var paginateScriptPrevTen = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, -10, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrevTen]("⏪"));

                    var paginateScriptPrev = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, -1, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptPrev]("◀️"));

                    footer.Add(b[style: "margin: 2px;"]("Page"));
                    footer.Add(b[id: $"page_{uniqueId}", style: "margin: 2px;"]("1"));

                    var paginateScriptNext = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, 1, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNext]("▶️"));

                    var paginateScriptNextTen = BuildHideRowsScript(uniqueId) + UpdatePageIndex(uniqueId, 10, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptNextTen]("⏩"));

                    var paginateScriptLast = BuildHideRowsScript(uniqueId) + GotoPageIndex(uniqueId, (maxRows - 1) / SIZE) + BuildPageScript(uniqueId, SIZE);
                    footer.Add(button[style: "margin: 2px;", onclick: paginateScriptLast]("⏭️"));

                    //table
                    var t = table[id: $"table_{uniqueId}"](
                        caption(title),
                        thead(tr(header)),
                        tbody(rows.Select(r => tr[style: "display: none"](r))),
                        tfoot(tr(td[style: "text-align: center;"](footer)))
                    );
                    writer.Write(t);

                    //show first page
                    writer.Write($"<script>{BuildPageScript(uniqueId, SIZE)}</script>");
                }
                else 
                {
                    var rows = new List<List<IHtmlContent>>();
                    for (var index = 0; index < dataFrameRows.Length; index++)
                    {
                        var cells = new List<IHtmlContent>
                        {
                            td(i((index)))
                        };
                        cells.Add(td(dataFrameRows[index]));
                        rows.Add(cells);
                    }

                    //table
                    var t = table[id: $"table_{uniqueId}"](
                        thead(tr(header)),
                        tbody(rows.Select(r => tr(r)))
                    );
                    writer.Write(t);
                }
            }, "text/html");

### Load data into data frame

In [6]:
//const string DATASET_PATH = "./taxi.csv";
const string DATASET_PATH = "./sensors_data_raw.csv";
var dataFrame = DataFrame.LoadCsv(DATASET_PATH);

In [7]:
dataFrame

index,Temperature,Luminosity,Infrared,Distance,CreatedAt,Source
⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️


In [8]:
dataFrame.Info()

index,Info,Temperature,Luminosity,Infrared,Distance,CreatedAt,Source
0,DataType,System.Single,System.Single,System.Single,System.Single,System.String,System.String
1,Length (excluding null values),803,803,803,803,803,803


#### The previous formatter for DataFrame can be extracted in an external library (like csx, dll, nuget)

In [9]:
#r "nuget:ApexCode.Interactive.Formatting,0.0.1-beta.5"
using ApexCode.Interactive.Formatting;

Formatters.Register<DataFrame>();

Installed package ApexCode.Interactive.Formatting version 0.0.1-beta.5

DataFrame formatter loaded.


#### data clamping

In [10]:
dataFrame

index,Temperature,Luminosity,Infrared,Distance,CreatedAt,Source
⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️


In [11]:
dataFrame.Clamp<float>(0, 100, true);
dataFrame

index,Temperature,Luminosity,Infrared,Distance,CreatedAt,Source
⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️,⏮⏪◀️Page1▶️⏩⏭️


In [12]:
var rows = dataFrame.Columns[5];

In [13]:
rows

index,Source
⏮⏪◀️Page1▶️⏩⏭️,


In [14]:
var rows = dataFrame.Rows;
rows

index,value
0,"[ 24.77, 63.96, 0, 100, 01/03/2020 10:22:01, FlashLight ]"
1,"[ 24.77, 64.06, 0, 100, 01/03/2020 10:22:02, FlashLight ]"
2,"[ 24.77, 64.06, 0, 71.52, 01/03/2020 10:22:03, FlashLight ]"
3,"[ 24.44, 63.18, 0, 100, 01/03/2020 10:22:04, FlashLight ]"
4,"[ 24.12, 69.14, 0, 100, 01/03/2020 10:22:05, FlashLight ]"
5,"[ 23.8, 65.62, 0, 13.86, 01/03/2020 10:22:06, FlashLight ]"
6,"[ 23.8, 66.41, 0, 100, 01/03/2020 10:22:07, FlashLight ]"
7,"[ 24.77, 68.85, 0, 100, 01/03/2020 10:22:08, FlashLight ]"
8,"[ 24.77, 69.14, 0, 100, 01/03/2020 10:22:09, FlashLight ]"
9,"[ 24.44, 67.58, 0, 100, 01/03/2020 10:22:10, FlashLight ]"


In [15]:
var temps = dataFrame.Rows.Select(r => r.Source == "Lighter");
temps


(1,42): error CS1061: 'DataFrameRow' does not contain a definition for 'Source' and no accessible extension method 'Source' accepting a first argument of type 'DataFrameRow' could be found (are you missing a using directive or an assembly reference?)



Cell not executed: compilation error

In [None]:
var infrareds = dataFrame.Rows.Where(r => r[5].Equals("Infrared"));
infrareds

#### eventualy, convert dataframe to collection for later use