// can't yet format YamlFrontmatter (["title: Housefly Wing Length"; "category: Datasets"; "categoryindex: 1"; "index: 4"], Some { StartLine = 2 StartColumn = 0 EndLine = 6 EndColumn = 8 }) to pynb markdown

[![Binder](/datasets/img/badge-binder.svg)](https://mybinder.org/v2/gh/plotly/Plotly.NET/gh-pages?filepath=04_HouseflyWingLength.ipynb)&emsp;
[![Script](/datasets/img/badge-script.svg)](/datasets/04_HouseflyWingLength.fsx)&emsp;
[![Notebook](/datasets/img/badge-notebook.svg)](/datasets/04_HouseflyWingLength.ipynb)

# The _Housefly Wing Length_ dataset

**Table of contents**

- [Description](#Description)
- [How to use](#How-to-use)
- [Examples](#Examples)

## Description

Measured wing lengths of 100 houseflies, recorded in units of 0.1 mm (i.e. the length in mm multiplied by 10, hence the column label `mm * 10^1`).  
Taken from https://seattlecentral.edu/qelp/sets/057/057.html

Original literature: Sokal, R.R. and P.E. Hunter. 1955. "A morphometric analysis of DDT-resistant and non-resistant housefly strains" Ann. Entomol. Soc. Amer. 48: 499-507.


## How to use




In [1]:
#r "nuget: FSharp.Data"
#r "nuget: Deedle"

open FSharp.Data
open Deedle

// Fetch the raw housefly wing length dataset as plain text.
let rawData =
    let datasetUrl = @"https://raw.githubusercontent.com/fslaborg/datasets/main/data/HouseflyWingLength.txt"
    Http.RequestString datasetUrl

// Parse the downloaded text into a Deedle frame. The data is read without a
// header row, so the column is named through the schema string instead.
let df =
    let columnSchema = "wing length (mm * 10^1)"
    Frame.ReadCsvString(rawData, hasHeaders = false, schema = columnSchema)

// Print the frame.
df.Print()


wing length (mm * 10^1) 0  -> 36                      1  -> 37                      2  -> 38                      3  -> 38                      4  -> 39                      5  -> 39                      6  -> 40                      7  -> 40                      8  -> 40                      9  -> 40                      10 -> 41                      11 -> 41                      12 -> 41                      13 -> 41                      14 -> 41                      :     ...                     85 -> 50                      86 -> 50                      87 -> 50                      88 -> 50                      89 -> 50                      90 -> 51                      91 -> 51                      92 -> 51                      93 -> 51                      94 -> 52                      95 -> 52                      96 -> 53                      97 -> 53                      98 -> 54                      99 -> 55

## Examples

This example is taken from the FsLab data science tutorial [t-test]()
(WIP)




In [2]:
#r "nuget: FSharp.Stats, 0.4.2"
#r "nuget: Plotly.NET, 2.0.0-preview.6"

open FSharp.Stats
open FSharp.Stats.Testing
open Plotly.NET

// Wing lengths in mm, extracted from the data frame column.
let seqDataHousefly =
    let rawWingLengths =
        df
        |> Frame.getCol "wing length (mm * 10^1)"
        |> Series.values
    // The column stores each length multiplied by 10, so dividing by 10 yields mm.
    rawWingLengths |> Seq.map (fun tenths -> tenths / 10.)

// Box plot of the wing lengths that also draws every individual data point
// (with jitter) and labels the y axis.
let boxPlot =
    let houseflyBox =
        Chart.BoxPlot(
            y = seqDataHousefly,
            Name = "housefly",
            Boxpoints = StyleParam.Boxpoints.All,
            Jitter = 0.2)
    houseflyBox |> Chart.withY_AxisStyle "wing length [mm]"


In [None]:
// Evaluate the chart value as the cell's only expression; in a notebook this
// presumably renders the box plot as the cell output (no output is captured here).
boxPlot


In [4]:
// The testing module in FSharp.Stats requires vectors as input, so the sequence of wing lengths is converted into a vector first.
let vectorDataHousefly = seqDataHousefly |> vector

// The expected value of our population, used as the reference value for the test.
let expectedValue = 4.5

// Perform the one-sample t-test with the vectorized data and the expected value as parameters.
let oneSampleResult =
    expectedValue |> TTest.oneSample vectorDataHousefly


{ Statistic = 1.275624919  DegreesOfFreedom = 99.0  PValueLeft = 0.8974634108  PValueRight = 0.1025365892  PValue = 0.2050731784 }