# Mean, Standard Deviation, Minimum, Maximum, Median, Mode

In [1]:
:!stack build csv



In [2]:
import Data.List
import Data.Maybe
import Data.Ord
import Text.Read (readMaybe)

import Text.CSV

In [3]:
-- Parse the 2023 game-log file. The result is an Either whose Right side is
-- the parsed CSV (it is later passed to functions taking `Either a CSV`).
baseball <- parseCSVFromFile "../data/gl2023.txt"

In [4]:
:type baseball

In [5]:
-- | Keep only the rows of a parse result that have at least two fields.
--
-- A 'Left' (the failure side of the parse result) is collapsed to an empty
-- list, so callers never see the parse error itself.
--
-- NOTE(review): the two-field threshold presumably exists to drop the blank
-- record produced by a trailing newline -- confirm against the parser output.
--
-- The explicit signature and arguments keep the definition polymorphic; the
-- point-free form without a signature trips the monomorphism restriction
-- when compiled outside GHCi.
noEmptyRows :: Foldable t => Either e [t a] -> [t a]
noEmptyRows (Left _) = []
noEmptyRows (Right rows) = filter ((>= 2) . length) rows

In [6]:
baseballList = noEmptyRows baseball

In [7]:
length baseballList

2430

In [8]:
:type baseballList

In [9]:
:info Field

In [10]:
read "1" :: Integer

1

In [11]:
read "1.5" :: Double

1.5

In [12]:
-- | Read column @index@ (zero-based) from every row kept by 'noEmptyRows'.
--
-- Each selected field is parsed with the 'Read' instance chosen by the
-- caller, e.g. @readIndex baseball 9 :: [Integer]@.
--
-- The result list is lazy: a bad row only raises when its element is forced.
-- A row that is too short, or a field that does not parse, raises an error
-- naming the row, the column and the problem instead of an anonymous
-- @read@ / @!!@ failure.
readIndex :: Read cell => Either a CSV -> Int -> [cell]
readIndex csv index = zipWith parseRow [1 :: Int ..] (noEmptyRows csv)
    where
        parseRow rowNumber row
            | index < 0 = failAt rowNumber "column index is negative"
            | otherwise =
                case drop index row of
                    [] -> failAt rowNumber ("row has only " ++ show (length row) ++ " fields")
                    field : _ ->
                        case readMaybe field of
                            Just value -> value
                            Nothing -> failAt rowNumber ("cannot parse field " ++ show field)
        -- Row numbers count only the rows kept by 'noEmptyRows', starting at 1.
        failAt rowNumber reason =
            error ("readIndex: row " ++ show rowNumber ++ ", column " ++ show index ++ ": " ++ reason)

In [13]:
readIndex baseball 9 :: [Integer]

[0,5,2,5,7,10,7,10,3,2,0,1,0,0,7,2,1,4,3,9,3,2,1,6,4,1,7,8,4,2,7,13,2,2,3,9,1,2,5,1,4,1,5,6,7,0,6,6,1,1,6,4,11,0,4,8,6,7,12,7,5,1,12,7,2,12,2,0,0,8,4,10,4,6,4,4,3,2,7,2,6,5,7,4,3,2,3,2,6,4,2,5,6,0,16,6,6,3,5,0,10,0,3,2,9,3,4,6,5,2,5,8,4,3,7,6,2,2,11,6,5,4,3,14,6,0,6,10,8,6,1,7,6,0,1,12,5,6,4,5,0,0,4,2,4,0,3,8,9,6,1,2,4,0,2,7,6,9,9,4,8,4,0,0,8,11,3,2,5,3,3,4,5,7,2,3,7,10,2,8,4,1,7,10,3,2,4,5,7,11,3,3,8,8,1,11,0,4,3,6,5,6,10,4,17,3,3,0,1,2,3,6,6,7,6,6,2,9,1,3,2,2,14,3,5,1,4,6,1,8,9,5,0,4,0,8,1,14,8,3,2,6,5,2,4,10,7,10,5,0,2,8,8,1,4,7,0,3,0,4,12,5,4,6,8,14,5,5,0,5,4,10,5,3,1,12,2,12,5,7,6,5,3,9,5,3,0,6,0,5,3,2,7,0,1,3,6,2,7,5,5,6,9,4,3,1,4,11,1,6,3,10,2,4,3,3,7,5,7,12,3,0,4,3,1,4,1,5,7,1,2,4,0,6,4,0,11,4,6,1,3,2,5,4,0,6,4,5,5,8,4,3,8,5,2,5,0,0,4,5,3,2,4,5,1,3,3,2,1,12,1,0,5,2,8,0,2,6,7,14,7,1,4,9,3,2,1,4,5,3,3,6,11,2,2,11,0,6,5,11,6,16,7,12,4,6,6,3,3,0,0,4,3,3,3,4,2,1,9,5,3,4,4,2,10,4,9,3,3,5,5,3,3,2,1,6,2,5,1,6,2,2,11,2,2,1,4,1,6,14,1,6,1,3,4,5,1,4,0,3,7,1,12,6,6,11,3,5,7,0,13,5,2,1,9,1,

In [14]:
-- Column 9 of every non-empty row, read as Integer.
-- NOTE(review): presumably the visiting team's score per game -- confirm
-- against the game-log field layout.
awayRuns = readIndex baseball 9 :: [Integer]

In [15]:
maximum awayRuns

25

In [16]:
minimum awayRuns

0

In [17]:
:type (minimum, maximum)

In [18]:
-- | Smallest and largest element of a list, as @(minimum, maximum)@.
--
-- Returns 'Nothing' for an empty list; for a single element both components
-- of the pair are that element.
range :: Ord a => [a] -> Maybe (a, a)
range values
    | null values = Nothing
    | otherwise = Just (smallest, largest)
    where
        smallest = minimum values
        largest = maximum values

In [19]:
range awayRuns

Just (0,25)

In [20]:
range []

Nothing

In [21]:
sum awayRuns

11263

In [22]:
length awayRuns

2430

In [23]:
:type sum

In [24]:
:type length

In [25]:
:info realToFrac

In [26]:
realToFrac (sum awayRuns) / fromIntegral (length awayRuns)

4.634979423868312

In [27]:
-- | Arithmetic mean of a list, converted to 'Double'.
--
-- Returns 'Nothing' for an empty list. A single-element list yields that
-- element directly; longer lists yield @sum / length@.
mean :: Real a => [a] -> Maybe Double
mean samples = case samples of
    [] -> Nothing
    [only] -> Just (realToFrac only)
    _ -> Just (total / count)
    where
        total = realToFrac (sum samples)
        count = fromIntegral (length samples)

In [28]:
mean []

Nothing

In [29]:
mean [1]

Just 1.0

In [30]:
mean awayRuns

Just 4.634979423868312

In [31]:
-- | Sample standard deviation (denominator @n - 1@) of a list.
--
-- Returns 'Nothing' when the list has fewer than two elements, because the
-- @n - 1@ denominator would be zero or negative.
stdev :: Real a => [a] -> Maybe Double
stdev samples@(_ : _ : _) = spread <$> mean samples
    where
        degreesOfFreedom = fromIntegral (length samples - 1)
        -- Square root of the summed squared deviations over (n - 1).
        spread center = sqrt (sum (map (squaredGap center) samples) / degreesOfFreedom)
        squaredGap center sample =
            let gap = realToFrac sample - center
             in gap * gap
stdev _ = Nothing

In [32]:
stdev awayRuns

Just 3.285710591606553

In [33]:
(4.63 - 3.28, 4.63 + 3.28)

(1.35,7.91)

In [34]:
mean [10,8,10,8,8,4]

Just 8.0

In [35]:
stdev [10,8,10,8,8,1,2,2,1,2,1,3,2,1,1,1,1,1,1,1,4,10,10,10,10,10,10]

Just 4.050957468334666

### Median

In [36]:
oddList = [3,4,1,2,5]

In [37]:
evenList = [6,5,4,3,2,1]

In [38]:
sort oddList

[1,2,3,4,5]

In [39]:
sort evenList

[1,2,3,4,5,6]

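For the odd-length list, integer-dividing the length by 2 gives the index of the middle element of the sorted list; that element is the **median**. An even-length list has no single middle element, so its median is the average of the two middle elements of the sorted list.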

In [40]:
length oddList `div` 2

2

In [41]:
sort oddList !! 2

3

In [42]:
length evenList `div` 2

3

In [43]:
((sort evenList !! 3) + (sort evenList !! 2)) / 2

3.5

In [44]:
-- | Median of a list, converted to 'Double'.
--
-- Returns 'Nothing' for an empty list. For an odd number of elements the
-- result is the middle element of the sorted list; for an even number it is
-- the average of the two middle elements.
median :: Real a => [a] -> Maybe Double
median values
    | null values = Nothing
    | otherwise = Just (if odd count then upper else 0.5 * (upper + lower))
    where
        count = length values
        half = count `div` 2
        ordered = sort values
        -- Element of the sorted list at the given position, as a Double.
        at position = realToFrac (ordered !! position)
        upper = at half
        -- Only forced for even counts, where half >= 1.
        lower = at (half - 1)

In [45]:
median []

Nothing

In [46]:
median oddList

Just 3.0

In [47]:
median evenList

Just 3.5

In [48]:
median awayRuns

Just 4.0

### Mode

The **mode** is the value that appears most frequently in the list.

In [50]:
myList = [4,4,5,5,4]

In [51]:
group myList

[[4,4],[5,5],[4]]

In [54]:
-- | Collapse each run of consecutive equal elements into @(element, runLength)@.
--
-- Only adjacent duplicates are merged, so sort the input first to obtain one
-- pair per distinct value. Equality is all that is needed, hence the 'Eq'
-- constraint (every 'Ord' caller still type-checks).
runLengthEncoding :: Eq a => [a] -> [(a, Integer)]
runLengthEncoding values =
    -- 'group' never yields an empty run, so the pattern always matches.
    [(element, genericLength run) | run@(element : _) <- group values]

In [55]:
runLengthEncoding myList

[(4,2),(5,2),(4,1)]

In [58]:
runLengthEncoding (sort myList)

[(4,3),(5,2)]

In [59]:
-- | Most frequent value of a list together with its number of occurrences.
--
-- Returns 'Nothing' for an empty list. The list is sorted first so that
-- 'runLengthEncoding' produces exactly one pair per distinct value.
--
-- NOTE(review): when several values share the highest count, the winner is
-- whatever 'maximumBy' picks among equal elements -- verify if ties matter.
mode :: Ord a => [a] -> Maybe (a, Integer)
mode values
    | null values = Nothing
    | otherwise = Just (mostFrequent (runLengthEncoding (sort values)))
    where
        mostFrequent = maximumBy (comparing snd)

In [60]:
mode []

Nothing

In [61]:
mode myList

Just (4,3)

In [62]:
mode awayRuns

Just (3,334)