## 言語処理100本ノック2020 in Haskell

### chapter 1

In [2]:
:extension OverloadedStrings
:extension ScopedTypeVariables
import qualified Data.Text as T
import Prelude as P

In [2]:
-- 00
src ="stressed" :: T.Text
putStrLn $ T.unpack $ T.reverse src 

desserts

In [3]:
-- 01
src = "パタトクカシーー" :: T.Text

f :: T.Text -> Maybe T.Text
f s | T.length s < 7 =  Nothing
    | otherwise = Just $ T.pack [T.head s, T.index s 2, T.index s 4, T.index s 6]

case f src of 
    Just s -> putStrLn $ T.unpack s
    Nothing -> return ()

パトカー

In [4]:
-- 02
src1 = "パトカー"
src2 = "タクシー"

f :: T.Text -> T.Text -> T.Text
f z w = P.foldr (\s t -> T.pack [fst s, snd s] <> t)  "" (T.zip z w) 

putStrLn $ T.unpack $ f src1 src2

パタトクカシーー

In [5]:
-- 03
src = "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."

getCountList :: T.Text -> [Int]
getCountList s = 
    let 
        ws' = P.map (T.filter (\c -> c /= ',' && c /= '.')) (T.words s)
    in 
        builder ws' []
    where 
        builder :: [T.Text] -> [Int] -> [Int]
        builder ws counts | P.null ws = counts
                          | otherwise = builder (P.tail ws) (counts <> [T.length (P.head ws)])

getCountList src

[3,1,4,1,5,9,2,6,5,3,5,8,9,7,9]

In [6]:
-- 04
import qualified Data.Map.Strict as HM
import Data.Function
src = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
onlyOne = [0, 4, 5, 6, 7, 8, 14, 15, 18]

f :: T.Text -> T.Text
f s =
    let
        ws = T.words s
        wns = P.zip [0..(P.length ws -1)] ws
    in seq ws $
        if P.length ws < 19 then ""
        else seq wns $
            P.foldr (\s t -> let (ind, text) = s in if ind `elem` onlyOne then T.pack [T.head text] <> t else T.pack [T.head text, T.index text 1] <> t) "" wns

putStrLn $ T.unpack $ f src

pairingIndex :: T.Text -> HM.Map T.Text Int
pairingIndex sentences =
    let
        ws' = T.words src
        ws = P.filter ( /= ".") $! ws'
        wns = P.zip [0..(P.length ws -1)] ws
    in
        flip fix (HM.empty, wns) $ \loop (dict, rest) ->
            if P.null rest then dict
            else
                let (v, k) = P.head rest in
                loop (HM.insert k v dict, P.tail rest)


print $ pairingIndex src 


HHeLiBeBCNOFNeNaMiAlSiPSClArKCa

fromList [("Also",12),("Arthur",17),("Because",3),("Boron",4),("Can.",19),("Clause.",16),("Could",5),("Fluorine.",8),("He",1),("Hi",0),("King",18),("Lied",2),("Might",11),("Nations",10),("New",9),("Not",6),("Oxidize",7),("Peace",14),("Security",15),("Sign",13)]

In [7]:
--05
getNGram :: Int -> T.Text -> [T.Text] -> [T.Text]
getNGram n s nGrams | n <= 0 = nGrams
                    | T.length s < n = nGrams
                    | otherwise = getNGram n (T.drop 1 s) (nGrams <> [T.take n s])

getNWordsGram :: Int -> T.Text -> [T.Text]
getNWordsGram n s = builder n (T.words s) []
    where
        builder :: Int -> [T.Text] -> [T.Text] -> [T.Text]
        builder n ws nGrams | P.length ws < n = nGrams
                            | n <= 0 = nGrams
                            | otherwise = builder n (P.drop 1 ws) (nGrams <> [T.concat (P.map (<> " ") $ P.take n ws)])

print $ getNGram 2 "I am an NLPer" []
print $ getNWordsGram 2 "I am an NLPer"

["I "," a","am","m "," a","an","n "," N","NL","LP","Pe","er"]

["I am ","am an ","an NLPer "]

In [8]:
-- 06
import qualified Data.HashSet as HS
setX = HS.fromList $ getNGram 2 "paraparaparadise" []
setY = HS.fromList $ getNGram 2 "paragraph" []

print $ HS.unions [setX, setY]
print $ HS.intersection setX setY
print $ HS.difference setX setY

fromList ["ad","gr","ar","pa","di","ag","se","is","ph","ra","ap"]

fromList ["ar","pa","ra","ap"]

fromList ["ad","di","se","is"]

In [9]:
-- 07
{-# Language ScopedTypeVariables #-}
{-# Language TypeApplications #-}
{-# Language ConstraintKinds #-}
{-# Language GADTs #-}

import qualified Type.Reflection as TRF

type Generable a = (Eq a, TRF.Typeable a, Show a)

packToText :: forall a. (Eq a, TRF.Typeable a, Show a) => a -> T.Text
packToText x =  
    case TRF.eqTypeRep (TRF.typeRep @a) (TRF.typeRep @String) of
        -- コンパイラは a と String が等しいことがわかる
        Just TRF.HRefl -> T.pack x
        Nothing -> T.pack $ show x

f :: (Generable a, Generable b, Generable c) => a -> b -> c -> T.Text
f x y z = packToText x <> "時の" <> packToText y <> "は" <> packToText z

putStrLn $ T.unpack $ f 12 "気温" 22.4


12時の気温は22.4

In [10]:
-- 08
import qualified Data.ByteString as BS
import qualified Data.ByteString.UTF8 as BSU
import Data.Char

encryptChar :: Char -> Char
encryptChar c | isLower c = 
    let 
        cb = head $ BS.unpack $ BSU.fromChar c
    in
        head $ BSU.toString $ BS.pack [219 - cb]
              | otherwise = c

decryptChar :: Char -> Char
decryptChar c =
    if check c then
        head $ BSU.toString $ BS.pack [219 - head (BS.unpack $ BSU.fromChar c)]
    else
        c
    where
        check c = 
            let
                target = head (BS.unpack $ BSU.fromChar c) 
                min = head $ BS.unpack $ BSU.fromChar 'a'
                max = head $ BS.unpack $ BSU.fromChar 'z'
            in 
                min <= target && target <= max


encrypt :: T.Text -> T.Text
encrypt = T.map encryptChar

decrypt :: T.Text -> T.Text
decrypt = T.map decryptChar

message = "こんにちは this is encrypted message. THIS IS NOT ENCRYPTED"
putStr $ T.unpack $ encrypt message
putStr $ T.unpack $ decrypt (encrypt message) 


こんにちは gsrh rh vmxibkgvw nvhhztv. THIS IS NOT ENCRYPTED

こんにちは this is encrypted message. THIS IS NOT ENCRYPTED

In [11]:
-- 09
import qualified System.Random as RD
import Debug.Trace

factorial :: Int -> Int
factorial 0 = 1
factorial n | n > 0 = let n' = factorial (n -1) in seq n' $ n * n' 

interleave :: a -> [a] -> [[a]]
interleave x [] = [[x]]
interleave x (y:ys) = (x:y:ys) : map (y:) (interleave x ys)

perms :: [a] -> [[a]]
perms = foldr (concatMap . interleave) [[]]

getTypoglycemia :: RD.RandomGen g => g -> String -> String
getTypoglycemia g t | length t > 4 = 
    let
        seed = perms (tail (init t))
        n = fst $ RD.randomR (0, factorial (length t - 2) -1) g
    in
        [head t] ++ (seed !! n) ++ [last t]
                    | otherwise = t

arg = "I couldn’t believe that I could actually understand what I was reading : the phenomenal power of the human mind ."

do 
    g <- RD.newStdGen
    putStrLn $ unwords $map (getTypoglycemia g) (words arg)


I c’dlonut beevile that I cluod alauclty utadnenrsd what I was reindag : the penehnaoml pewor of the hamun mind .

### chapter 2

In [13]:
getPopularNames :: IO T.Text = T.pack <$> readFile ".data/popular-names.txt"

In [4]:
-- 10

do 
    content <- readFile ".data/popular-names.txt"
    print $ foldr (\c n -> if c == '\n' then 1 + n else n) 1 content 

2781

-- bash  

    $  wc ./data/popular-names.txt

In [14]:
-- 11
do 
    content <- readFile ".data/popular-names.txt"
    putStrLn "----- input"
    putStrLn $ unlines $ take 10 $ lines content
    putStrLn "----- output"
    putStrLn $ unlines $ take 10 $ lines $ map (\c -> if c == '\t' then ' ' else c) content

----- input
Mary	F	7065	1880
Anna	F	2604	1880
Emma	F	2003	1880
Elizabeth	F	1939	1880
Minnie	F	1746	1880
Margaret	F	1578	1880
Ida	F	1472	1880
Alice	F	1414	1880
Bertha	F	1320	1880
Sarah	F	1288	1880

----- output
Mary F 7065 1880
Anna F 2604 1880
Emma F 2003 1880
Elizabeth F 1939 1880
Minnie F 1746 1880
Margaret F 1578 1880
Ida F 1472 1880
Alice F 1414 1880
Bertha F 1320 1880
Sarah F 1288 1880

-- bash  

    $ cat ./.data/popular-names.txt | tr '\t' ' '

In [5]:
-- 12
import System.IO
col1 :: String  = "./.data/out/col1.txt"
col2 :: String = "./.data/out/col2.txt"
do
    content <- T.map (\c -> if c == '\t' then ' ' else c) <$> getPopularNames
    writeFile col1 $ T.unpack $ T.unlines $ map (head . T.words) $ T.lines content
    writeFile col2 $ T.unpack $ T.unlines $ map (\line -> T.words line !! 1) $ T.lines content 

-- bash  

    $ cut --fields=1 ./.data/popular-names.txt > ./.data/out_bash/col1.txt  
    $ cut --fields=2 ./.data/popular-names.txt > ./.data/out_bash/col2.txt


In [16]:
-- 13
getCol1 = T.pack <$> readFile "./.data/out/col1.txt"
getCol2 = T.pack <$> readFile "./.data/out/col2.txt"

mergeCols :: T.Text -> T.Text -> T.Text
mergeCols col1 col2 = T.unlines (merge (T.lines col1) (T.lines col2))
    where
        merge = zipWith (\x y -> x <> "\t" <> y) 
do 
    col1 <- getCol1
    col2 <- getCol2
    putStrLn $ T.unpack $ mergeCols col1 col2 

Mary	F
Anna	F
Emma	F
Elizabeth	F
Minnie	F
Margaret	F
Ida	F
Alice	F
Bertha	F
Sarah	F
John	M
William	M
James	M
Charles	M
George	M
Frank	M
Joseph	M
Thomas	M
Henry	M
Robert	M
Mary	F
Anna	F
Emma	F
Elizabeth	F
Margaret	F
Minnie	F
Ida	F
Annie	F
Bertha	F
Alice	F
John	M
William	M
James	M
George	M
Charles	M
Frank	M
Joseph	M
Henry	M
Thomas	M
Edward	M
Mary	F
Anna	F
Emma	F
Elizabeth	F
Minnie	F
Margaret	F
Ida	F
Alice	F
Bertha	F
Annie	F
John	M
William	M
James	M
George	M
Charles	M
Frank	M
Joseph	M
Thomas	M
Henry	M
Robert	M
Mary	F
Anna	F
Emma	F
Elizabeth	F
Minnie	F
Margaret	F
Bertha	F
Ida	F
Annie	F
Clara	F
John	M
William	M
James	M
Charles	M
George	M
Frank	M
Joseph	M
Henry	M
Robert	M
Thomas	M
Mary	F
Anna	F
Emma	F
Elizabeth	F
Minnie	F
Margaret	F
Ida	F
Clara	F
Bertha	F
Annie	F
John	M
William	M
James	M
George	M
Charles	M
Frank	M
Joseph	M
Thomas	M
Henry	M
Robert	M
Mary	F
Anna	F
Emma	F
Elizabeth	F
Margaret	F
Minnie	F
Clara	F
Bertha	F
Ida	F
Annie	F
John	M
William	M
James	M
George	M
Charles	M
Frank	M
Joseph	M


-- bash  

    $  paste ./.data/out/col1.txt  ./.data/out/col2.txt 

In [4]:
-- 14
input = 10

do
    let n = input
    d <- getPopularNames
    putStrLn $ T.unpack $ T.unlines $ take n $ T.lines d

Mary	F	7065	1880
Anna	F	2604	1880
Emma	F	2003	1880
Elizabeth	F	1939	1880
Minnie	F	1746	1880
Margaret	F	1578	1880
Ida	F	1472	1880
Alice	F	1414	1880
Bertha	F	1320	1880
Sarah	F	1288	1880

-- bash  

    $ head -n ./.data/popular-names.txt

In [5]:
-- 15

input = 10

do
    let n = input
    d <- getPopularNames
    putStrLn $ T.unpack $ T.unlines $ reverse $ take n  $ reverse $ T.lines d 

Liam	M	19837	2018
Noah	M	18267	2018
William	M	14516	2018
James	M	13525	2018
Oliver	M	13389	2018
Benjamin	M	13381	2018
Elijah	M	12886	2018
Lucas	M	12585	2018
Mason	M	12435	2018
Logan	M	12352	2018

-- bash  

    $ tail -n ./.data/popular-names.txt

In [6]:
-- 16

input = 10

splitList :: Int ->  [a] -> [[a]]
splitList n _  | n <= 0 = []
splitList n xs | length xs < n = [xs]
               | otherwise = take n xs : splitList n (drop n xs)

splitLn :: Int -> T.Text -> [T.Text]
splitLn n t = map T.unlines $ filter (not . null) $splitList n (T.lines t)

toSuffix :: Int -> String -> String
toSuffix n s | n < length s = s
             | otherwise = toSuffix n ("0" <> s)

do 
    let n = input
    d <- getPopularNames
    let outs = splitLn n d
    mapM_ (\it -> writeFile ("out" <> toSuffix 3 (show (fst it))) (T.unpack $ snd it)) (zip [0..(length outs)] outs)



-- bash  

    $ split -l n -d -a 4 ./.data/popular-names.txt out

In [14]:
-- 17 
import qualified Data.HashSet as HS

input = 10

getOneColumn1 :: T.Text -> [T.Text]
getOneColumn1 t = map (head . T.words) $ T.lines $ T.map (\c -> if c == '\t' then ' ' else c) t

getCharSet :: T.Text -> HS.HashSet Char
getCharSet ts = consumer ts HS.empty
    where 
        consumer text hs | T.null text = hs
                         | otherwise = consumer (T.tail text) (HS.insert (T.head text) hs)


do 
    arg <- getPopularNames
    let column1 = T.concat $ getOneColumn1 arg
    print $ HS.size $ getCharSet column1

    

46

-- 17bash
