In [1]:
-- Fetch the tiny-shakespeare corpus into the working directory.
-- `-nc` (no-clobber) makes the cell safe to re-run: without it wget downloads
-- again and stores the copy under a numbered name (tiny-shakespeare.txt.1).
:! wget -nc "https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt"

--2023-05-04 06:24:44--  https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.111.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1115394 (1.1M) [text/plain]
Saving to: ‘tiny-shakespeare.txt’

     0K .......... .......... .......... .......... ..........  4% 2.50M 0s
    50K .......... .......... .......... .......... ..........  9% 2.71M 0s
   100K .......... .......... .......... .......... .......... 13% 11.0M 0s
   150K .......... .......... .......... .......... .......... 18% 15.0M 0s
   200K .......... .......... .......... .......... .......... 22% 4.07M 0s
   250K .......... .......... .......... .......... .......... 27% 13.1M 0s
   300K .......... .......... .......... .......... .......... 32% 14.6M 0s
   350K .

In [1]:
-- Read the downloaded corpus and bind it to `text` (a String) so the cells
-- below can inspect it.
-- NOTE(review): Prelude's readFile is lazy, so the file is only consumed as
-- `text` is demanded by later cells.
text <- readFile "tiny-shakespeare.txt"

In [2]:
-- number of characters in the corpus
print (length text)

1115394

In [3]:
import Prelude
import qualified Prelude
-- Peek at the start of the corpus: only its first 1000 characters are printed.
(putStr . Prelude.take 1000) text

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.

In [4]:
-- Here are all the unique characters that occur in this text
import Data.List (sort)
import qualified Data.Set as S
-- The vocabulary: every distinct character of the corpus, in ascending order.
-- Going through a Set removes duplicates, and toAscList already returns the
-- elements sorted, so no separate `sort` pass is needed.
chars :: String
chars = S.toAscList (S.fromList text)

-- Number of distinct characters in the corpus.
vocabSize :: Int
vocabSize = length chars

print chars
print vocabSize

"\n !$&',-.3:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

65

In [5]:
-- create a mapping from characters to integers
import qualified Data.Map as M

-- character -> integer id, numbered by the character's position in `chars`
stoi :: M.Map Char Int
stoi = M.fromList (chars `zip` [0 ..])

-- integer id -> character, the inverse table of `stoi`
itos :: M.Map Int Char
itos = M.fromList ([0 ..] `zip` chars)

-- encoder: turn a string into the list of its character ids.
-- A character that is not a key of `stoi` makes `M.!` raise an error.
encode :: String -> [Int]
encode str = [stoi M.! ch | ch <- str]

-- decoder: turn a list of ids back into the string they spell.
-- An id that is not a key of `itos` makes `M.!` raise an error.
decode :: [Int] -> String
decode ids = [itos M.! tokenId | tokenId <- ids]

-- round-trip check: encoding then decoding should give the input back
print (encode "hii there")
print (decode (encode "hii there"))

[46,47,47,1,58,46,43,56,43]

"hii there"

In [6]:
{-# LANGUAGE OverloadedStrings #-}
import Data.Text (Text)
import qualified Data.Text as T
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.Map as M

-- Character <-> integer tables again, this time with a Text-based
-- encode / decode pair on top of them.

-- character -> integer id, numbered by the character's position in `chars`
stoi :: M.Map Char Int
stoi = M.fromList (chars `zip` [0 ..])

-- integer id -> character, the inverse table of `stoi`
itos :: M.Map Int Char
itos = M.fromList ([0 ..] `zip` chars)

-- encoder: unpack the Text and look up each character's id.
-- A character that is not a key of `stoi` makes `M.!` raise an error.
encode :: Text -> [Int]
encode txt = [stoi M.! ch | ch <- T.unpack txt]

-- decoder: look up each id's character and pack the result into a Text.
-- An id that is not a key of `itos` makes `M.!` raise an error.
decode :: [Int] -> Text
decode ids = T.pack [itos M.! tokenId | tokenId <- ids]

In [7]:
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
:ext QuasiQuotes
:ext TemplateHaskell

import Torch
import Torch.Device

import qualified Torch.Functional.Internal as TFI
import GHC.Int (Int64)

-- Device value used by this notebook: CPU, index 0.
device :: Device
device = Device CPU 0

-- First, we need to convert the encoded text into a Haskell list of Int64 values
-- (`encode` works on Text, hence the T.pack).
encodedText :: [Int64]
encodedText = fromIntegral <$> encode (T.pack text)

-- Now, let's create the Hasktorch tensor from the encodedText list
dataTensor :: Tensor
dataTensor = asTensor encodedText


putStrLn $ "Shape: " ++ show (shape dataTensor)
-- Ask the tensor for its dtype instead of printing a hard-coded label, so the
-- line stays truthful if the element type of encodedText ever changes.
putStrLn $ "DType: " ++ show (dtype dataTensor)
putStrLn "First 1000 characters:"
-- slice along dimension 0, from index 0 up to 1000, step 1
print $ TFI.slice dataTensor 0 0 1000 1
-- print $ dataTensor ! [slice|0::1000|]

Shape: [1115394]

DType: Int64

First 1000 characters:

Tensor Int64 [1000] [ 18,  47,  56,  57,  58,  1,  15,  47,  58,  47,  64,  43,  52,  10,  0,  14,  43,  44,  53,  56,  43,  1,  61,  43,  1,  54,  56,  53,  41,  43,  43,  42,  1,  39,  52,  63,  1,  44,  59,  56,  58,  46,  43,  56,  6,  1,  46,  43,  39,  56,  1,  51,  43,  1,  57,  54,  43,  39,  49,  8,  0,  0,  13,  50,  50,  10,  0,  31,  54,  43,  39,  49,  6,  1,  57,  54,  43,  39,  49,  8,  0,  0,  18,  47,  56,  57,  58,  1,  15,  47,  58,  47,  64,  43,  52,  10,  0,  37,  53,  59,  1,  39,  56,  43,  1,  39,  50,  50,  1,  56,  43,  57,  53,  50,  60,  43,  42,  1,  56,  39,  58,  46,  43,  56,  1,  58,  53,  1,  42,  47,  43,  1,  58,  46,  39,  52,  1,  58,  53,  1,  44,  39,  51,  47,  57,  46,  12,  0,  0,  13,  50,  50,  10,  0,  30,  43,  57,  53,  50,  60,  43,  42,  8,  1,  56,  43,  57,  53,  50,  60,  43,  42,  8,  0,  0,  18,  47,  56,  57,  58,  1,  15,  47,  58,  47,  64,  43,  52,  10,  0,  18,  47,  56,  57,  58,  6,  1,  63,  53,  59,  1,  49,  52,  53,  6

In [8]:
import GHC.Float (float2Double)

-- Calculate the length of the data tensor
dataLength :: Int
dataLength = length encodedText

-- Calculate the split index (90% for training, 10% for validation).
-- The product is computed directly in Double. The previous detour through
-- single-precision Float (float2Double) cannot represent every integer above
-- 2^24, so the index could drift for larger corpora.
splitIndex :: Int
splitIndex = round (0.9 * fromIntegral dataLength :: Double)

-- Split the data into train and validation sets
trainValDataPair :: ([Int64], [Int64])
trainValDataPair = splitAt splitIndex encodedText

-- first `splitIndex` tokens
trainData :: [Int64]
trainData = fst trainValDataPair

-- everything after the split point
valData :: [Int64]
valData = snd trainValDataPair

-- Create train and validation tensors
trainDataTensor :: Tensor
trainDataTensor = asTensor trainData

valDataTensor :: Tensor
valDataTensor = asTensor valData

-- Print the train and validation tensors
--putStrLn $ "Train Data Tensor: " ++ show trainDataTensor
--putStrLn $ "Validation Data Tensor: " ++ show valDataTensor

In [9]:
-- length of the longest context shown below
blockSize :: Int
blockSize = 8

-- the first blockSize + 1 training tokens, sliced along dimension 0
firstNItems :: Tensor
firstNItems = TFI.slice trainDataTensor 0 0 (blockSize + 1) 1


-- training tokens read back out of the tensor, one `select` per index
trainDataList :: [Int64]
trainDataList = map tokenAt [0 .. (length trainData - 1)]
  where
    tokenAt :: Int -> Int64
    tokenAt i = asValue (select 0 i trainDataTensor)

-- growing prefixes of the training data: lengths 1 through blockSize
contextList :: [[Int64]]
contextList = map (\t -> Prelude.take (t + 1) trainDataList) [0 .. (blockSize - 1)]

-- the token following each position: the training data shifted by one
targetList :: [Int64]
targetList = drop 1 trainDataList

-- print one (context, target) pair in the "when input is ..." format
printPair :: forall {a1} {a2}. (Show a1, Show a2) => (a1, a2) -> IO ()
printPair (context, target) =
  putStrLn ("when input is " <> show context <> " the target: " <> show target)

-- zip stops at the shorter list, so exactly blockSize pairs are printed
mapM_ printPair (zip contextList targetList)

when input is [18] the target: 47
when input is [18,47] the target: 56
when input is [18,47,56] the target: 57
when input is [18,47,56,57] the target: 58
when input is [18,47,56,57,58] the target: 1
when input is [18,47,56,57,58,1] the target: 15
when input is [18,47,56,57,58,1,15] the target: 47
when input is [18,47,56,57,58,1,15,47] the target: 58

In [10]:
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}

import Torch.Tensor 

--import Torch.Typed.Functional

import Torch.Functional


-- Device passed to mkGenerator in getBatch: CPU, index 0.
device :: Device
device = Device CPU 0

-- Seed passed to mkGenerator in getBatch. Left polymorphic (any Num) so it
-- takes whatever numeric type mkGenerator expects.
torchManualSeed :: forall {a}. Num a => a
torchManualSeed = 1337

-- Number of sequences stacked into one batch by getBatch.
batchSize :: Int
batchSize = 4

-- Number of tokens in each sequence of a batch (the context length).
blockSize :: Int
blockSize = 8

-- Sample one batch of (inputs, targets) from the chosen split.
--
-- split: "train" selects trainDataTensor; any other string selects
--        valDataTensor.
-- Returns (x, y): each stacks batchSize windows of blockSize tokens along a
-- new leading dimension; y holds the same windows shifted one token to the
-- right, i.e. the next-token targets for x.
--
-- NOTE(review): the generator is re-created from torchManualSeed on every
-- call, so repeated calls presumably draw the same offsets -- confirm this is
-- intended before calling getBatch from a training loop.
getBatch :: String -> IO (Tensor, Tensor)
getBatch split = do
    let data' = if split == "train" then trainDataTensor else valDataTensor
        -- Take the length from the tensor itself rather than walking the
        -- matching Haskell list with `length`.
        dataLength' = case shape data' of
            (n : _) -> n
            _ -> 0
    gen <- mkGenerator device torchManualSeed
    -- Start offsets are drawn below (length - blockSize) so that the shifted
    -- target window still fits (assumes randint's upper bound is exclusive,
    -- as in torch -- confirm).
    let ix = fst $ randint' 0 (dataLength' - blockSize) [batchSize] gen
    print ix
    -- Convert all offsets in one go instead of one `select` per batch entry.
    let starts = asValue (toType Int64 ix) :: [Int]
        -- blockSize tokens of data' beginning at `offset`
        window offset = TFI.slice data' 0 offset (offset + blockSize) 1
    let x = Torch.stack (Dim 0) (map window starts)
    let y = Torch.stack (Dim 0) (map (window . (+ 1)) starts)
    return (x, y)



-- Draw one training batch and show both halves with their shapes.
(xb, yb) <- getBatch "train"
putStrLn "inputs:"
putStrLn $ "Shape: " ++ show (shape xb)
print xb
putStrLn "targets:"
putStrLn $ "Shape: " ++ show (shape yb)
print yb

putStrLn "----"

-- Row indices of the batch, 0 .. batchSize - 1.
batchDims = [0 .. (batchSize - 1)]
-- "here-2" / "here -1" are progress markers; they show up in the cell output
-- between the definitions.
putStrLn "here-2"
-- Positions within a sequence, 0 .. blockSize - 1.
timeDims = [0 .. (blockSize - 1)]
putStrLn "here -1"
-- Every (row, position) combination, row-major: all positions of row 0
-- first, then row 1, and so on.
btPairs = [(b, t) | b <- batchDims, t <- timeDims]
print btPairs


-- For batch row b and position t, print the context (the first t + 1 tokens
-- of row b in xb) next to its target (the token at position t of row b in yb).
printContextAndTarget :: (Int, Int) -> IO ()
printContextAndTarget (b, t) =
  putStrLn $ "when input is " ++ show prefix ++ " the target: " ++ show nextToken
  where
    -- row b of the inputs; the leading dimension is kept
    row = indexSelect' 0 [b] xb
    -- the first t + 1 tokens of that row (slice along dimension 1)
    prefix = TFI.slice row 1 0 (t + 1) 1
    -- single element of yb at (b, t), read out as a plain Int64
    nextToken :: Int64
    nextToken = asValue (indexSelect' 1 [t] (indexSelect' 0 [b] yb))



-- walk every (row, position) pair of the batch
mapM_ printContextAndTarget btPairs



Tensor Float [4] [ 74928.0000   ,  231851.0000   ,  934226.0000   ,  560077.0000   ]

inputs:

Shape: [4,8]

Tensor Int64 [4,8] [[ 56,  6,  0,  24,  43,  58,  1,  61],
                    [ 39,  47,  51,  1,  58,  46,  39,  58],
                    [ 52,  45,  1,  58,  53,  1,  57,  39],
                    [ 43,  47,  52,  45,  1,  46,  53,  50]]

targets:

Shape: [4,8]

Tensor Int64 [4,8] [[ 6,  0,  24,  43,  58,  1,  61,  46],
                    [ 47,  51,  1,  58,  46,  39,  58,  1],
                    [ 45,  1,  58,  53,  1,  57,  39,  63],
                    [ 47,  52,  45,  1,  46,  53,  50,  47]]

----

here-2

here -1

[(0,0),(0,1),(0,2),(0,3),(0,4),(0,5),(0,6),(0,7),(1,0),(1,1),(1,2),(1,3),(1,4),(1,5),(1,6),(1,7),(2,0),(2,1),(2,2),(2,3),(2,4),(2,5),(2,6),(2,7),(3,0),(3,1),(3,2),(3,3),(3,4),(3,5),(3,6),(3,7)]

when input is Tensor Int64 [1,1] [[ 56]] the target: 6
when input is Tensor Int64 [1,2] [[ 56,  6]] the target: 0
when input is Tensor Int64 [1,3] [[ 56,  6,  0]] the target: 24
when input is Tensor Int64 [1,4] [[ 56,  6,  0,  24]] the target: 43
when input is Tensor Int64 [1,5] [[ 56,  6,  0,  24,  43]] the target: 58
when input is Tensor Int64 [1,6] [[ 56,  6,  0,  24,  43,  58]] the target: 1
when input is Tensor Int64 [1,7] [[ 56,  6,  0,  24,  43,  58,  1]] the target: 61
when input is Tensor Int64 [1,8] [[ 56,  6,  0,  24,  43,  58,  1,  61]] the target: 46
when input is Tensor Int64 [1,1] [[ 39]] the target: 47
when input is Tensor Int64 [1,2] [[ 39,  47]] the target: 51
when input is Tensor Int64 [1,3] [[ 39,  47,  51]] the target: 1
when input is Tensor Int64 [1,4] [[ 39,  47,  51,  1]] the target: 58
when input is Tensor Int64 [1,5] [[ 39,  47,  51,  1,  58]] the target: 46
when input is Tensor Int64 [1,6] [[ 39,  47,  51,  1,  58,  46]] the target: 39
when input is Tensor In

In [None]:
import Torch
import Torch.NN
import Torch.Functional as F

-- data BigramLanguageModel {
  

In [None]:
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}


import GHC.Generics (Generic)
import Torch.Typed.Autograd 
import Torch.Typed.Tensor 
import Torch.Typed.NN
import Torch.Typed.Functional
import qualified Torch.Device as D
import qualified Torch.Typed as D

-- Specification from which a BigramLanguageModel is sampled.
data BigramLanguageModelSpec = BigramLanguageModelSpec
    { vocabSize :: Int -- vocabulary size requested by the caller
    }
    deriving (Show, Eq)

-- Bigram language model whose only component is a token-embedding table.
-- NOTE(review): this cell has no execution count (In [None]), so it may never
-- have been compiled -- treat the API usage below as unverified.
data BigramLanguageModel = BigramLanguageModel {
    -- The two 100s are fixed in the type; presumably the number of embeddings
    -- and the embedding width -- confirm against the Embedding declaration.
    tokenEmbeddingTable :: Embedding 'Nothing 100 100 'Learned 'D.Float '(D.CPU, 0)  
    } deriving (Show, Generic)

-- Builds a model by sampling its embedding table.
-- NOTE(review): RecordWildCards brings the spec's vocabSize into scope here,
-- but it is never read; the table sizes are the hard-coded 100s below.
instance D.Randomizable BigramLanguageModelSpec BigramLanguageModel where
    sample BigramLanguageModelSpec {..} = do
        tokenEmbeddingTable <- sample $ LearnedEmbeddingWithRandomInitSpec @'Nothing (SNat @100) (SNat @100) (SDataType @D.Float) (SDevice @(D.CPU, 0))
        return $ BigramLanguageModel tokenEmbeddingTable

-- Runs the token ids through the embedding table. Per the signature, an
-- [8, 10] Int64 tensor of ids becomes an [8, 10, 100] Float tensor.
forward :: BigramLanguageModel -> Tensor '(D.CPU, 0) 'D.Int64 '[8, 10] -> Tensor '(D.CPU, 0) 'D.Float '[8, 10, 100]
forward BigramLanguageModel {..} idx = embed @'Nothing tokenEmbeddingTable idx

-- Smoke test for the bigram model: sample a model, feed it one batch of
-- random token ids and print the shape of the result.
main :: IO ()
main = do
    let vocabSize = 100
    model <- sample $ BigramLanguageModelSpec vocabSize

    -- The batch is 8 sequences of 10 token ids. Both sizes live in the
    -- type-level shape below (and in forward's signature), so the unused
    -- value-level `batchSize` / `inputSeqLen` bindings were dropped.
    -- NOTE(review): no import of `Proxy` is visible in this cell -- confirm it
    -- is in scope before running.
    idx <- randInt @('D.Int64, '(D.CPU, 0)) (Proxy @'[8, 10]) (0, vocabSize)
    let logits = forward model idx

    putStrLn $ "Logits shape: " ++ show (shape logits)



: 