In [None]:
import os
import urllib.request

In [None]:
if (not os.path.exists('tinystories-vocab-3k-cai.csv')):
  !git clone https://huggingface.co/datasets/schuler/TinyStories4Pascal-Tokenized-v2
  !unzip TinyStories4Pascal-Tokenized-v2/tinystories-2.1M-tokenized3k.csv.zip
  !unzip TinyStories4Pascal-Tokenized-v2/tinystories-vocab-3k-cai.csv.zip

In [None]:
# Install the Free Pascal compiler and sources, Lazarus (lazbuild is used by a
# later cell to build the project), plus git and subversion.
!apt-get update && apt-get -y install fpc fpc-source lazarus git subversion

0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com] [Connected to r2u.stat.ill                                                                                                    Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
0% [Connecting to archive.ubuntu.com (185.125.190.81)] [Connecting to security.ubuntu.com (185.125.10% [Connecting to archive.ubuntu.com (185.125.190.81)] [Connecting to security.ubuntu.com (185.125.1                                                                                                    Ign:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy Release
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:8 http://archive.ubuntu.com/ubuntu jammy-updates InRelease


In [None]:
# Check out the CAI neural-api Lazarus sources into ./neural-api; the build
# cell below writes into neural-api/examples and reads from neural-api/bin.
!svn checkout https://svn.code.sf.net/p/cai/svncode/trunk/lazarus neural-api

Checked out revision 2093.


In [None]:
code = """
program smallTransformer;
(*
Copyright (C) 2024 Joao Paulo Schwarz Schuler

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*)

{$mode objfpc}{$H+}

uses {$IFDEF UNIX} {$IFDEF UseCThreads}
  cthreads, {$ENDIF} {$ENDIF}
  Classes,
  neuralnetwork,
  neuralvolume,
  neuralfit,
  neuralthread,
  neuraldatasets,
  neuralab,
  neuraltokenizer,
  CustApp,
  Math,
  sysutils;

const
  // Width of the network input (tokens per sample); also used as the cap on
  // the sample length taken from each dataset row during training.
  csContextLen = 80;
  // CSV file with the pre-tokenized stories (one row of integer tokens each).
  csTrainingFileName = 'tinystories-2.1M-tokenized3k.csv';
  // Vocabulary file loaded into the tokenizer.
  csVocabFileName = 'tinystories-vocab-3k-cai.csv';
  // Passed to LoadIntegersInCSV as its third argument (presumably the maximum
  // number of rows to load - confirm against neuraldatasets).
  csMaxTrainingRows = 300000;
  // NOTE(review): not referenced anywhere in the visible program, and the
  // value (3) disagrees with the earlier remark about a 1 token minimum.
  csMinSampleSize = 3;
  // Embedding depth given to the token/positional embedding layer.
  csEmbedDim = 768;
  // Vocabulary size the model is built with (embedding rows and output depth).
  csModelVocabSize = 3000;


type
  // Console application that loads a tokenized dataset, builds a transformer
  // style network and trains it through TNeuralDataLoadingFit. The three
  // Get*Pair methods are handed to FitLoading as sample providers.
  TTestFitLoading = class(TCustomApplication)
  protected
    // Tokenized dataset: one dynamic array of token ids per row.
    FDataset: array of array of integer;
    // Tokenizer holding the vocabulary loaded from csVocabFileName.
    FDictionary: TNeuralTokenizer;
    // Number of rows in FDataset (set in LoadDataset).
    FDatasetSize: integer;
    // The network being trained (created in DoRun).
    FNN: THistoricalNets;
    // Training driver (created in DoRun).
    NFit: TNeuralDataLoadingFit;
    // Sampler used for one of the text generation calls in OnAfterEpoch.
    FSampler: TNNetSamplerBase;
    // Upper bound on the number of tokens taken from a row in GetTrainingPair
    // (set to csContextLen in DoRun).
    FMaxPredictCharPos: integer;
    // Number of entries in FDictionary (set in LoadDataset from
    // FDictionary.Count); not read elsewhere in the visible code.
    FVocabSize: integer;
    // Loads the vocabulary and the training rows.
    procedure LoadDataset;
    // Application body: build, train, clean up.
    procedure DoRun; override;
  public
    // Prints generated text for a handful of fixed prompts.
    procedure OnAfterEpoch(Sender: TObject);
    // Per-step hook; currently does nothing.
    procedure OnAfterStep(Sender: TObject);
    // Fills pInput/pOutput with a randomly chosen training sample.
    procedure GetTrainingPair(Idx: integer; ThreadId: integer; pInput, pOutput: TNNetVolume);
    // Fills pInput/pOutput with the validation sample selected by Idx.
    procedure GetValidationPair(Idx: integer; ThreadId: integer; pInput, pOutput: TNNetVolume);
    // Fills pInput/pOutput with a test sample (delegates to GetValidationPair).
    procedure GetTestPair(Idx: integer; ThreadId: integer; pInput, pOutput: TNNetVolume);
  end;

  // Loads the tokenizer vocabulary from csVocabFileName, prints a short
  // tokenize/detokenize sanity check for the text 'one day a', then loads the
  // tokenized rows from csTrainingFileName into FDataset and records
  // FVocabSize and FDatasetSize.
  //
  // The sanity check prints at most the first three tokens. It no longer
  // assumes that tokenization produced at least three of them, so a missing or
  // broken vocabulary file yields a short printout instead of an out-of-range
  // read.
  procedure TTestFitLoading.LoadDataset;
  const
    csShownTokens = 3;
  var
    Tokens: array of integer;
    ShownCount: integer;
    TokenIdx: integer;
  begin
    // Explicit initialization of the managed local (avoids compiler hint 5091).
    Tokens := nil;

    WriteLn('Loading vocabulary: ', csVocabFileName);
    FDictionary.LoadVocabularyFromFile(csVocabFileName);
    FDictionary.Tokenize('one day a', Tokens);

    // Never index past what the tokenizer actually returned.
    ShownCount := Min(csShownTokens, Length(Tokens));

    // Same line layout as before: 'one day a: t0 t1 t2'.
    Write('one day a:');
    for TokenIdx := 0 to ShownCount - 1 do
      Write(' ', Tokens[TokenIdx]);
    WriteLn;

    for TokenIdx := 0 to ShownCount - 1 do
      WriteLn('Dic : ', FDictionary.DeTokenize(Tokens[TokenIdx]));

    WriteLn('Loading dataset: ', csTrainingFileName);
    LoadIntegersInCSV(csTrainingFileName, FDataset, csMaxTrainingRows);

    FVocabSize := FDictionary.Count;
    FDatasetSize := Length(FDataSet);

    WriteLn('Loaded dataset with ', FDatasetSize, ' rows');
  end;

  // Creates a THistoricalNets made of:
  //   * an input of pContextSize x 1 x 1 followed by a token and positional
  //     embedding with pVocabSize entries and depth pEmbedDim,
  //   * pLayers blocks added through AddTransformerBlockCAI,
  //   * a pointwise linear layer with pVocabSize outputs and a pointwise
  //     softmax.
  // The caller owns the returned network and must free it.
  //
  // The second argument of AddTransformerBlockCAI is now derived from
  // pEmbedDim (4*pEmbedDim) instead of the literal 4*768, so the blocks follow
  // the embedding depth requested by the caller. For pEmbedDim = 768 the
  // network is unchanged.
  // NOTE(review): the head count stays fixed at 12; presumably pEmbedDim must
  // be compatible with it - confirm against AddTransformerBlockCAI.
  function CreateCaiTransformerDecoder(pVocabSize, pEmbedDim, pLayers, pContextSize: integer): THistoricalNets;
  var
    CntLayer: integer;
  begin
    Result := THistoricalNets.Create();
    Result.AddLayer([
      TNNetInput.Create(pContextSize, 1, 1),
      TNNetTokenAndPositionalEmbedding.Create(pVocabSize, pEmbedDim)
    ]);
    for CntLayer := 1 to pLayers do
    begin
      Result.AddTransformerBlockCAI( 12, 4*pEmbedDim, true, true, false);
    end;
    Result.AddLayer([
      TNNetPointwiseConvLinear.Create(pVocabSize, 1),
      TNNetPointwiseSoftMax.Create(1)
    ]);
  end;

  // Application body: creates the tokenizer, loads the data, builds the
  // network, configures and runs TNeuralDataLoadingFit, prints a final set of
  // generated samples and releases every object created here.
  // The previously declared but unused locals (W, I) have been removed.
  procedure TTestFitLoading.DoRun;
  var
    Opt: TNeuralOptimizerAdam;
  begin
    FDictionary := TNeuralTokenizer.Create();
    LoadDataset();
    // presumably the two Adam beta coefficients - confirm against
    // TNeuralOptimizerAdam.Create.
    Opt := TNeuralOptimizerAdam.Create(0.9, 0.98);
    NFit := TNeuralDataLoadingFit.Create();
    FMaxPredictCharPos := csContextLen;
    FSampler := TNNetSamplerTopP.Create(0.4);
    // 12 transformer blocks over a csContextLen wide input.
    FNN := CreateCaiTransformerDecoder(csModelVocabSize, csEmbedDim, 12, csContextLen);

    DebugThreadCount();
    FNN.DebugStructure;
    FNN.DebugWeights();

    WriteLn('Computing...');
    NFit.LogEveryBatches := 100;
    NFit.InitialLearningRate := 0.0001;
    NFit.Optimizer := Opt;
    NFit.LearningRateDecay := 0.00;
    NFit.StaircaseEpochs := 1;
    NFit.L2Decay := 0.1;
    NFit.EnableMultiClassLoss();
    NFit.EnableClassComparisonInLastPixel();
    NFit.AvgWeightEpochCount := 1;
    NFit.OnAfterEpoch := @OnAfterEpoch;
    NFit.OnAfterStep := @OnAfterStep;
    // 48000 training samples per epoch; validation and test use 1/20 of that.
    NFit.FitLoading(
      FNN,
      {TrainingVolumesCount=}48000*1,
      {ValidationVolumesCount=}48000*1 div 20,
      {TestVolumesCount=}48000*1 div 20,
      {batchsize=}NeuralDefaultThreadCount(),
      {epochs=}500,
      @GetTrainingPair, @GetValidationPair, @GetTestPair
    );
    FNN.DebugWeights();
    // Print one last round of generated samples after training.
    OnAfterEpoch(Self);
    FSampler.Free;
    Opt.Free;
    NFit.Free;
    FNN.Free;
    FDictionary.Free;
    Terminate;
  end;

  // Epoch hook (also called once at the end of DoRun): prints text generated
  // by NFit.NN for a fixed list of prompts. The first four prompts are
  // generated with a nil sampler argument, the last one with FSampler. Each
  // generated string is followed by a '.'.
  procedure TTestFitLoading.OnAfterEpoch(Sender: TObject);
  const
    csPromptsWithoutSampler: array[0..3] of string =
      ('one day', 'once upon a', 'once upon a time', 'once upon');
  var
    PromptIdx: integer;
  begin
    for PromptIdx := Low(csPromptsWithoutSampler) to High(csPromptsWithoutSampler) do
    begin
      WriteLn(
        GenerateStringFromCasualNN(NFit.NN, FDictionary,
          csPromptsWithoutSampler[PromptIdx], nil,
          csNeuralEncodingMethodIntChar),
        '.');
    end;
    WriteLn(
      GenerateStringFromCasualNN(NFit.NN, FDictionary, 'billy', FSampler,
        csNeuralEncodingMethodIntChar),
      '.');
  end;

  // Step hook assigned to NFit.OnAfterStep in DoRun. Currently a no-op; the
  // commented-out statements below are optional debugging aids.
  procedure TTestFitLoading.OnAfterStep(Sender: TObject);
  begin
    // Would print the OnAfterEpoch samples on a random 1-in-100 subset of steps.
    //if Random(100)=0 then OnAfterEpoch(Sender);
    // Would print the weight statistics of NFit.ThreadNN[0].
    //NFit.ThreadNN[0].DebugWeights();
  end;

  // Training sample provider handed to FitLoading.
  // Idx and ThreadId are not used: a row is picked at random from the whole
  // dataset on every call. The input volume receives the first TokenCount
  // tokens of the row, the output volume is one-hot encoded from the row
  // shifted by one token, and pOutput.Tag is set to the last token copied
  // into the input.
  procedure TTestFitLoading.GetTrainingPair(Idx: integer; ThreadId: integer;
    pInput, pOutput: TNNetVolume);
  var
    RowIdx: integer;
    TokenCount: integer;
    LastInputToken: integer;
    InputTokens, TargetTokens: array of integer;
  begin
    // Bring both volumes to the shapes of the first and last network layers.
    if pInput.Size <> FNN.GetFirstLayer().Output.Size then
      pInput.ReSize(FNN.GetFirstLayer().Output);
    if pOutput.Size <> FNN.GetLastLayer().Output.Size then
      pOutput.ReSize(FNN.GetLastLayer().Output);

    // Pick a random row and bound the number of tokens taken from it by the
    // row length, the input width and FMaxPredictCharPos.
    RowIdx := Random(FDatasetSize);
    TokenCount := Min(FMaxPredictCharPos,
      Min(Length(FDataset[RowIdx]), pInput.SizeX));
    LastInputToken := FDataset[RowIdx][TokenCount - 1];

    // Input: tokens [0 .. TokenCount-1], remaining positions stay at zero.
    InputTokens := Copy(FDataset[RowIdx], 0, TokenCount);
    pInput.Fill(0);
    pInput.CopyNoChecksIntArr(InputTokens);

    // Output: up to TokenCount tokens starting at index 1 (shifted by one).
    TargetTokens := Copy(FDataset[RowIdx], 1, TokenCount);
    pOutput.OneHotEncoding(TargetTokens);
    pOutput.Tag := LastInputToken;
  end;

  // Validation sample provider handed to FitLoading (also used for the test
  // set through GetTestPair).
  // The row is chosen deterministically from Idx: (Idx * 20) mod FDatasetSize.
  // ThreadId is not used. The input volume receives the leading tokens of the
  // row (bounded by the input width), the output volume is one-hot encoded
  // from the row shifted by one token, and pOutput.Tag is set to the last
  // token copied into the input.
  // The unused local ExpectedTokenChar has been removed.
  // NOTE(review): GetTrainingPair samples from every row of FDataset, so the
  // rows served here can also appear during training - confirm this overlap
  // is intended.
  procedure TTestFitLoading.GetValidationPair(Idx: integer; ThreadId: integer;
    pInput, pOutput: TNNetVolume);
  var
    SampleId: integer;
    SampleLen: integer;
    SampleCutPosition: integer;
    ExpectedTokenInt: integer;
    AIntegerArray: array of integer;
  begin
    // Make sure that expected input and output have the proper sizes.
    if FNN.GetFirstLayer().Output.Size <> pInput.Size then pInput.ReSize(FNN.GetFirstLayer().Output);
    if FNN.GetLastLayer().Output.Size <> pOutput.Size then pOutput.ReSize(FNN.GetLastLayer().Output);
    // Select the row: every 20th row, wrapping around the dataset.
    SampleId := (Idx * 20) mod FDatasetSize; // Min(FDatasetSize,48000);
    SampleLen := Min(Length(FDataset[SampleId]), pInput.SizeX);
    SampleCutPosition := SampleLen;
    // Token stored in the tag: the last token that goes into the input.
    ExpectedTokenInt := FDataset[SampleId][SampleCutPosition-1];
    // Encode the input volume: leading tokens, remaining positions zero.
    AIntegerArray := Copy(FDataset[SampleId], 0, SampleCutPosition);
    pInput.Fill(0);
    pInput.CopyNoChecksIntArr( AIntegerArray );
    // Encode the output volume from the row shifted by one token.
    AIntegerArray := Copy(FDataset[SampleId], 1, SampleCutPosition);
    pOutput.OneHotEncoding(AIntegerArray) ;
    pOutput.Tag := ExpectedTokenInt;
  end;

  // Test sample provider handed to FitLoading. It forwards to
  // GetValidationPair unchanged, so for a given Idx the test sample is the
  // same row as the validation sample.
  procedure TTestFitLoading.GetTestPair(Idx: integer; ThreadId: integer;
    pInput, pOutput: TNNetVolume);
  begin
    GetValidationPair(Idx, ThreadId, pInput, pOutput);
  end;

var
  Application: TTestFitLoading;
begin
  // Program entry point: create the application object, run it, and always
  // release it, even if Run raises.
  Application := TTestFitLoading.Create(nil);
  try
    Application.Title:='Nano Covolutional Based NLP Trained from File';
    Application.Run;
  finally
    Application.Free;
  end;
end.
"""
# Overwrite the main source file of the existing CaiOptimizedDenseNet example
# with the Pascal program held in `code`, so that the example's .lpi project
# file can be reused to build it.
with open("neural-api/examples/CaiOptimizedDenseNet/CaiOptimizedDenseNet.lpr", "w") as text_file:
    text_file.write(code)
# Build the project with lazbuild, then list the resulting binary to confirm
# that the build produced it.
!lazbuild neural-api/examples/CaiOptimizedDenseNet/CaiOptimizedDenseNet.lpi
!ls -l neural-api/bin/x86_64-linux/bin/CaiOptimizedDenseNet

Hint: (lazarus) [RunTool] "/usr/bin/fpc" "-iWTOTP"
Hint: (lazarus) [RunTool] "/usr/bin/fpc" "-va" "compilertest.pas"
Hint: (lazarus) [RunTool] "/usr/bin/fpc" "-iWTOTP" "-Px86_64" "-Tlinux"
Hint: (lazarus) [RunTool] "/usr/bin/fpc" "-va" "compilertest.pas" "-Px86_64" "-Tlinux"
Hint: (11030) Start of reading config file /etc/fpc.cfg
Compiling Release Version
Hint: (11031) End of reading config file /etc/fpc.cfg
Free Pascal Compiler version 3.2.2+dfsg-9ubuntu1 [2022/04/11] for x86_64
Copyright (c) 1993-2021 by Florian Klaempfl and others
(1002) Target OS: Linux for x86-64
(3104) Compiling CaiOptimizedDenseNet.lpr
/content/neural-api/examples/CaiOptimizedDenseNet/CaiOptimizedDenseNet.lpr(74,45) Hint: (5091) Local variable "Tokens" of a managed type does not seem to be initialized
/content/neural-api/examples/CaiOptimizedDenseNet/CaiOptimizedDenseNet.lpr(111,5) Note: (5025) Local variable "W" not used
/content/neural-api/examples/CaiOptimizedDenseNet/CaiOptimizedDenseNet.lpr(112,5) Note: (50

In [None]:
!neural-api/bin/x86_64-linux/bin/CaiOptimizedDenseNet

Loading vocabulary: tinystories-vocab-3k-cai.csv
one day a: 1839 32 745
Dic : one
Dic :  
Dic : day
Loading dataset: tinystories-2.1M-tokenized3k.csv
Loaded dataset with 300000 rows
CPU threads reported by the operating system: 8.
 Layers: 1372
 Neurons:85945
 Weights:89542656 Weight Sum:-1081.309937 Bias sum:    0.000000 Inertia sum:    0.000000 Delta sum:    0.000000
Has AVX: TRUE Has AVX2: FALSE Has AVX512: FALSE
Layer  0 Neurons:   0 Weights:     0 TNNetInput(80,1,1,0,0) Output:80,1,1 Learning Rate:0.0100 Inertia:0.90 Weight Sum:  0.0000 Bias Sum:  0.0000 Branches:1
Layer  1 Neurons:   1 Weights:2304000 TNNetTokenAndPositionalEmbedding(3000,768,0,10000,0) Output:80,1,768 Learning Rate:0.0100 Inertia:0.90 Weight Sum:-31.4871 Bias Sum:  0.0000 Parent:0 Branches:4
Layer  2 Neurons: 768 Weights:589824 TNNetPointwiseConvLinear(768,1,0,1,1) Output:80,1,768 Learning Rate:0.0100 Inertia:0.90 Weight Sum:-41.2101 Bias Sum:  0.0000 Parent:1 Branches:1
Layer  3 Neurons: 768 Weights:589824 TNNe