# DSL translation web service

Anton Antonov   
RakuForPrediction at WordPress   
RakuForPrediction-book at GitHub   
August 2024

------- 

## Setup (notebook)

In [41]:
use HTTP::Tiny;
use JSON::Fast;
use URI::Encode;

use Data::Importers;
use Data::Reshapers;
use Data::Generators;
use Data::ExampleDatasets;

In [42]:
# Port of the locally running web service; every request URL below interpolates it.
# It has to match the `--port` value given when the service is started.
my $port = 9191;
# LLM model name; the same value appears as `--llm-model` in the start command of the next section.
my $llm-model = 'gpt-4o';

gpt-4o

-------

## Start the service

Here is the help message of `dsl-web-translation-service`:

```
dsl-web-translation-service --help

Usage:
  dsl-web-translation-service [--host=<Str>] [--port=<Str>] [--llm=<Str>] [--llm-model=<Str>] [--llm-api-key=<Str>] -- Start a Cro service for translation of DSLs into executable code.
  
    --host=<Str>           Host name. [default: 'localhost']
    --port=<Str>           Port. [default: '10000']
    --llm=<Str>            LLM service to use. [default: 'ChatGPT']
    --llm-model=<Str>      LLM model to use. [default: 'gpt-3.5-turbo']
    --llm-api-key=<Str>    LLM API key; if an empty string the corresponding env variable is used. [default: '']
```

For example, start the DSL web service with the command:

```
dsl-web-translation-service --port=9191 --llm-model=gpt-4o
```

----------

## LLM setup of the Web service

### OpenAI API key

Set OPENAI_API_KEY with the URL:

```
http://localhost:9191/setup?api_key=<YOUR_API_KEY>
```

Or, naming the LLM service explicitly, with the URL:

```
http://localhost:9191/setup?llm=ChatGPT&api_key=<YOUR_API_KEY>
```

In [142]:
# Build the `/setup` request: it selects the LLM service ("gemini"), passes the result of
# `random-pet-name` as the user ID, and picks the model; both interpolated values are URI-encoded.
# NOTE(review): no `api_key` parameter is sent here -- presumably the service then relies on
# the corresponding environment variable; confirm.
my $url = "http://localhost:$port/setup?llm=gemini&user_id={uri_encode(random-pet-name)}&model={uri_encode('gemini-1.5-flash')}";
# Issue the GET request against the local service.
my $resp = HTTP::Tiny.get($url);
# Decode the response body into a string; this is the value the cell displays.
$resp<content>.decode

Could not connect to socket: Connection refused

In [None]:
# Query the `/show_setup` endpoint of the local service.
# NOTE(review): presumably this reports the current LLM configuration -- no output is recorded for this cell; confirm.
my $url = "http://localhost:$port/show_setup";
my $resp = HTTP::Tiny.get($url);
# Decode the response body into a string for display.
$resp<content>.decode


### PaLM API key

Set PALM_API_KEY with the URL:

```
http://localhost:9191/setup?llm=PaLM&api_key=<YOUR_API_KEY>
```


-----

## DSL translation

In [124]:
# Multi-line DSL command to be translated; its first statement (`DSL TARGET ...`) names
# the translation target explicitly.
my $cmd = "DSL TARGET Raku::Reshapers;
use data dfMeals;
inner join with dfFinelyFoodName over FOODID;
group by 'Cuisine';
find counts";

# Show character, word, and line counts of the command text.
text-stats($cmd)

(chars => 123 words => 17 lines => 5)

In [125]:
# Send the URI-encoded command to the `/translate` endpoint.
# NOTE(review): `lang=R` is passed, yet the response below reports "DSLTARGET": "Raku::Reshapers" --
# presumably the `DSL TARGET` statement inside the command takes precedence; confirm.
my $url = "http://localhost:$port/translate?command={uri_encode($cmd)}&lang=R";
# The response is a hash; its `content` entry holds the body as a byte buffer.
my $resp = HTTP::Tiny.get($url);

{content => Buf[uint8]:0x<7B 0A 20 20 22 55 53 45 52 49 44 22 3A 20 22 22 2C 0A 20 20 22 43 4F 44 45 22 3A 20 22 24 6F 62 6A 20 3D 20 64 66 4D 65 61 6C 73 20 3B 5C 6E 24 6F 62 6A 20 3D 20 6A 6F 69 6E 2D 61 63 72 6F 73 73 28 20 24 6F 62 6A 2C 20 64 66 46 69 6E 65 6C 79 46 6F 6F 64 4E 61 6D 65 2C 20 28 5C 22 46 4F 4F 44 49 44 ...>, headers => {content-length => 516, content-type => text/html; charset=utf-8}, protocol => HTTP/1.1, reason => OK, status => 200, success => True, url => http://localhost:9191/translate?command=DSL%20TARGET%20Raku::Reshapers;%0Ause%20data%20dfMeals;%0Ainner%20join%20with%20dfFinelyFoodName%20over%20FOODID;%0Agroup%20by%20'Cuisine';%0Afind%20counts&lang=R}

In [126]:
# Decode the byte-buffer body into a string; it holds the JSON document shown below.
my $content = $resp<content>.decode

{
  "USERID": "",
  "CODE": "$obj = dfMeals ;\n$obj = join-across( $obj, dfFinelyFoodName, (\"FOODID\"), join-spec=>\"Inner\") ;\n$obj = group-by($obj, \"Cuisine\") ;\nsay \"counts: \", $obj>>.elems",
  "DSLTARGET": "Raku::Reshapers",
  "DSLFUNCTION": "proto sub ToDataQueryWorkflowCode (Str $command, |) {*}",
  "COMMAND": "DSL TARGET Raku::Reshapers;\nuse data dfMeals;\ninner join with dfFinelyFoodName over FOODID;\ngroup by 'Cuisine';\nfind counts",
  "DSL": "DSL::English::DataQueryWorkflows",
  "STDERR": ""
}

In [127]:
#%html
# NOTE(review): the `#%html` first line is presumably a notebook cell magic that renders the
# cell result as HTML -- confirm with the kernel documentation; keep it as the first line.
# Parse the JSON response into a hash.
my %dsl-result = from-json($content);

# Full set of response fields (kept for reference); only a subset is displayed.
#my @field-names  = <COMMAND CODE DSL DSLTARGET DSLFUNCTION USERID STDERR>;
my @field-names  = <COMMAND CODE DSLTARGET>;
# Wrap the hash in a one-element array (a single table row), convert it to an HTML table,
# then add a style to every `<td ` so that line breaks in COMMAND and CODE are preserved
# and long lines wrap.
[%dsl-result,] 
==> to-html(:@field-names, align => 'left') 
==> { $_.subst('<td ', '<td style="white-space:pre-wrap; word-wrap:break-word" '):g }()

COMMAND,CODE,DSLTARGET
DSL TARGET Raku::Reshapers; use data dfMeals; inner join with dfFinelyFoodName over FOODID; group by 'Cuisine'; find counts,"$obj = dfMeals ; $obj = join-across( $obj, dfFinelyFoodName, (""FOODID""), join-spec=>""Inner"") ; $obj = group-by($obj, ""Cuisine"") ; say ""counts: "", $obj>>.elems",Raku::Reshapers


-------

## Using the NLP Template Engine

In [128]:
# Natural-language description of a latent semantic analysis workflow, given as a heredoc.
# Unlike the previous command, it has no `DSL TARGET` statement.
my $cmd = q:to/END/;
create from aDocs;
create document term matrix with stemming;
show document term matrix statistics;
apply the term weight functions IDF, None, Cosine;
extract 60 topics with the method NNMF;
echo topics table;
show statistical thesaurus for interested, likely, want
END

# Show character, word, and line counts of the command text.
text-stats($cmd)

(chars => 266 words => 39 lines => 7)

In [136]:
# Send the URI-encoded command to the `/translate/qas` endpoint, requesting WL code
# (`lang=WL`) for the "LatentSemanticAnalysis" template.
my $url = "http://localhost:$port/translate/qas?command={uri_encode($cmd)}&lang=WL&template=LatentSemanticAnalysis";
# The response is a hash; its `content` entry holds the body as a byte buffer.
my $resp = HTTP::Tiny.get($url);

{content => Buf[uint8]:0x<7B 0A 20 20 22 44 53 4C 22 3A 20 22 22 2C 0A 20 20 22 44 53 4C 46 55 4E 43 54 49 4F 4E 22 3A 20 22 63 6F 6E 63 72 65 74 69 7A 65 22 2C 0A 20 20 22 55 53 45 52 49 44 22 3A 20 22 22 2C 0A 20 20 22 43 4F 4D 4D 41 4E 44 22 3A 20 22 63 72 65 61 74 65 20 66 72 6F 6D 20 61 44 6F 63 73 3B 5C 6E 63 72 65 61 ...>, headers => {content-length => 1001, content-type => text/html; charset=utf-8}, protocol => HTTP/1.1, reason => OK, status => 200, success => True, url => http://localhost:9191/translate/qas?command=create%20from%20aDocs;%0Acreate%20document%20term%20matrix%20with%20stemming;%0Ashow%20document%20term%20matrix%20statistics;%0Aapply%20the%20term%20weight%20functions%20IDF,%20None,%20Cosine;%0Aextract%2060%20topics%20with%20the%20method%20NNMF;%0Aecho%20topics%20table;%0Ashow%20statistical%20thesaurus%20for%20interested,%20likely,%20want%0A&lang=WL&template=LatentSemanticAnalysis}

In [137]:
# Decode the byte-buffer body into a string; it holds the JSON document shown below.
my $content = $resp<content>.decode

{
  "DSL": "",
  "DSLFUNCTION": "concretize",
  "USERID": "",
  "COMMAND": "create from aDocs;\ncreate document term matrix with stemming;\nshow document term matrix statistics;\napply the term weight functions IDF, None, Cosine;\nextract 60 topics with the method NNMF;\necho topics table;\nshow statistical thesaurus for interested, likely, want\n",
  "DSLTARGET": "",
  "STDERR": "",
  "CODE": "lsaObj=\nLSAMonUnit[aDocs] ⟹\nLSAMonMakeDocumentTermMatrix[ \"StemmingRules\" -> True, \"StopWords\" -> Automatic] ⟹\nLSAMonEchoDocumentTermMatrixStatistics[\"LogBase\" -> 10] ⟹\nLSAMonApplyTermWeightFunctions[\"GlobalWeightFunction\" -> \"IDF\", \"LocalWeightFunction\" -> \"None\", \"NormalizerFunction\" -> \"Cosine\"] ⟹\nLSAMonExtractTopics[\"NumberOfTopics\" -> 60, Method -> \"NNMF\", \"MaxSteps\" -> 16, \"MinNumberOfDocumentsPerTerm\" -> 20] ⟹\nLSAMonEchoTopicsTable[\"NumberOfTerms\" -> 10] ⟹\nLSAMonEchoStatisticalThesaurus[ \"Words\" -> {\"interested\", \"likely\", \"want\"}];"
}

In [138]:
#%html
# NOTE(review): the `#%html` first line is presumably a notebook cell magic that renders the
# cell result as HTML -- confirm with the kernel documentation; keep it as the first line.
# Parse the JSON response into a hash.
my %dsl-result = from-json($content);

# Full set of response fields (kept for reference); only a subset is displayed.
#my @field-names  = <COMMAND CODE DSL DSLTARGET DSLFUNCTION USERID STDERR>;
my @field-names  = <COMMAND CODE DSLTARGET>;
# Wrap the hash in a one-element array (a single table row), convert it to an HTML table,
# then add a style to every `<td ` so that line breaks in COMMAND and CODE are preserved
# and long lines wrap.
[%dsl-result,] 
==> to-html(:@field-names, align => 'left') 
==> { $_.subst('<td ', '<td style="white-space:pre-wrap; word-wrap:break-word" '):g }()

COMMAND,CODE,DSLTARGET
"create from aDocs; create document term matrix with stemming; show document term matrix statistics; apply the term weight functions IDF, None, Cosine; extract 60 topics with the method NNMF; echo topics table; show statistical thesaurus for interested, likely, want","lsaObj= LSAMonUnit[aDocs] ⟹ LSAMonMakeDocumentTermMatrix[ ""StemmingRules"" -> True, ""StopWords"" -> Automatic] ⟹ LSAMonEchoDocumentTermMatrixStatistics[""LogBase"" -> 10] ⟹ LSAMonApplyTermWeightFunctions[""GlobalWeightFunction"" -> ""IDF"", ""LocalWeightFunction"" -> ""None"", ""NormalizerFunction"" -> ""Cosine""] ⟹ LSAMonExtractTopics[""NumberOfTopics"" -> 60, Method -> ""NNMF"", ""MaxSteps"" -> 16, ""MinNumberOfDocumentsPerTerm"" -> 20] ⟹ LSAMonEchoTopicsTable[""NumberOfTerms"" -> 10] ⟹ LSAMonEchoStatisticalThesaurus[ ""Words"" -> {""interested"", ""likely"", ""want""}];",


-------

## Question answering

In [139]:
# Text the questions are asked about; the trailing newline matches the heredoc form.
my $text = "Today is Wednesday and it is 35C hot!\n";

# Questions to be answered from the text above.
my @questions = 'What day?', 'How hot?', 'What temperature scale';

[What day? How hot? What temperature scale]

By default, the answers are returned as a plain string (because the parameter "pairs" is "false"):

In [140]:
# Ask only the first two questions about the text via the `/find-textual-answer` endpoint.
# The questions are joined with a newline -- the same separator the "pairs" request uses for
# the full list -- so each question stays on its own line instead of being fused into
# "What day?How hot?".
my $url = "http://localhost:$port/find-textual-answer?text={uri_encode($text)}&questions={uri_encode(@questions.head(2).join("\n"))}";
my $resp = HTTP::Tiny.get($url);
# Decode the byte-buffer body into a string with the answers.
my $content = $resp<content>.decode;

Wednesday
35C

To obtain a hash of question-answer pairs instead, set the parameter "pairs" to "true":

In [141]:
# Ask all questions (joined with newlines) and request the result as question-answer pairs
# by adding `pairs=true`.
my $url = "http://localhost:$port/find-textual-answer?text={uri_encode($text)}&questions={uri_encode(@questions.join("\n"))}&pairs=true";
my $resp = HTTP::Tiny.get($url);
# Decode the byte-buffer body into a string; it holds the JSON document shown below.
my $content = $resp<content>.decode;

{
  "What temperature scale?": "Celsius",
  "What day?": "Wednesday",
  "How hot?": "35C"
}

-----

## Number translation

### From numeric word forms to numbers

In [53]:
# Send two numeric word forms, separated by ";", to the `/translate/numeric` endpoint:
# one in Bulgarian and one in English. No `lang` parameter is given.
my $url = "http://localhost:$port/translate/numeric?command={uri_encode('двеста деветдесет и осем; one million fifty thousand and twenty seven')}";
# The response is a hash; its `content` entry holds the body as a byte buffer.
my $resp = HTTP::Tiny.get($url);

{content => Buf[uint8]:0x<7B 0A 20 20 22 43 4F 4D 4D 41 4E 44 22 3A 20 22 D0 B4 D0 B2 D0 B5 D1 81 D1 82 D0 B0 20 D0 B4 D0 B5 D0 B2 D0 B5 D1 82 D0 B4 D0 B5 D1 81 D0 B5 D1 82 20 D0 B8 20 D0 BE D1 81 D0 B5 D0 BC 3B 20 6F 6E 65 20 6D 69 6C 6C 69 6F 6E 20 66 69 66 74 79 20 74 68 6F 75 73 61 6E 64 20 61 6E 64 20 74 77 65 6E 74 79 ...>, headers => {content-length => 338, content-type => text/html; charset=utf-8}, protocol => HTTP/1.1, reason => OK, status => 200, success => True, url => http://localhost:9191/translate/numeric?command=%D0%B4%D0%B2%D0%B5%D1%81%D1%82%D0%B0%20%D0%B4%D0%B5%D0%B2%D0%B5%D1%82%D0%B4%D0%B5%D1%81%D0%B5%D1%82%20%D0%B8%20%D0%BE%D1%81%D0%B5%D0%BC;%20one%20million%20fifty%20thousand%20and%20twenty%20seven}

In [54]:
# Decode the byte-buffer body into a string; "CODE" holds one language => number pair per input.
$resp<content>.decode

{
  "COMMAND": "двеста деветдесет и осем; one million fifty thousand and twenty seven",
  "DSL": "Lingua::NumericWordForms",
  "USERID": "",
  "DSLTARGET": "Lingua::NumericWordForms",
  "CODE": [
    {
      "bulgarian": 298
    },
    {
      "english": 1050027
    }
  ],
  "DSLFUNCTION": "from-numeric-word-form"
}

### From numbers to numeric word forms

In [55]:
# Send two numbers, separated by ";", to the `/translate/numeric` endpoint and request
# their word forms in Bulgarian via `lang=Bulgarian`.
my $url = "http://localhost:$port/translate/numeric?command={uri_encode('32329;2323')}&lang=Bulgarian";
# The response is a hash; its `content` entry holds the body as a byte buffer.
my $resp = HTTP::Tiny.get($url);

{content => Buf[uint8]:0x<7B 0A 20 20 22 43 4F 44 45 22 3A 20 5B 0A 20 20 20 20 22 D1 82 D1 80 D0 B8 D0 B4 D0 B5 D1 81 D0 B5 D1 82 20 D0 B8 20 D0 B4 D0 B2 D0 B5 20 D1 85 D0 B8 D0 BB D1 8F D0 B4 D0 B8 20 D1 82 D1 80 D0 B8 D1 81 D1 82 D0 B0 20 D0 B4 D0 B2 D0 B0 D0 B4 D0 B5 D1 81 D0 B5 D1 82 20 D0 B8 20 D0 B4 D0 B5 D0 B2 D0 B5 ...>, headers => {content-length => 340, content-type => text/html; charset=utf-8}, protocol => HTTP/1.1, reason => OK, status => 200, success => True, url => http://localhost:9191/translate/numeric?command=32329;2323&lang=Bulgarian}

In [56]:
# Decode the byte-buffer body into a string; "CODE" holds one word form per input number.
$resp<content>.decode

{
  "CODE": [
    "тридесет и две хиляди триста двадесет и девет",
    "две хиляди триста двадесет и три"
  ],
  "DSLTARGET": "Lingua::NumericWordForms",
  "COMMAND": "32329;2323",
  "USERID": "",
  "DSLFUNCTION": "to-numeric-word-form",
  "DSL": "Lingua::NumericWordForms"
}