# Graphdb + Dremio Playground


**NOTE**: this notebook is written in JavaScript, so you need Jupyter + a JavaScript interpreter to run it. You may use the Deno interpreter, which is powerful and comes with a prebuild support for notebooks
```bash
curl -fsSL https://deno.land/install.sh | sh  # Install Deno
"$HOME/.deno/bin/deno" jupyter --install      # Add Deno to Jupyter
```
now just restart VSCode/Jupyter and you should see Deno amongst the kernels

In [1]:
const hello = "hello world!"
console.log(hello)

hello world!


In [10]:
import { Client } from "https://deno.land/x/postgres/mod.ts";

const client = new Client({
  user: "user",
  database: "example",
  password: "pass",
  hostname: "localhost",
  port: 5432,
});
await client.connect();


## Setup Dremio

<https://docs.dremio.com/cloud/reference/api/>

In [1]:
const hostname = "http://localhost:9047";
const endpoint = `${hostname}/api/v3`;
const userName = 'dremioUser'
const password = 'dremioPass1' // password needs a number, otherwise you will get error 403

const post = async (url: string, data: Record<string, unknown>) => {
    const response = await fetch(url, {
        method: "POST",
        headers: {
        "Content-Type": "application/json",
        },
        body: JSON.stringify(data),
    });

    if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);  
    return response.json();
}


// create first user if none
const firstName = 'admin'
const lastName = ''
const email = 'luca.fabbian.1999@gmail.com'
const createdAt = Date.now()
const response = await fetch(`${hostname}/apiv2/bootstrap/firstuser`, {
    method: "PUT",
    headers: {
      "Authorization": '_dremionull',
      "Content-Type": "application/json",
    },
    body: JSON.stringify({userName, password, firstName, lastName, email, createdAt}),
});

if (!response.ok ){
  if(response.status !== 400) throw new Error(`Unexpected HTTP error! status: ${response.status}`);
  console.log("User alredy there, no need to create a new one")
}else{
  console.log("New user created", await response.json())
}


// get auth token
const {token, expires } = await post(`${hostname}/apiv2/login`, { userName, password })
console.log('Auth token:', token, '\nExpires at:', (new Date(expires)).toLocaleTimeString('en-GB'), '(30hours)')

// helper function to make dremio api calls
const dremio = async(method: 'GET'|'POST', url: string, data: Record<string, unknown>)  => {
    const response = await fetch(`${endpoint}${url}`, {
        method,
        headers: {
            'Authorization': `Bearer ${token}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify(data),
    });

    if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);  
    return response.json();
}

New user created {
  resourcePath: "/user/dremioUser",
  userName: "dremioUser",
  userConfig: {
    uid: { id: "c79a50fe-72d9-46cd-b8fc-f313cecc4db5" },
    userName: "dremioUser",
    firstName: "admin",
    lastName: "",
    email: "luca.fabbian.1999@gmail.com",
    createdAt: 1726054479889,
    modifiedAt: 1726054479888,
    version: "iNT7OexMCVY=",
    active: true
  },
  name: "dremioUser",
  id: "c79a50fe-72d9-46cd-b8fc-f313cecc4db5",
  links: { self: "/user/dremioUser" }
}
Auth token: 1jeef3og4ltsqegcuddp33fe61 
Expires at: 19:34:40 (30hours)


In [20]:
// inspect source(s)
await dremio('GET', '/catalog/1149e2d9-75be-49d3-80fc-04b3f56e828b')

{
  entityType: [32m"source"[39m,
  config: {
    path: [32m"/csv"[39m,
    defaultCtasFormat: [32m"ICEBERG"[39m,
    isPartitionInferenceEnabled: [33mfalse[39m
  },
  state: { status: [32m"good"[39m, suggestedUserAction: [32m""[39m, messages: [] },
  id: [32m"1149e2d9-75be-49d3-80fc-04b3f56e828b"[39m,
  tag: [32m"6JUaz9QI0Xg="[39m,
  type: [32m"NAS"[39m,
  name: [32m"mockcsv"[39m,
  createdAt: [32m"2024-09-11T12:10:18.618Z"[39m,
  metadataPolicy: {
    authTTLMs: [33m86400000[39m,
    namesRefreshMs: [33m3600000[39m,
    datasetRefreshAfterMs: [33m3600000[39m,
    datasetExpireAfterMs: [33m10800000[39m,
    datasetUpdateMode: [32m"PREFETCH_QUERIED"[39m,
    deleteUnavailableDatasets: [33mtrue[39m,
    autoPromoteDatasets: [33mfalse[39m
  },
  accelerationGracePeriodMs: [33m0[39m,
  accelerationRefreshPeriodMs: [33m0[39m,
  accelerationRefreshSchedule: [32m"0 0 8 * * ?"[39m,
  accelerationActivePolicyType: [32m"PERIOD"[39m,
  accelerationNeve

In [31]:
await dremio('POST', '/catalog/' + encodeURIComponent('dremio:/mockcsv/generated_mockdata_60GB.csv'), {
    entityType: 'dataset',
    id: "dremio:/mockcsv/generated_mockdata_60GB.csv",
    path: [ "mockcsv", "generated_mockdata_60GB.csv" ],
    type: "DATASET",
    "format": {
        "type": "Text",
        "fullPath":  [ "mockcsv", "generated_mockdata_60GB.csv" ],
        "ctime": 0,
        "isFolder": false,
        "location": "/mockcsv/generated_mockdata_60GB.csv",
        "fieldDelimiter": ",",
        "skipFirstLine": false,
        "extractHeader": false,
        "quote": "\"",
        "comment": "#",
        "escape": "\"",
        "lineDelimiter": "\r\n",
        "autoGenerateColumnNames": true,
        "trimHeader": true
    }
})

Error: HTTP error! status: 400

In [15]:
await dremio('POST', '/catalog', {
    entityType: 'source',
    type: "NAS",
    name: "mockcsv",
    config: {
        path: "/csv",
        defaultCtasFormat: "ICEBERG",
        isPartitionInferenceEnabled: false
    },
})

{
  entityType: [32m"source"[39m,
  config: {
    path: [32m"/csv"[39m,
    defaultCtasFormat: [32m"ICEBERG"[39m,
    isPartitionInferenceEnabled: [33mfalse[39m
  },
  state: { status: [32m"good"[39m, suggestedUserAction: [32m""[39m, messages: [] },
  id: [32m"1149e2d9-75be-49d3-80fc-04b3f56e828b"[39m,
  tag: [32m"6JUaz9QI0Xg="[39m,
  type: [32m"NAS"[39m,
  name: [32m"mockcsv"[39m,
  createdAt: [32m"2024-09-11T12:10:18.618Z"[39m,
  metadataPolicy: {
    authTTLMs: [33m86400000[39m,
    namesRefreshMs: [33m3600000[39m,
    datasetRefreshAfterMs: [33m3600000[39m,
    datasetExpireAfterMs: [33m10800000[39m,
    datasetUpdateMode: [32m"PREFETCH_QUERIED"[39m,
    deleteUnavailableDatasets: [33mtrue[39m,
    autoPromoteDatasets: [33mfalse[39m
  },
  accelerationGracePeriodMs: [33m0[39m,
  accelerationRefreshPeriodMs: [33m0[39m,
  accelerationRefreshSchedule: [32m"0 0 8 * * ?"[39m,
  accelerationActivePolicyType: [32m"PERIOD"[39m,
  accelerationNeve

In [11]:
await dremio('POST', '/catalog', {
    entityType: 'source',
    type: "POSTGRES",
    name: "mockdb",
    config: {
        hostname: "postgres",
        port: "5432",
        databaseName: "example",
        username: "user",
        password: "pass",
        authenticationType: "MASTER",
        fetchSize: 200,
        useSsl: false,
        encryptionValidationMode: "CERTIFICATE_AND_HOSTNAME_VALIDATION",
        maxIdleConns: 8,
        idleTimeSec: 60,
        queryTimeoutSec: 0
    },
})

{
  entityType: [32m"source"[39m,
  config: {
    hostname: [32m"postgres"[39m,
    port: [32m"5432"[39m,
    databaseName: [32m"example"[39m,
    username: [32m"user"[39m,
    password: [32m"$DREMIO_EXISTING_VALUE$"[39m,
    authenticationType: [32m"MASTER"[39m,
    fetchSize: [33m200[39m,
    useSsl: [33mfalse[39m,
    encryptionValidationMode: [32m"CERTIFICATE_AND_HOSTNAME_VALIDATION"[39m,
    maxIdleConns: [33m8[39m,
    idleTimeSec: [33m60[39m,
    queryTimeoutSec: [33m0[39m
  },
  state: { status: [32m"good"[39m, suggestedUserAction: [32m""[39m, messages: [] },
  id: [32m"edbdc74b-2554-4433-bba1-19410cbd4162"[39m,
  tag: [32m"h0ekDOX3PnQ="[39m,
  type: [32m"POSTGRES"[39m,
  name: [32m"mockdb"[39m,
  createdAt: [32m"2024-09-11T12:05:04.754Z"[39m,
  metadataPolicy: {
    authTTLMs: [33m86400000[39m,
    namesRefreshMs: [33m3600000[39m,
    datasetRefreshAfterMs: [33m3600000[39m,
    datasetExpireAfterMs: [33m10800000[39m,
    datasetU

## Generate mock data

In [2]:
import { TextLineStream } from "jsr:@std/streams@0.223.0/text-line-stream";
const seed = 12345;

// Simple seedable random number generator (Linear Congruential Generator)
const seededRandom = (seed: number): () => number  => {
    let value = seed % 2147483647;
    return () => {
        value = (value * 16807) % 2147483647;
        return (value - 1) / 2147483646;
    }
}
let rng = seededRandom(seed);
const randomString = (chars: string, length: number): string => {
    let result = '';
    for (let i = 0; i < length; i++) {
        result += chars.charAt(Math.floor(rng() * chars.length));
    }
    return result;
}
const randomLowercase = (length: number) => randomString('abcdefghijklmnopqrstuvwxyz', length)
const randomName = (length: number) => randomString('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 1) + randomLowercase(length - 1)
const randomElement = <T>(array: T[]) : T => array[Math.floor(rng()*array.length)]


const domains = ['gmail.com', 'cs.aau.dk', 'unipd.it', 'studenti.unipd.it', 'hotmail.com', 'example.com']
const cities = ['Aalborg', 'Padova', 'New york', 'Roma', 'London', 'Paris', 'Venice']


const generateMockCSV = (filePath: string, GBs : number) => {
    const seed = 12345
    rng = seededRandom(seed); // resets the seed
    Deno.writeTextFileSync(filePath, 'Name,Surname,Email,Value,City\n', { append: false });
    
    const rowsToGenerate = (1000 * 1000 * 16) * GBs; // Number of rows ( 1Gb = 1000*1000*16)
    const chunkSize = 10000;        // Chunk size to control memory usage  
    for (let chunk = 0; chunk < rowsToGenerate / chunkSize; chunk++) {
        let data = '';
        for (let i = 0; i < chunkSize; i++) {
            const name = randomName(8);
            const surname = randomName(10 + Math.floor(rng()*6));
            const domain = randomElement(domains)
            const email = `${name}.${surname}@${domain}`;
            const value = Math.floor(rng() * 10000);
            const city = randomElement(cities)
            data += `${name},${surname},${email},${value},${city}\n`;
        }
        Deno.writeTextFileSync(filePath, data, { append: true });
    }
    console.log(`Seeded CSV file generated at ${filePath}`);   
}

const logMatchesInsideCSV = async (filePath: string) => {
    using f = await Deno.open(filePath);
    const readable = f.readable
        .pipeThrough(new TextDecoderStream()) // decode Uint8Array to string
        .pipeThrough(new TextLineStream()) // split string line by line
    for await (const data of readable) {
        const line : string = data
        if(line.startsWith('Esscqwxx,')) console.log(line)
    }
}


In [3]:
await generateMockCSV('./data/csv/generated_mockdata_30GB.csv', 30)

Seeded CSV file generated at ./data/csv/generated_mockdata_30GB.csv


In [7]:
await logMatchesInsideCSV('./data/csv/generated_mockdata_30GB.csv')

Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova


In [4]:
await logMatchesInsideCSV('./data/csv/generated_mockdata_60GB.csv')

Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova


In [6]:
await logMatchesInsideCSV('./data/csv/generated_mockdata_60GB.csv')

Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205,Padova
Esscqwxx,Qyboirdgwujtpjq,Esscqwxx.Qyboirdgwujtpjq@cs.aau.dk,8205