Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CommonJS Prototype Setup For CommonJS Support #129

Closed
john1234brown opened this issue Feb 19, 2024 · 1 comment
Closed

CommonJS Prototype Setup For CommonJS Support #129

john1234brown opened this issue Feb 19, 2024 · 1 comment

Comments

@john1234brown
Copy link

I would love to use a CommonJS version of the Magika API and service. Could you make one available so that it can be loaded in Node with regular require() statements instead of import statements? Requiring imports forces users toward TypeScript/ES modules, even though backward-compatible CommonJS support is possible, as existing TypeScript modules have shown in the past.

@john1234brown
Copy link
Author

john1234brown commented Feb 20, 2024

CommonJS Support ProtoType

Tested and Working as of 2/19/2024 by @john1234brown

Generated by Chat GPT Model 3.5

magika.js

/*
This file has been generated by chatGPT 3.5 for the usage of magika in commonJS
The request and generation of this code from chatGPT 3.5 was started and created
on 2/19/2024 at 6:55 pm EST time by john1234brown
and has been tested by john1234brown in node CommonJS applications
This most likely won't work in a browser; the TextDecoder usage will need to be changed!
I have done this to help the open source community stay secure. ES-module applications shouldn't be the only supported type: to really offer more secure services worldwide, more languages need support, as well as backward compatibility, for the best experience in the development community!
I do not take credit for this work, although it did take time to test
and simulate to ensure it works, so please give credit where credit is due.
I am a self-taught developer since 2014 (at the age of 14), now about to turn 24,
and I enjoy doing this as a hobby in my spare time, so please give credit
where credit is due, please... and thank you kindly!
*/
const tf = require("@tensorflow/tfjs-node");
const fetch = require("node-fetch");

/**
 * Labels that are produced by the special-case paths rather than taken
 * directly from a model label name.
 *
 * The entry order is significant: `Object.values(ContentType)` is consumed
 * positionally when the full label/score map is built.
 */
const ContentType = Object.freeze(
  Object.fromEntries([
    ["EMPTY", "empty"],
    ["GENERIC_TEXT", "txt"],
    ["UNKNOWN", "unknown"],
  ])
);

/**
 * Model configuration loaded from a remote JSON document.
 *
 * After a successful `load()` the instance exposes:
 *   - `minFileSizeForDl`: value of `min_file_size_for_dl`
 *   - `paddingToken`:     value of `padding_token`
 *   - `labels`:           value of `labels`
 *   - `begBytes` / `midBytes` / `endBytes`: sizes of the three feature windows
 *   - `extractSize`:      the first of beg/mid/end that is greater than zero
 */
class Config {
  /**
   * Fetches and parses the configuration. A second call on an instance that
   * already has `labels` set resolves immediately without fetching again.
   *
   * @param {string} configURL - URL of the JSON configuration.
   * @returns {Promise<void>} Resolves once all fields are populated.
   * @throws {Error} (as a rejection) when the server answers with a non-2xx
   *   status, or when the body is not valid JSON.
   */
  load(configURL) {
    if (this.labels) return Promise.resolve();
    return fetch(configURL)
      .then((response) => {
        // fetch() only rejects on network failures; surface HTTP errors
        // explicitly instead of failing later with a confusing parse error.
        if (!response.ok) {
          throw new Error(
            `Failed to load Magika config from ${configURL}: HTTP ${response.status}`
          );
        }
        return response.json();
      })
      .then((config) => {
        const begSize = config["input_size_beg"];
        // Each window has its own key. Fall back to the beginning size when a
        // key is missing so configs that only declare `input_size_beg` keep
        // the behaviour they had before (all three windows the same size).
        const sizeOrBeg = (key) =>
          config[key] !== undefined ? config[key] : begSize;

        this.minFileSizeForDl = config["min_file_size_for_dl"];
        this.paddingToken = config["padding_token"];
        this.labels = config["labels"];
        this.begBytes = begSize;
        this.midBytes = sizeOrBeg("input_size_mid");
        this.endBytes = sizeOrBeg("input_size_end");
        this.extractSize =
          this.begBytes > 0
            ? this.begBytes
            : this.midBytes > 0
            ? this.midBytes
            : this.endBytes;
      });
  }
}

/**
 * Thin wrapper around a TensorFlow.js graph model.
 */
class Model {
  /**
   * Loads the graph model once; later calls on an instance that already
   * holds a model resolve immediately.
   *
   * @param {string} modelURL - URL passed to `tf.loadGraphModel`.
   * @returns {Promise<void>} Resolves once `this.model` is set.
   */
  load(modelURL) {
    if (this.model) return Promise.resolve();
    return tf.loadGraphModel(modelURL).then((model) => {
      this.model = model;
    });
  }

  /**
   * Runs the model on one feature vector.
   *
   * @param {number[]} features - Feature vector; it is wrapped in an outer
   *   array before being turned into the input tensor.
   * @returns {Array} A two-element array `[labelIndex, labelProbabilities]`:
   *   the arg-max index of the squeezed output and the squeezed output values
   *   as returned by `dataSync()`.
   */
  predict(features) {
    const modelInput = tf.tensor([features]);
    let rawOutput;
    let modelOutput;
    let maxProbability;
    try {
      rawOutput = this.model.predict(modelInput);
      // squeeze() hands back a separate tensor, so the raw prediction has to
      // be released on its own; it used to be leaked on every call.
      modelOutput = tf.squeeze(rawOutput);
      maxProbability = tf.argMax(modelOutput);
      const labelIndex = maxProbability.dataSync()[0];
      const labelProbabilities = modelOutput.dataSync();
      return [labelIndex, labelProbabilities];
    } finally {
      // Release every tensor created above, also when a step threw.
      // The Set guards against disposing the same object twice.
      const created = new Set([
        maxProbability,
        modelOutput,
        rawOutput,
        modelInput,
      ]);
      for (const tensor of created) {
        if (tensor) tensor.dispose();
      }
    }
  }
}

/**
 * Content-type detector: turns the bytes of a file into a feature vector,
 * runs the model on it and maps the prediction to a label.
 *
 * Call `load()` before any `identify*` method.
 */
class Magika {
  /**
   * Creates and loads the configuration and the model in parallel.
   *
   * @param {Object} [options]
   * @param {string} [options.modelURL] - Model location; a falsy value
   *   selects the hosted default.
   * @param {string} [options.configURL] - Config location; a falsy value
   *   selects the hosted default.
   * @returns {Promise<void>}
   */
  async load({ modelURL, configURL } = {}) {
    const resolvedModelURL =
      modelURL || "https://google.github.io/magika/model/model.json";
    const resolvedConfigURL =
      configURL || "https://google.github.io/magika/model/config.json";
    this.config = new Config();
    this.model = new Model();
    await Promise.all([
      this.config.load(resolvedConfigURL),
      this.model.load(resolvedModelURL),
    ]);
  }

  /**
   * Identifies the content type of a byte sequence.
   *
   * @param {Uint8Array|Buffer} fileBytes - Raw file content.
   * @returns {Promise<{label: string, score: number}>}
   */
  async identifyBytes(fileBytes) {
    return this._identifyBytes(fileBytes, (args) => this._generateResult(args));
  }

  /**
   * Like `identifyBytes`, but the result also carries a `labels` object
   * mapping every known label name to its score.
   *
   * @param {Uint8Array|Buffer} fileBytes - Raw file content.
   * @returns {Promise<{label: string, score: number, labels: Object}>}
   */
  async identifyBytesFull(fileBytes) {
    return this._identifyBytes(fileBytes, (args) =>
      this._generateResultFull(args)
    );
  }

  /** Builds the compact result: only the winning label and its score. */
  _generateResult({ label, score }) {
    return { label, score };
  }

  /**
   * Builds the full result, including a score for every label name.
   *
   * When `scores` is absent (special-case paths), the winning label gets
   * `score` and every other label gets 0.
   */
  _generateResultFull({ label, score, scores }) {
    const modelLabels = Object.values(this.config.labels).map((l) => l.name);
    // Only append the special names the model labels do not already contain.
    // A duplicate key would land past the end of `scores` and overwrite the
    // real model score for that name with 0.
    const labels = [
      ...modelLabels,
      ...Object.values(ContentType).filter(
        (name) => !modelLabels.includes(name)
      ),
    ];
    const allScores = scores || labels.map((l) => (l === label ? score : 0));
    return {
      label,
      score,
      labels: Object.fromEntries(
        labels.map((l, i) => [l, allScores[i] || 0])
      ),
    };
  }

  /**
   * Special case for inputs too small for the model: report generic text if
   * the bytes decode as strict UTF-8, otherwise unknown.
   */
  _getResultForAFewBytes(fileBytes, generateResult) {
    const decoder = new TextDecoder("utf-8", { fatal: true });
    try {
      decoder.decode(fileBytes);
      return generateResult({
        score: 1.0,
        label: ContentType.GENERIC_TEXT,
      });
    } catch (error) {
      // With `fatal: true`, decode() throws on malformed UTF-8; that is the
      // expected signal for "not text", not an error to propagate.
      return generateResult({ score: 1.0, label: ContentType.UNKNOWN });
    }
  }

  /**
   * Shared implementation of the `identify*` methods.
   *
   * @param {Uint8Array|Buffer} fileBytes - Raw file content.
   * @param {Function} generateResult - Receives `{label, score, scores?}`
   *   and returns the value handed back to the caller.
   */
  async _identifyBytes(fileBytes, generateResult) {
    if (fileBytes.length === 0)
      return generateResult({ score: 1.0, label: ContentType.EMPTY });
    if (fileBytes.length <= this.config.minFileSizeForDl)
      return this._getResultForAFewBytes(fileBytes, generateResult);
    const [extractionResult, features] = await this._extractFeaturesFromBytes(
      fileBytes,
      generateResult
    );
    if (extractionResult) return extractionResult;
    // End of special cases, now we can do deep learning!
    return this._generateResultFromPrediction(
      this.model.predict(features),
      generateResult
    );
  }

  /**
   * Maps a raw prediction to a result. If the best score does not reach the
   * label's threshold, the label is replaced by generic text (for labels
   * flagged `is_text`) or unknown.
   */
  _generateResultFromPrediction([labelIndex, scores], generateResult) {
    const score = scores[labelIndex];
    const labelConfig = this.config["labels"][labelIndex];
    const { name, threshold } = labelConfig;
    if (score >= threshold)
      return generateResult({ score, label: name, scores });
    if (labelConfig["is_text"])
      return generateResult({ score, label: ContentType.GENERIC_TEXT, scores });
    return generateResult({ score, label: ContentType.UNKNOWN, scores });
  }

  /**
   * Builds the model input: three windows taken from the beginning, the
   * middle and the end of the file, each padded with `paddingToken` to its
   * configured size, concatenated in that order.
   *
   * @returns {Promise<Array>} `[result, null]` when the input is too small
   *   for the model, otherwise `[null, features]`.
   */
  async _extractFeaturesFromBytes(fileBytes, generateResult) {
    const { begBytes, midBytes, endBytes, paddingToken } = this.config;
    // 16-bit elements so the padding token can lie outside the byte range.
    const fileArray = new Uint16Array(fileBytes);
    if (fileArray.length <= this.config.minFileSizeForDl)
      return [
        this._getResultForAFewBytes(fileBytes, generateResult),
        null,
      ];

    const beg = new Uint16Array(begBytes).fill(paddingToken);
    const mid = new Uint16Array(midBytes).fill(paddingToken);
    const end = new Uint16Array(endBytes).fill(paddingToken);

    // Beginning window: left-aligned, padded on the right.
    beg.set(fileArray.slice(0, begBytes), 0);

    // Middle window: centred on the middle of the file. The midpoint used to
    // be computed as 2 * round(length / 2), i.e. roughly the file length,
    // which anchored this window at the end of the file instead.
    const halfpoint = Math.round(fileArray.length / 2);
    const startHalf = Math.max(0, Math.floor(halfpoint - midBytes / 2));
    const halfChunk = fileArray.slice(startHalf, startHalf + midBytes);
    // Centre a short chunk inside the window, padding on both sides.
    mid.set(halfChunk, Math.floor((midBytes - halfChunk.length) / 2));

    // End window: right-aligned, padded on the left.
    const endChunk = fileArray.slice(Math.max(0, fileArray.length - endBytes));
    const endOffset = Math.max(0, endBytes - endChunk.length);
    end.set(endChunk, endOffset);

    return [null, [...beg, ...mid, ...end]];
  }
}

module.exports = { Magika };

To use it, add this file to your project's root folder and require it like so:

index.js

const { Magika } = require('./magika.js');

/**
 * Example: loads the model and configuration, then prints the prediction
 * for an in-memory buffer.
 */
async function runMagika() {
  const magika = new Magika();
  await magika.load(); // Load the model and configuration
  const data = Buffer.from('your file data here');
  const prediction = await magika.identifyBytes(data);
  console.log(prediction);
}

// Report failures (e.g. model download errors) instead of leaving the
// promise rejection unhandled.
runMagika().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});

@john1234brown john1234brown changed the title Suggestion TypeScript Support But No Support For Require in commonJS CommonJS Prototype Setup For CommonJS Support Feb 20, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant