-
Notifications
You must be signed in to change notification settings - Fork 395
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CommonJS Prototype Setup For CommonJS Support #129
Comments
CommonJS Support Prototype
Tested and working as of 2/19/2024 by @john1234brown
Generated by ChatGPT model 3.5
magika.js
/*
This file has been generated by chatGPT 3.5 for the usage of magika in commonJS
The request and generation of this code from chatGPT 3.5 was started and created
on 2/19/2024 at 6:55 pm EST time by john1234brown
and has been tested by john1234brown in node CommonJS applications
This most likely won't work for a browser implementation; the TextDecoder will need to be changed!
I have done this to help the open source community stay secure. Module applications shouldn't be the only supported type: to really offer more secure services worldwide, more languages need support, as well as backward compatibility, for the best experience in the development community!
I do not take credit for this work, although it did take time to test
and simulate to ensure it works, so please give credit where credit is due.
I am a self-taught developer since 2014 at the age of 14, now fixing to be 24,
and I enjoy doing this as a hobby in my pastime, so please give credit
where credit is due please... and thank you kindly!
*/
const tf = require("@tensorflow/tfjs-node");
const fetch = require("node-fetch");
/**
 * Labels used for results that are produced without consulting the model
 * (empty input) or when a prediction is replaced by a generic fallback.
 * The object is frozen so callers cannot alter the shared constants.
 */
const ContentType = Object.freeze({
  // Returned for zero-length input.
  EMPTY: "empty",
  // Returned for text that is not attributed to a more specific label.
  GENERIC_TEXT: "txt",
  // Returned when no label can be attributed to the content.
  UNKNOWN: "unknown",
});
/**
 * Model configuration downloaded from a JSON document: label table, padding
 * token, minimum size for model inference, and the sizes of the three
 * feature windows (beginning, middle, end).
 */
class Config {
  /**
   * Fetches the configuration and stores its values on this instance.
   * Calling it again after a successful load is a no-op.
   *
   * @param {string} configURL - Location of the JSON configuration.
   * @returns {Promise<void>} Resolves once the fields are populated.
   * @throws {Error} If the server answers with a non-success HTTP status.
   */
  load(configURL) {
    // `labels` doubles as the "already loaded" marker.
    if (this.labels) return Promise.resolve();
    return fetch(configURL)
      .then((response) => {
        // Fail with a readable message instead of an obscure JSON parse
        // error when the URL answers with an error page.
        if (!response.ok) {
          throw new Error(
            `Failed to load config from ${configURL}: HTTP ${response.status}`
          );
        }
        return response.json();
      })
      .then((config) => {
        // Each window has its own key. When a window-specific key is absent
        // we fall back to "input_size_beg", which is the value the previous
        // implementation used for all three windows.
        const windowSize = (key) =>
          config[key] !== undefined ? config[key] : config["input_size_beg"];

        this.minFileSizeForDl = config["min_file_size_for_dl"];
        this.paddingToken = config["padding_token"];
        this.labels = config["labels"];
        this.begBytes = config["input_size_beg"];
        this.midBytes = windowSize("input_size_mid");
        this.endBytes = windowSize("input_size_end");

        // First non-zero window size, checked in beg, mid, end order.
        if (this.begBytes > 0) {
          this.extractSize = this.begBytes;
        } else if (this.midBytes > 0) {
          this.extractSize = this.midBytes;
        } else {
          this.extractSize = this.endBytes;
        }
      });
  }
}
/**
 * Thin wrapper around a TensorFlow.js graph model.
 */
class Model {
  /**
   * Loads the graph model once; later calls are no-ops.
   *
   * @param {string} modelURL - Location of the model description.
   * @returns {Promise<void>} Resolves when the model is ready.
   */
  load(modelURL) {
    if (this.model) return Promise.resolve();
    return tf.loadGraphModel(modelURL).then((model) => {
      this.model = model;
    });
  }

  /**
   * Runs the model on one feature vector.
   *
   * @param {number[]} features - Feature vector for a single input.
   * @returns {[number, ArrayLike<number>]} The index of the highest score
   *   and the per-label scores read back from the model output.
   */
  predict(features) {
    // Every tensor created here is tracked so that all of them, including
    // the raw prediction that feeds tf.squeeze, are released even when a
    // step throws.
    const tracked = [];
    const track = (tensor) => {
      tracked.push(tensor);
      return tensor;
    };
    try {
      // Wrap the vector in an outer array to form a batch of one.
      const modelInput = track(tf.tensor([features]));
      const rawOutput = track(this.model.predict(modelInput));
      const modelOutput = track(tf.squeeze(rawOutput));
      const maxProbability = track(tf.argMax(modelOutput));
      const labelIndex = maxProbability.dataSync()[0];
      const labelProbabilities = modelOutput.dataSync();
      return [labelIndex, labelProbabilities];
    } finally {
      for (const tensor of tracked) tensor.dispose();
    }
  }
}
/**
 * Content-type detector: handles trivial inputs directly and otherwise
 * builds a feature vector from the bytes and asks the model for a label.
 * `load()` must complete before any `identify*` method is used.
 */
class Magika {
  /**
   * Downloads the configuration and the model in parallel.
   *
   * @param {object} [options]
   * @param {string} [options.modelURL] - Overrides the default model URL.
   * @param {string} [options.configURL] - Overrides the default config URL.
   * @returns {Promise<void>}
   */
  async load({ modelURL, configURL } = {}) {
    const resolvedModelURL =
      modelURL || "https://google.github.io/magika/model/model.json";
    const resolvedConfigURL =
      configURL || "https://google.github.io/magika/model/config.json";
    this.config = new Config();
    this.model = new Model();
    await Promise.all([
      this.config.load(resolvedConfigURL),
      this.model.load(resolvedModelURL),
    ]);
  }

  /**
   * Identifies the content type of the given bytes.
   *
   * @param {Uint8Array} fileBytes - Raw content (a Node Buffer works too).
   * @returns {Promise<{label: string, score: number}>}
   */
  async identifyBytes(fileBytes) {
    return this._identifyBytes(fileBytes, (args) => this._generateResult(args));
  }

  /**
   * Like identifyBytes, but also reports a score for every known label.
   *
   * @param {Uint8Array} fileBytes - Raw content (a Node Buffer works too).
   * @returns {Promise<{label: string, score: number, labels: Object<string, number>}>}
   */
  async identifyBytesFull(fileBytes) {
    return this._identifyBytes(fileBytes, (args) =>
      this._generateResultFull(args)
    );
  }

  /** Builds the compact result: just the label and its score. */
  _generateResult({ label, score }) {
    return { label, score };
  }

  /**
   * Builds the detailed result with a per-label score table.
   *
   * @param {object} args
   * @param {string} args.label - Chosen label.
   * @param {number} args.score - Score of the chosen label.
   * @param {ArrayLike<number>} [args.scores] - Scores indexed like the
   *   config's label table; when absent, only `label` gets a non-zero score.
   */
  _generateResultFull({ label, score, scores }) {
    const modelLabels = Object.values(this.config.labels).map((l) => l.name);
    // Only append the generic content types the model does not already
    // define. A duplicated name would make Object.fromEntries keep the later
    // entry and overwrite the model's score for that label with 0.
    const extraLabels = Object.values(ContentType).filter(
      (l) => !modelLabels.includes(l)
    );
    const labels = [...modelLabels, ...extraLabels];
    const allScores = scores || labels.map((l) => (l === label ? score : 0));
    return {
      label,
      score,
      labels: Object.fromEntries(
        labels.map((l, i) => [l, allScores[i] || 0])
      ),
    };
  }

  /**
   * Classifies content that is too small for the model: generic text when
   * the bytes are valid UTF-8, unknown otherwise.
   */
  _getResultForAFewBytes(fileBytes, generateResult) {
    // `fatal: true` makes decode() throw on invalid UTF-8, which is the signal
    // used here; the decoded string itself is not needed.
    const decoder = new TextDecoder("utf-8", { fatal: true });
    try {
      decoder.decode(fileBytes);
      return generateResult({
        score: 1.0,
        label: ContentType.GENERIC_TEXT,
      });
    } catch (error) {
      return generateResult({ score: 1.0, label: ContentType.UNKNOWN });
    }
  }

  /**
   * Shared implementation of the identify methods.
   *
   * @param {Uint8Array} fileBytes - Raw content.
   * @param {Function} generateResult - Formats the final result object.
   */
  async _identifyBytes(fileBytes, generateResult) {
    if (fileBytes.length === 0)
      return generateResult({ score: 1.0, label: ContentType.EMPTY });
    if (fileBytes.length <= this.config.minFileSizeForDl)
      return this._getResultForAFewBytes(fileBytes, generateResult);
    const [extractionResult, features] = await this._extractFeaturesFromBytes(
      fileBytes,
      generateResult
    );
    if (extractionResult) return extractionResult;
    // End of special cases, now we can do deep learning!
    return this._generateResultFromPrediction(
      this.model.predict(features),
      generateResult
    );
  }

  /**
   * Turns a model prediction into a result, replacing the predicted label
   * with a generic one when its score is below that label's threshold.
   *
   * @param {[number, ArrayLike<number>]} prediction - Index of the best
   *   label and the scores for all labels.
   * @param {Function} generateResult - Formats the final result object.
   */
  _generateResultFromPrediction([labelIndex, scores], generateResult) {
    const score = scores[labelIndex];
    const labelConfig = this.config["labels"][labelIndex];
    const { name, threshold } = labelConfig;
    if (score >= threshold)
      return generateResult({ score, label: name, scores });
    if (labelConfig["is_text"])
      return generateResult({ score, label: ContentType.GENERIC_TEXT, scores });
    return generateResult({ score, label: ContentType.UNKNOWN, scores });
  }

  /**
   * Builds the model input: three fixed-size windows taken from the start,
   * the middle and the end of the content, padded with the config's padding
   * token where the content is shorter than a window.
   *
   * @param {Uint8Array} fileBytes - Raw content.
   * @param {Function} generateResult - Formats an early result.
   * @returns {Promise<[object|null, number[]|null]>} Either an early result
   *   and `null`, or `null` and the concatenated feature vector.
   */
  async _extractFeaturesFromBytes(fileBytes, generateResult) {
    const { begBytes, midBytes, endBytes, paddingToken, minFileSizeForDl } =
      this.config;
    // Copies each byte into a 16-bit slot.
    // NOTE(review): presumably 16 bits are used so a padding token larger
    // than 255 fits next to byte values; confirm against the config.
    const fileArray = new Uint16Array(fileBytes);
    if (fileArray.length <= minFileSizeForDl)
      return [this._getResultForAFewBytes(fileBytes, generateResult), null];

    const beg = new Uint16Array(begBytes).fill(paddingToken);
    const mid = new Uint16Array(midBytes).fill(paddingToken);
    const end = new Uint16Array(endBytes).fill(paddingToken);

    // Beginning window: content first, padding after it.
    beg.set(fileArray.slice(0, begBytes), 0);

    // Middle window: centred on the midpoint of the content, with padding
    // split between both sides. The midpoint is length / 2; doubling it
    // would place the window at the end of the content instead.
    const halfpoint = Math.round(fileArray.length / 2);
    const startHalf = Math.max(0, Math.floor(halfpoint - midBytes / 2));
    const halfChunk = fileArray.slice(startHalf, startHalf + midBytes);
    mid.set(halfChunk, Math.floor((midBytes - halfChunk.length) / 2));

    // End window: padding first, content aligned to the right edge.
    const endChunk = fileArray.slice(Math.max(0, fileArray.length - endBytes));
    const endOffset = Math.max(0, endBytes - endChunk.length);
    end.set(endChunk, endOffset);

    return [null, [...beg, ...mid, ...end]];
  }
}
module.exports = { Magika };
To use it, add this file to your root folder and require it like so:
index.js
const { Magika } = require('./magika.js');
/**
 * Example driver: creates a detector, loads its model and configuration,
 * identifies a sample buffer and prints the resulting prediction.
 */
async function runMagika() {
  const detector = new Magika();
  // The model and configuration must be loaded before identifying anything.
  await detector.load();
  const result = await detector.identifyBytes(
    Buffer.from('your file data here')
  );
  console.log(result);
}
runMagika();
john1234brown
changed the title
Suggestion TypeScript Support But No Support For Require in commonJS
CommonJS Prototype Setup For CommonJS Support
Feb 20, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I would love to use a CommonJS version of the magika API and service, if y'all could potentially make something like this available, so that I'm able to import it into Node using regular require() statements instead of needing to use imports, which forces users to move to TypeScript, when backward-compatible support for CommonJS is possible, as existing TypeScript modules have shown by doing this in the past.
The text was updated successfully, but these errors were encountered: