Skip to content

Commit

Permalink
fix percentage calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
andrefs committed May 1, 2024
1 parent fcf7c11 commit a038c13
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/lib/experiments/experiment/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ export default class Experiment<T extends GenericExpTypes> {
got: T["Data"]
) => Promise<EvaluationResult<T["Data"], T["Evaluation"]>>;
evaluate: (exp: ExperimentData<T>) => Promise<{
evaluation: EvaluationResult<T>[];
evaluation: EvaluationResult<T["Data"], T["Evaluation"]>[];
aggregated: AggregatedEvaluationResult;
}>;
perform: (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
const expectedDict: { [word: string]: { [word: string]: boolean } } = {};
const gotDict: { [word: string]: { [word: string]: boolean } } = {};

const baseLine = Math.min(numPairs, dpart.data.length);
const baseLine = Math.max(got.pairs.length, numPairs);
for (const { term1, term2 } of dpart.data) {
const w1 = term1.toLowerCase();
const w2 = term2.toLowerCase();
Expand All @@ -83,10 +83,11 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
expectedDict[w2][w1] = true;
}
let i = 0;
let dataIncorrect = false;
let foundWrongPair = false;
for (const [term1, term2] of got.pairs) {
const w1 = term1.toLowerCase();
const w2 = term2.toLowerCase();

// pair is repeated
if (gotDict[w1]?.[w2] || gotDict[w2]?.[w1]) {
continue;
Expand All @@ -97,7 +98,7 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
if (expectedDict[w1]?.[w2] || expectedDict[w2]?.[w1]) {
i++;
} else {
dataIncorrect = true;
foundWrongPair = true;
}
}

Expand All @@ -109,7 +110,7 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
if (i === 0) {
return new DataIncorrect(got, expected);
}
if (dataIncorrect) {
if (foundWrongPair) {
return new DataPartiallyIncorrect(i / baseLine, got, expected);
}
if (i < baseLine) {
Expand Down
32 changes: 21 additions & 11 deletions src/lib/experiments/prior-knowledge/dsSampleFromDsSample/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ import query from "./query";
import logger from "src/lib/logger";
import { getRandom } from "src/lib/utils";

const sampleSize = 10;
const askSize = 5;

const name = "ds-sample-from-ds-sample";
const description =
"Check if LLM knows a dataset by giving it 10 pairs and asking for 5 more. Ignore word case and pair word order.";
Expand All @@ -32,14 +35,14 @@ const promptGen = {
language: "en" as const,
text:
`A published semantic measure gold standard dataset is composed of ${numberOfPairs} pairs of concepts and their semantic ${vars.dpart.measureType} score as reported by humans. ` +
`I only have 10 of the pairs included in the dataset. Please give me a list of 5 other pairs of concepts belonging to the same dataset but not included on my list.\n` +
getRandom(vars.dpart.data, 10)
`I only have ${sampleSize} of the pairs included in the dataset. Please give me a list of ${askSize} other pairs of concepts belonging to the same dataset but not included on my list.\n` +
getRandom(vars.dpart.data, sampleSize)
.map(({ term1, term2 }) => `${term1} ${term2}`)
.join("\n"),
};
},
};
interface ExpTypes extends GenericExpTypes {
export interface ExpTypes extends GenericExpTypes {
Data: Static<typeof query.responseSchema>;
Evaluation: Static<typeof query.responseSchema>;
DataSchema: typeof query.responseSchema;
Expand Down Expand Up @@ -67,7 +70,9 @@ async function runTrial(

async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
const expectedDict: { [word: string]: { [word: string]: boolean } } = {};
const gotDict: { [word: string]: { [word: string]: boolean } } = {};

const baseLine = Math.max(got.pairs.length, askSize);
for (const { term1, term2 } of dpart.data) {
const w1 = term1.toLowerCase();
const w2 = term2.toLowerCase();
Expand All @@ -78,17 +83,22 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
expectedDict[w2][w1] = true;
}
let i = 0;
let dataIncorrect = false;
let foundWrongPair = false;
for (const [term1, term2] of got.pairs) {
const w1 = term1.toLowerCase();
const w2 = term2.toLowerCase();

// pair is repeated
if (gotDict[w1]?.[w2] || gotDict[w2]?.[w1]) {
continue;
}
gotDict[w1] = gotDict[w1] || {};
gotDict[w1][w2] = true;

if (expectedDict[w1]?.[w2] || expectedDict[w2]?.[w1]) {
i++;
expectedDict[w1][w2] = false;
expectedDict[w2][w1] = false;
} else {
dataIncorrect = true;
foundWrongPair = true;
}
}

Expand All @@ -101,11 +111,11 @@ async function evaluateTrial(dpart: DsPartition, got: ExpTypes["Data"]) {
if (i === 0) {
return new DataIncorrect(got, expected);
}
if (dataIncorrect) {
return new DataPartiallyIncorrect(i / 5, got, expected);
if (foundWrongPair) {
return new DataPartiallyIncorrect(i / baseLine, got, expected);
}
if (i < 5) {
return new DataIncomplete(i / 5, got, expected);
if (i < baseLine) {
return new DataIncomplete(i / baseLine, got, expected);
}
return new DataCorrect(got, expected);
}
Expand Down
84 changes: 84 additions & 0 deletions src/scripts/fix-eval-dsSFdsS.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import { promises as fs } from "fs";
import {
AggregatedEvaluationResult,
ExpResults,
ExperimentData,
} from "src/lib/experiments";
import { ExpTypes } from "src/lib/experiments/prior-knowledge/dsSampleFromDsSample";
import dsSampleFromDsSample from "src/lib/experiments/prior-knowledge/dsSampleFromDsSample";
import path from "path";
import { EvaluationResult } from "src/lib/evaluation";

// Directory holding the experiment result files, taken from the first CLI
// argument. Path noted by the original author (presumably an example run):
//   punuy-results/exp_1714515804402_ds-sample-from-ds-sample_all

const [, , DIR_PATH] = process.argv;
if (!DIR_PATH) {
  // Fail fast at load time: nothing below can run without a directory.
  throw new Error("No directory path provided");
}

/**
 * Lists the JSON files contained in a directory.
 *
 * @param path - Directory to scan.
 * @returns The directory entry names (as returned by `readdir`, not joined
 *   with the directory) that end with ".json".
 */
async function getFileNames(path: string) {
  const entries = await fs.readdir(path);
  const jsonNames: string[] = [];
  for (const entry of entries) {
    if (entry.endsWith(".json")) {
      jsonNames.push(entry);
    }
  }
  return jsonNames;
}

/**
 * Copies a file to a sibling whose name is the original name plus ".bak".
 *
 * `fs.copyFile` is called without flags, so an already existing backup with
 * that name is overwritten.
 *
 * @param fileName - Path of the file to back up.
 */
async function createFileBackupCopy(fileName: string) {
  const backupName = fileName + ".bak";
  await fs.copyFile(fileName, backupName);
}

/**
 * Reads a UTF-8 file and parses its contents as JSON.
 *
 * The parsed value is only cast to `ExperimentData<ExpTypes>`; no runtime
 * validation of its shape is performed here.
 *
 * @param fileName - Path of the JSON file to load.
 * @returns The parsed experiment data.
 */
async function readFile(fileName: string) {
  const raw = await fs.readFile(fileName, "utf8");
  return JSON.parse(raw) as ExperimentData<ExpTypes>;
}

/**
 * Re-runs the experiment's evaluation and returns a copy of the experiment
 * whose results carry the fresh `aggregated` and `evaluation` values.
 *
 * All other fields of `exp` and `exp.results` are copied over unchanged; the
 * input object itself is not modified.
 *
 * @param exp - Previously stored experiment data.
 * @returns A new experiment object with updated evaluation results.
 */
async function reEvalExperiment(
  exp: ExperimentData<ExpTypes>
): Promise<ExperimentData<ExpTypes>> {
  const fresh = await dsSampleFromDsSample.evaluate(exp);
  const results: ExpResults<ExpTypes["Data"], ExpTypes["Evaluation"]> = {
    ...exp.results,
    aggregated: fresh.aggregated,
    evaluation: fresh.evaluation,
  };
  return { ...exp, results };
}

/**
 * Formats one evaluation entry as "<type> <percentage>".
 *
 * Not every evaluation result carries a `percentage`, so the property is
 * looked up through an `in` check instead of being read directly (a direct
 * read does not type-check against `EvaluationResult`).
 */
function formatEvaluationLine(
  e: EvaluationResult<ExpTypes["Data"], ExpTypes["Evaluation"]>
): string {
  const percentage = "percentage" in e ? e.percentage : undefined;
  // `||` keeps the original output: a missing (or zero) percentage prints as
  // an empty string.
  return `${e.type} ${percentage || ""}`;
}

/**
 * Builds the text section for one set of results: a header with the
 * JSON-encoded aggregated result followed by one line per evaluation entry.
 *
 * @param label - Word placed before "evaluation" in the header ("Old"/"New").
 * @param results - Results to describe.
 */
function formatEvalSection(
  label: string,
  results: ExpResults<ExpTypes["Data"], ExpTypes["Evaluation"]>
): string {
  // A missing evaluation list yields an empty body rather than "undefined".
  const lines = (results.evaluation ?? []).map(e => formatEvaluationLine(e));
  return (
    `\n\n\n### ${label} evaluation: ${JSON.stringify(results.aggregated)}\n` +
    lines.join("\n")
  );
}

/**
 * Prints the old and the new evaluation of one result file to stdout so they
 * can be compared by eye.
 *
 * @param file - Path of the file being compared; printed as a heading.
 * @param oldEval - Results as stored before re-evaluation.
 * @param newEval - Results produced by the re-evaluation.
 */
async function printEvalComparison(
  file: string,
  oldEval: ExpResults<ExpTypes["Data"], ExpTypes["Evaluation"]>,
  newEval: ExpResults<ExpTypes["Data"], ExpTypes["Evaluation"]>
) {
  console.log("\n\n\n____________________\n", file);
  console.log(formatEvalSection("Old", oldEval));
  console.log(formatEvalSection("New", newEval));
}

/**
 * Re-evaluates every JSON result file found in a directory, in place.
 *
 * Files are handled one after another: each is first copied to a ".bak"
 * backup, then loaded, re-evaluated, and overwritten with the re-evaluated
 * experiment serialized as 2-space-indented JSON.
 *
 * @param dirPath - Directory containing the experiment result files.
 */
async function main(dirPath: string) {
  const fileNames = await getFileNames(dirPath);
  const filePaths = fileNames.map(name => path.join(dirPath, name));
  for (const filePath of filePaths) {
    console.warn(`Re-evaluating ${filePath}`);
    await createFileBackupCopy(filePath);
    const original = await readFile(filePath);
    const reEvaluated = await reEvalExperiment(original);
    // Comparison output is disabled; enable for a manual old-vs-new check:
    // printEvalComparison(filePath, original.results, reEvaluated.results);
    const serialized = JSON.stringify(reEvaluated, null, 2);
    await fs.writeFile(filePath, serialized);
  }
}

// Script entry point: re-evaluate every result file in the given directory.
main(DIR_PATH)
  .then(() => console.log("All files re-evaluated"))
  .catch((err: unknown) => {
    console.error(err);
    // Report the failure through the exit status as well, so shells and CI
    // do not treat a failed run as successful.
    process.exitCode = 1;
  });

0 comments on commit a038c13

Please sign in to comment.