Skip to content
This repository has been archived by the owner on Nov 18, 2019. It is now read-only.

Commit

Permalink
Merge 51b9e01 into fb164a4
Browse files Browse the repository at this point in the history
  • Loading branch information
parmentf committed Nov 12, 2019
2 parents fb164a4 + 51b9e01 commit 9d0777e
Show file tree
Hide file tree
Showing 8 changed files with 5,737 additions and 222 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ lib/

# Secret files
token.txt

# Useless format files
data/*.xlsx
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ npm install ezs-conditor

```bash
$ ./bin/affAlign.js < data/1000-notices-conditor-hal.json | ./bin/compareRnsr.js
recall: 0.6487068965517241
correct: 903
recall: 0.7162356321839081
correct: 997
total: 1392
```

Expand Down
51 changes: 51 additions & 0 deletions bin/findIdsInAddresses.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env node
/* eslint-disable no-console */

const csv = require('csv-string');
const { promises } = require('fs');
const { difference, intersection } = require('ramda');
const { depleteString } = require('../lib/strings');
const { isIn } = require('../lib/rnsr');
const RNSR = require('../data/RNSR.json');

// Evaluate RNSR identifier detection against the address test corpus.
//
// Each data row of the TSV carries the expected RNSR identifiers in column 0
// (separated by ' ; ') and the address in column 1. For every row, the address
// is passed through depleteString, every RNSR structure is tested with the
// predicate returned by isIn, and the matching identifiers are compared with
// the expected ones. Rows where at least one expected identifier is missing
// are printed, followed by global recall and precision figures.
promises
  .readFile(`${__dirname}/../data/2014_AdressesCorpusTest.tsv`, {
    encoding: 'utf8',
  })
  .then((tsv) => csv.parse(tsv, '\t'))
  .then((lines) => {
    console.log('found\texpected\taddress');
    // The first row is skipped (presumably a column header — confirm against the TSV).
    const rnsrAddresses = lines.slice(1);

    // All three counters travel in the accumulator so the callback has no
    // side effect other than logging the incomplete rows.
    const { expectedNb, foundNb, wronglyFoundNb } = rnsrAddresses.reduce(
      (totals, rnsrAddress) => {
        const depletedAddress = depleteString(rnsrAddress[1]);
        const isInAddress = isIn(depletedAddress);
        const rnsrIds = RNSR.structures.structure
          .filter(isInAddress)
          .map((s) => s.num_nat_struct);

        const expectedIds = rnsrAddress[0].split(' ; ');

        // Expected identifiers that were actually detected in the address.
        const foundIds = intersection(rnsrIds, expectedIds);
        if (foundIds.length < expectedIds.length) {
          // At least one expected identifier is missing: report the row.
          console.log(`${rnsrIds}\t${expectedIds}\t${depletedAddress}`);
        }
        // Detected identifiers that were not expected (false positives).
        const wronglyFoundIds = difference(rnsrIds, expectedIds);

        return {
          expectedNb: totals.expectedNb + expectedIds.length,
          foundNb: totals.foundNb + foundIds.length,
          wronglyFoundNb: totals.wronglyFoundNb + wronglyFoundIds.length,
        };
      },
      { expectedNb: 0, foundNb: 0, wronglyFoundNb: 0 },
    );

    // Recall: share of expected identifiers that were found.
    const recall = foundNb / expectedNb;
    console.log('recall:', recall);
    console.log('found:', foundNb);
    console.log('total:', expectedNb);
    // Precision: share of detected identifiers that were expected.
    const precision = foundNb / (foundNb + wronglyFoundNb);
    console.log('precision:', precision);
    console.log('wrongs:', wronglyFoundNb);
    console.log('total found:', foundNb + wronglyFoundNb);
  })
  .catch((error) => {
    // Without this handler a missing file or a malformed row would surface
    // only as an unhandled rejection; report it and signal failure instead.
    console.error(error);
    process.exitCode = 1;
  });
5,575 changes: 5,575 additions & 0 deletions data/2014_AdressesCorpusTest.tsv

Large diffs are not rendered by default.

0 comments on commit 9d0777e

Please sign in to comment.