Skip to content

Commit

Permalink
polishing
Browse files Browse the repository at this point in the history
  • Loading branch information
xrotwang committed Jun 27, 2018
1 parent 3bb7b14 commit 8062e58
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 25 deletions.
15 changes: 14 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,17 @@
cldf/
app/cluster/
app/source/langsGeo.json
app/source/subgraph-names.js
app/source/infomap-names.js
app/source/words.json
app/subgraph/
output/communities/infomap.csv
output/data/words.csv
output/graphs/
output/languages.geojson
output/concepts.md
output/stats/
output/graphs/
clics.sqlite

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,9 @@ $ clics -t 3 subgraph
```

This will populate the folder `app` with JSON files that contain the network information needed to browse the data.


### Inspecting the networks

Now you can open `app/index.html` in your browser to inspect the colexification networks detected in the
datasets.
24 changes: 1 addition & 23 deletions src/pyclics/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def list_(args):
'',
'TOTAL',
args.api.db.fetchone(
"select count(distinct concepticon_id) from parametertable")[0],
"select count(distinct p.concepticon_id) from parametertable as p, formtable as f where f.parameter_id = p.id")[0],
args.api.db.fetchone(
"select count(*) from languagetable")[0],
args.api.db.fetchone(
Expand Down Expand Up @@ -129,7 +129,6 @@ def languages(args):
@command()
def concepts(args):
concepts = defaultdict(list)
ambiguous_concept_mapping = set()
args.api._log = args.log

with args.api.csv_writer(Path('output', 'data'), 'words') as writer:
Expand All @@ -145,21 +144,7 @@ def concepts(args):
'ClicsValue'])

for v, forms in pb(args.api.db.iter_wordlists()):
visited = {}

for form in forms:
norm_gloss = form.gloss.lower()
if norm_gloss.startswith('the ') or norm_gloss.startswith('to '):
norm_gloss = ' '.join(norm_gloss.split()[1:])
if form.concepticon_id in visited and visited[form.concepticon_id] != norm_gloss:
# The concept was already seen, but with a different gloss!
ambiguous_concept_mapping.add((
v.gid,
form.concepticon_id,
form.gloss,
visited[form.concepticon_id]))
continue

concepts[form.concepticon_id].append((v.family, v.gid, form.gid))
writer.writerow([
form.gid,
Expand All @@ -171,13 +156,6 @@ def concepts(args):
v.family,
form.form,
form.clics_form])
visited[form.concepticon_id] = norm_gloss

for am in ambiguous_concept_mapping:
args.log.warn('{0} {1} is linked from different glosses "{2}" and "{3}"'.format(*am))
if ambiguous_concept_mapping:
args.log.warn('Skipped {0} forms due to ambiguous concept mapping'.format(
len(ambiguous_concept_mapping)))

concept_table = Table('Number', 'Concept', 'SemanticField', 'Category', 'Reflexes')
with args.api.csv_writer(Path('output', 'stats'), 'concepts') as writer:
Expand Down
4 changes: 3 additions & 1 deletion src/pyclics/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ def fname(self, d, ext):
return d.joinpath('{0}.{1}'.format(self, ext))

def save(self, graph):
with open(self.fname(self.graphdir, 'bin').as_posix(), 'wb') as f:
if not self.graphdir.exists():
self.graphdir.mkdir()
with self.fname(self.graphdir, 'bin').open('wb') as f:
pickle.dump(graph, f)
write_text(
self.fname(self.graphdir, 'gml'),
Expand Down

0 comments on commit 8062e58

Please sign in to comment.