Skip to content

Commit

Permalink
Check for split/inconsistent clusterings
Browse files Browse the repository at this point in the history
See issue #27 - split or inconsistent clusterings may be caused by supplying incorrect input files
  • Loading branch information
johnlees committed Aug 7, 2018
1 parent dc9685e commit 6aff3f3
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions PopPUNK/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ def printClusters(G, outPrefix, oldClusterFile = None, printRef = True):

# Assign each cluster a name
clustering = {}
foundOldClusters = []

for newClsIdx, newCluster in enumerate(newClusters):

Expand All @@ -322,6 +323,13 @@ def printClusters(G, outPrefix, oldClusterFile = None, printRef = True):
for oldClusterName, oldClusterMembers in oldClusters.items():
join = ref_only.intersection(oldClusterMembers)
if len(join) > 0:
# Check cluster is consistent with previous definitions
if oldClusterName in foundOldClusters:
sys.stderr.write("WARNING: Old cluster " + oldClusterName + " split"
" across multiple new clusters\n")
else:
foundOldClusters.append(oldClusterName)

# Query has merged clusters
if len(join) < len(ref_only):
merge = True
Expand Down

0 comments on commit 6aff3f3

Please sign in to comment.