In [25]:
%pylab
from __future__ import print_function

%matplotlib inline

Using matplotlib backend: agg
Populating the interactive namespace from numpy and matplotlib


In [26]:
from __future__ import print_function

import csv
from src import main, utils
import sys

# Load every configured project and keep only those whose
# "<name> <version> <level>" label appears in `interests`.
projects = main.load_projects()
interests = ['argouml v0.26.2 method', 'mucommander v0.8.5 method']

p = []
for project in projects:
    if ' '.join((project.name, project.version, project.level)) in interests:
        p.append(project)

In [27]:
# Everything below is computed for the first selected project only.
project = p[0]
print(project)

# Inputs loaded for that project.
repos = main.load_repos(project)
goldsets = main.load_goldsets(project)

# Corpora: the queries, the release snapshot and the changesets.
queries = main.create_queries(project)
snapshot = main.create_release_corpus(project, repos)
changesets = main.create_corpus(
    project, repos, main.ChangesetCorpus, use_level=False)

# One LDA model per corpus; the second returned value is not used here.
snapshot_lda, _ = main.create_lda_model(
    project, snapshot, None, "Release", use_level=True)
changeset_lda, _ = main.create_lda_model(
    project, changesets, None, "Changeset", use_level=False)

snapshot_ranks = main.read_ranks(project, "release")
changeset_ranks = main.read_ranks(project, "changeset")

# Re-key every (first, key, last) triple from get_frms as key -> (first, last).
# NOTE(review): judging by how these dicts are indexed and printed further
# down, the key looks like an integer issue id and the value like
# (rank, method name) -- confirm against main.get_frms.
snapshot_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, snapshot_ranks)
}
changeset_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, changeset_ranks)
}

Project(name='argouml', printable_name='ArgoUML', version='v0.26.2', level='method', ref='refs/tags/VERSION_0_26_2', alpha=None, eta=None, passes=5, iterations=1000, num_topics=500, src_url='http://argouml-downloads.tigris.org/nonav/argouml-0.26.2/ArgoUML-0.26.2-src.zip', data_path='data/argouml/', full_path='data/argouml/v0.26.2/', src_path='data/argouml/v0.26.2/src')


In [28]:
def _topics_by_document(model, corpus, ids):
    """Collect the topics of selected documents, heaviest topic first.

    Calls ``main.get_topics(model, corpus, by_ids=ids, full=False)`` and, for
    every ``(document, topics)`` pair it yields, stores the topic list sorted
    by the second element of each entry in descending order.

    Parameters
    ----------
    model : LDA model handed to ``main.get_topics``.
    corpus : corpus handed to ``main.get_topics``.
    ids : list of identifiers forwarded as ``by_ids``.

    Returns
    -------
    dict
        ``document[0]`` mapped to that document's sorted topic list.
    """
    ranked = dict()
    for document, topics in main.get_topics(model, corpus, by_ids=ids, full=False):
        ranked[document[0]] = sorted(topics, key=lambda entry: entry[1], reverse=True)
    return ranked


# Topics of the two issue queries under each model.
q = _topics_by_document(snapshot_lda, queries, ["5258", "5088"])
qc = _topics_by_document(changeset_lda, queries, ["5258", "5088"])

# Topics of the method recorded in each *_frms entry (element [1] of the
# stored tuple).  Both models are evaluated against the `snapshot` corpus.
q5258 = _topics_by_document(snapshot_lda, snapshot, [snapshot_frms[5258][1]])
qc5258 = _topics_by_document(changeset_lda, snapshot, [changeset_frms[5258][1]])

q5088 = _topics_by_document(snapshot_lda, snapshot, [snapshot_frms[5088][1]])
qc5088 = _topics_by_document(changeset_lda, snapshot, [changeset_frms[5088][1]])

In [29]:
def printer(q, model, min_prob=0.05):
    """Print every entry's topics and the heaviest words of each topic.

    Parameters
    ----------
    q : dict
        Maps an identifier to a list of topic entries.  For each entry,
        ``entry[0]`` is passed to ``model.show_topic`` and is printed together
        with ``entry[1]``.
    model : object
        Anything exposing ``show_topic(topic_id)`` that returns an iterable of
        pairs.  The first element of each pair is compared against
        ``min_prob``, so it must be the numeric weight.
        NOTE(review): this relies on ``show_topic`` yielding (weight, word);
        confirm the order for the installed gensim version.
    min_prob : float, optional
        Smallest weight a word needs in order to be printed.  Defaults to
        0.05, the cut-off that used to be hard-coded.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    for qid, topics in q.items():
        print(qid, "num topics:", len(topics))
        for t in topics:
            topic_id, weight = t[0], t[1]
            print("    ", topic_id, weight)
            for word in model.show_topic(topic_id):
                # Skip the long tail of low-weight words.
                if word[0] >= min_prob:
                    print("        {0} {1}".format(*word))

# Ranks

In [30]:
# Show the snapshot entry, then the changeset entry, for each issue in turn.
for issue_id in (5258, 5088):
    print(snapshot_frms[issue_id])
    print(changeset_frms[issue_id])

(1, 'org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName()')
(8138, 'org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName()')
(124, 'org.argouml.model.mdr.XmiWriterMDRImpl.write()')
(1, 'org.argouml.persistence.TestProfileConfigurationFilePersister.testWritePreviouslyLoadedProfile()')


In [31]:
# Dump the method-level goldset file for issue 5258.
!cat data/argouml/v0.26.2/goldsets/method/5258.txt

org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName()


In [32]:
# Dump the method-level goldset file for issue 5088.
!cat data/argouml/v0.26.2/goldsets/method/5088.txt

org.argouml.persistence.TestProfileConfigurationFilePersister.setUp()
org.argouml.persistence.TestProfileConfigurationFilePersister.testWritePreviouslyLoadedProfile()
org.argouml.model.mdr.XmiReferenceProviderImpl.getReference(RefObject)
org.argouml.model.mdr.XmiWriterMDRImpl.write()


# Query words (preprocessed)

In [33]:
# The code below iterates `queries` as (document, metadata) pairs and reads
# the issue id from metadata[0].
# NOTE(review): presumably the `metadata` flag is what makes the corpus yield
# pairs -- confirm against the corpus class.
queries.metadata = True
qs = [pair for pair in queries if pair[1][0] in ["5258", "5088"]]

for query, metadata in qs:
    # Order the (word id, frequency) entries by frequency, highest first.
    doc = list(query)
    doc.sort(key=lambda entry: entry[1], reverse=True)

    # Swap to (frequency, word) and resolve ids through the corpus dictionary.
    words = [(freq, queries.id2word[wid]) for wid, freq in doc]

    print(metadata[0], "num words:", len(words))
    for freq, term in words:
        print("        {1} ({0})".format(freq, term))

5088 num words: 49
        profiles (4)
        xmi (3)
        user (3)
        profile (3)
        write (3)
        save (2)
        files (2)
        defined (2)
        loaded (2)
        impl (2)
        mdr (2)
        writer (2)
        models (2)
        able (1)
        available (1)
        implemented (1)
        file (1)
        model (1)
        issue (1)
        release (1)
        aren (1)
        using (1)
        isn (1)
        zargo (1)
        creating (1)
        removed (1)
        configuration (1)
        removing (1)
        empty (1)
        won (1)
        seams (1)
        persister (1)
        usage (1)
        simply (1)
        written (1)
        depend (1)
        due (1)
        configured (1)
        deeper (1)
        directories (1)
        experimentally (1)
        flag (1)
        functionality (1)
        persist (1)
        prevents (1)
        tackle (1)
        unassigned (1)
        wasn (1)
        writing (1)
5258 num words: 6
        nam

# Query topics

In [34]:
# Query topics under each model, with a blank line between the two sections.
for position, (title, topics_by_query, lda) in enumerate([
        ("Snapshot", q, snapshot_lda),
        ("Changeset", qc, changeset_lda)]):
    if position > 0:
        print()
    print(title)
    printer(topics_by_query, lda)

Snapshot
5258 num topics: 3
     194 0.583235451342
        0.178309175492 rule
        0.121059499556 perspective
        0.0762293638816 name
        0.0690858822929 rules
        0.0503684867632 explorer
     226 0.185504920139
        0.1856541011 machine
        0.11777883397 object
        0.102723364576 handle
        0.0773127578072 collaboration
        0.0647654447602 expr
        0.0556342824257 represented
        0.0545723769322 set
     464 0.148426295186
        0.388703354048 classifier
        0.0789009067262 object
5088 num topics: 18
     281 0.192892482112
        0.0969630906947 profiles
        0.0777460707084 profile
     283 0.134882719488
        0.0805840860895 writer
        0.0625096636339 component
        0.0515641337096 file
        0.0505557374166 xmi
     38 0.111432315008
        0.0837400317472 file
        0.052783578714 chooser
     119 0.0964613712182
        0.156409544424 file
     103 0.0691276760039
        0.16809440942 file
     111 0.0685104

# Issue 5258 top method topics

In [35]:
# Topics of the issue 5258 method under each model, with a blank line between
# the two sections.
for position, (title, topics_by_method, lda) in enumerate([
        ("Snapshot", q5258, snapshot_lda),
        ("Changeset", qc5258, changeset_lda)]):
    if position > 0:
        print()
    print(title)
    printer(topics_by_method, lda)

Snapshot
org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName() num topics: 3
     194 0.816857912065
        0.178309175492 rule
        0.121059499556 perspective
        0.0762293638816 name
        0.0690858822929 rules
        0.0503684867632 explorer
     202 0.067793539522
        0.107613146742 elem
        0.0827978683518 perspective
        0.0708453920312 add
        0.0578630351812 delete
     464 0.0568779601772
        0.388703354048 classifier
        0.0789009067262 object

Changeset
org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName() num topics: 2
     432 0.757164655495
        0.0893536710693 jar
        0.0678621781371 argouml
        0.0517429904434 org
     332 0.184247109211
        0.0834646177049 rule
        0.0673931412026 perspective


# Issue 5088 top method topics

In [36]:
# Topics of the issue 5088 method under each model; every section is preceded
# by a blank line.
for title, topics_by_method, lda in [
        ("Snapshot", q5088, snapshot_lda),
        ("Changeset", qc5088, changeset_lda)]:
    print()
    print(title)
    printer(topics_by_method, lda)


Snapshot
org.argouml.model.mdr.XmiWriterMDRImpl.write() num topics: 6
     283 0.560131002273
        0.0805840860895 writer
        0.0625096636339 component
        0.0515641337096 file
        0.0505557374166 xmi
     82 0.155389811181
        0.112620395345 xmi
        0.100644430345 input
        0.0743053978246 exception
        0.0681277303619 stream
        0.0663342788556 reader
        0.0647220093709 file
     41 0.117416586732
        0.116364685128 exception
        0.113558278325 object
        0.0930610719178 invalid
        0.0875901359546 handle
        0.0700790042915 element
     135 0.0973695698302
     327 0.0250058497936
        0.118631551517 handle
        0.0784944389706 object
        0.0687597203891 command
        0.0598388243215 set
        0.0578894470115 model
        0.0522935577955 token
        0.0511620081769 execute
     462 0.0236659035944
        0.0555031990487 type

Changeset
org.argouml.persistence.TestProfileConfigurationFilePersister.testWrit