In [18]:
%pylab
from __future__ import print_function

%matplotlib inline

Using matplotlib backend: agg
Populating the interactive namespace from numpy and matplotlib


In [19]:
from __future__ import print_function

import csv
from src import main, utils
import sys

# Load every configured project, then keep only those whose
# "<name> <version> <level>" label appears in `interests`.
projects = main.load_projects()
interests = ['argouml v0.26.2 method', 'mucommander v0.8.5 method']

p = []
for project in projects:
    if ' '.join((project.name, project.version, project.level)) in interests:
        p.append(project)

In [20]:
# Work on the second selected project and echo its configuration.
project = p[1]
print(project)

# Inputs tied to the project.
repos = main.load_repos(project)
goldsets = main.load_goldsets(project)

# Corpora: the queries, the release snapshot and the changesets.
queries = main.create_queries(project)
snapshot = main.create_release_corpus(project, repos)
changesets = main.create_corpus(
    project, repos, main.ChangesetCorpus, use_level=False)

# One LDA model per corpus; the second returned value is not used here.
snapshot_lda, _ = main.create_lda_model(
    project, snapshot, None, "Release", use_level=True)
changeset_lda, _ = main.create_lda_model(
    project, changesets, None, "Changeset", use_level=False)

snapshot_ranks = main.read_ranks(project, "release")
changeset_ranks = main.read_ranks(project, "changeset")

# Re-key each (first, key, third) triple from get_frms as key -> (first, third).
snapshot_frms = {
    key: (first, third)
    for first, key, third in main.get_frms(goldsets, snapshot_ranks)
}
changeset_frms = {
    key: (first, third)
    for first, key, third in main.get_frms(goldsets, changeset_ranks)
}

Project(name='mucommander', printable_name='muCommander', version='v0.8.5', level='method', ref='refs/tags/release_0_8_5', alpha=None, eta=None, passes=5, iterations=1000, num_topics=500, src_url='http://trac.mucommander.com/changeset/3794/tags/release_0_8_5?old_path=%2F&format=zip', data_path='data/mucommander/', full_path='data/mucommander/v0.8.5/', src_path='data/mucommander/v0.8.5/src')


In [21]:
def _ranked_topics(model, corpus, ids):
    """Collect the topics of selected documents, strongest topic first.

    Calls ``main.get_topics(model, corpus, by_ids=ids, full=False)`` and, for
    every ``(entry, topics)`` pair it yields, stores ``topics`` sorted by
    item 1 in descending order under the key ``entry[0]``. If the same key
    is yielded more than once, the last occurrence wins.

    Parameters
    ----------
    model : the topic model handed to ``main.get_topics``.
    corpus : the corpus handed to ``main.get_topics``.
    ids : list of identifiers forwarded as ``by_ids``.

    Returns
    -------
    dict mapping ``entry[0]`` to the sorted topic list.
    """
    ranked = dict()
    for entry, topics in main.get_topics(model, corpus, by_ids=ids, full=False):
        ranked[entry[0]] = sorted(topics, key=lambda pair: pair[1], reverse=True)
    return ranked


# Topics of the two queries under each model.
q = _ranked_topics(snapshot_lda, queries, ["37", "142"])
qc = _ranked_topics(changeset_lda, queries, ["37", "142"])

# Topics of the method recorded in each *_frms entry for issue 37; the
# method document is always taken from the snapshot corpus.
q37 = _ranked_topics(snapshot_lda, snapshot, [snapshot_frms[37][1]])
qc37 = _ranked_topics(changeset_lda, snapshot, [changeset_frms[37][1]])

# Same for issue 142.
q142 = _ranked_topics(snapshot_lda, snapshot, [snapshot_frms[142][1]])
qc142 = _ranked_topics(changeset_lda, snapshot, [changeset_frms[142][1]])

In [22]:
def printer(q, model, threshold=0.05):
    """Print every entry's topics and, per topic, its heavily weighted words.

    For each ``qid -> topics`` item of ``q`` a header line with the number of
    topics is printed, then one line per topic (items 0 and 1 of the topic
    entry), then one line per word of ``model.show_topic(topic_id)`` whose
    item 0 is at least ``threshold``.

    Parameters
    ----------
    q : dict
        Maps an identifier to a sequence of entries whose item 0 is a topic
        id and whose item 1 is printed next to it.
    model : object
        Must provide ``show_topic(topic_id)`` returning two-item entries.
        Item 0 is compared against ``threshold``.
        NOTE(review): this relies on the weight coming first in each entry;
        confirm against the installed model library version.
    threshold : float, optional
        Smallest item-0 value a word needs in order to be printed. Defaults
        to 0.05, the cut-off that used to be hard-coded.
    """
    for qid, topics in q.items():
        print(qid, "num topics:", len(topics))
        for t in topics:
            print("    ", t[0], t[1])
            for word in model.show_topic(t[0]):
                if word[0] >= threshold:
                    print("        {0} {1}".format(*word))

# Ranks

In [23]:
# For each issue, show the snapshot entry first and the changeset entry second.
for issue_id in (37, 142):
    print(snapshot_frms[issue_id])
    print(changeset_frms[issue_id])

(1, 'com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame)')
(303, 'com.mucommander.ui.action.impl.MinimizeWindowAction.performAction()')
(536, 'com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()')
(1, 'com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()')


In [28]:
# Shell escape: dump the method-level goldset file stored for issue 37.
!cat data/mucommander/v0.8.5/goldsets/method/37.txt

com.mucommander.ui.action.impl.MaximizeWindowAction.MaximizeWindowAction(MainFrame,Hashtable)
com.mucommander.ui.action.impl.MaximizeWindowAction.performAction()
com.mucommander.ui.action.impl.MinimizeWindowAction.MinimizeWindowAction(MainFrame,Hashtable)
com.mucommander.ui.action.impl.MinimizeWindowAction.performAction()
com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame)


In [29]:
# Shell escape: dump the method-level goldset file stored for issue 142.
!cat data/mucommander/v0.8.5/goldsets/method/142.txt

com.mucommander.ui.main.DrivePopupButton.getPopupMenu()
com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()


# Query words (preprocessed)

In [24]:
### queries.metadata = True
# Keep only the (document, metadata) pairs whose metadata id is "37" or "142".
qs = [pair for pair in queries if pair[1][0] in ["37", "142"]]

for query, metadata in qs:
    # Order the (word id, frequency) pairs by frequency, highest first,
    # and translate each word id through the corpus dictionary.
    doc = sorted(query, key=lambda item: item[1], reverse=True)
    words = [(freq, queries.id2word[wid]) for wid, freq in doc]
    print(metadata[0], "num words:", len(words))
    for count, token in words:
        print("        {1} ({0})".format(count, token))

# Query topics

In [25]:
# Query topics under each model, with a blank line between the two sections.
sections = (("Snapshot", q, snapshot_lda), ("Changeset", qc, changeset_lda))
for position, (title, ranked, lda) in enumerate(sections):
    if position > 0:
        print()
    print(title)
    printer(ranked, lda)

Snapshot
142 num topics: 13
     486 0.200052424627
        0.225407162454 java
        0.0667966365939 version
     129 0.152348084849
        0.0836325698865 drive
     197 0.0949708839446
        0.335195301674 component
        0.151223520535 popup
        0.0779508804518 components
        0.0601404620095 toolbar
        0.0592926573171 visible
     433 0.0872638102855
        0.0705262831764 folder
        0.0665606935383 file
     365 0.0868366789345
        0.182387091563 names
        0.0747587839647 flush
        0.0729320850512 col
        0.0527956478951 tbl
     254 0.0799372199553
        0.146340723821 file
        0.0801011442289 icons
        0.0597020707911 system
        0.0544446927419 code
     75 0.0684727683493
        0.213794985131 main
        0.210647551907 frame
        0.143999368226 action
        0.143530673908 properties
        0.0719823945338 object
        0.0715686215803 string
        0.070676652891 hashtable
     397 0.0486127656029
        0.24105

# Issue 37 top method topics

In [26]:
# Issue 37 method topics under each model, with a blank line between sections.
sections = (("Snapshot", q37, snapshot_lda), ("Changeset", qc37, changeset_lda))
for position, (title, ranked, lda) in enumerate(sections):
    if position > 0:
        print()
    print(title)
    printer(ranked, lda)

Snapshot
com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame) num topics: 2
     280 0.895544528469
        0.223045774075 menu
        0.0922777930903 item
        0.0766523325734 action
        0.0556715337321 mnemonic
        0.0534673653631 add
     492 0.104051366312
        0.405491679193 action
        0.0963417533379 descriptor
        0.0667780136219 register
        0.0570310172299 factory

Changeset
com.mucommander.ui.action.impl.MinimizeWindowAction.performAction() num topics: 3
     76 0.410880046182
        0.150192555606 window
        0.135369924693 focus
        0.11055842372 frame
        0.109025490271 main
        0.0561341944788 request
     472 0.240844841356
        0.137835824374 events
        0.137234417418 mode
        0.121380838756 thread
        0.0711481876135 action
        0.062647338085 code
        0.0575339382588 separate
     392 0.237830668017
        0.127821431271 popup
        0.0687645755657 drive
        0.0681926443664 menu
       

# Issue 142 top method topics

In [27]:
# Issue 142 method topics under each model; every section starts with a blank line.
sections = (("Snapshot", q142, snapshot_lda), ("Changeset", qc142, changeset_lda))
for title, ranked, lda in sections:
    print()
    print(title)
    printer(ranked, lda)


Snapshot
com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run() num topics: 12
     111 0.359103937365
        0.392817788721 icon
        0.143936260431 file
     175 0.115558589428
        0.647603479703 name
        0.12470674585 string
     254 0.101636680774
        0.146340723821 file
        0.0801011442289 icons
        0.0597020707911 system
        0.0544446927419 code
     235 0.0851824939718
     362 0.0807915321745
        0.130791849747 focus
        0.0797450868169 request
        0.067886389463 component
        0.0567156286773 swing
     479 0.0692591462939
        0.194917588524 volumes
        0.0928604624774 volume
        0.0650423475007 file
     280 0.0600650649487
        0.223045774075 menu
        0.0922777930903 item
        0.0766523325734 action
        0.0556715337321 mnemonic
        0.0534673653631 add
     129 0.0521915888343
        0.0836325698865 drive
     330 0.0169814388655
        0.16194433017 items
        0.0698979458364 com