In [38]:
import sys
import time
from pydriller import RepositoryMining

In [18]:
REPOSITORY_URL = '/Users/janschill/code/rails/rails'
PROJECT_REPOSITORY_URL = '/Users/janschill/code/janschill/uni-project'
DEVOPS_REPOSITORY_URL = '/Users/janschill/code/janschill/uni-devops'

In [17]:
# Only run when absolutely necessary as it traverses ~80k commits
def print_commit_history(url):
    for commit in RepositoryMining(url).traverse_commits():
        print("commit" + str(commit))
        for m in commit.modifications:
            print(
                "- Author {}".format(commit.author.name),
                " modified {}".format(m.filename),
                " with a change type of {}".format(m.change_type.name),
                " and the complexity is {}".format(m.complexity)
            )
print_commit_history(PROJECT_REPOSITORY_URL)

commit<pydriller.domain.commit.Commit object at 0x106cb74d0>
- Author Jan Schill  modified README.md  with a change type of ADD  and the complexity is None
commit<pydriller.domain.commit.Commit object at 0x1062aca90>
- Author Jan Schill  modified index.html  with a change type of ADD  and the complexity is None
commit<pydriller.domain.commit.Commit object at 0x104a5bd90>
- Author Jan Schill  modified .gitignore  with a change type of ADD  and the complexity is None
- Author Jan Schill  modified README.md  with a change type of MODIFY  and the complexity is None
- Author Jan Schill  modified index.js  with a change type of ADD  and the complexity is 1
- Author Jan Schill  modified package.json  with a change type of ADD  and the complexity is None
commit<pydriller.domain.commit.Commit object at 0x104a5a9d0>
- Author Jan Schill  modified index.html  with a change type of DELETE  and the complexity is None
- Author Jan Schill  modified .gitignore  with a change type of ADD  and the comple

In [37]:
from collections import defaultdict
from pydriller import ModificationType
import datetime

def count_file_modifications(url):
    commit_counts = {}
    # datelimit = datetime.date(2018, 1, 1)
    from_tag = 'v5.0.0.beta1'
    for commit in RepositoryMining(url, from_tag=from_tag).traverse_commits():
        for modification in commit.modifications:
            
            new_path = modification.new_path
            old_path = modification.old_path
            try:
                if modification.change_type == ModificationType.RENAME:
                    commit_counts[new_path]=commit_counts.get(old_path,0)+1
                    commit_counts.pop(old_path)
                elif modification.change_type == ModificationType.DELETE:
                    commit_counts.pop(old_path, '')
                elif modification.change_type == ModificationType.ADD:
                    commit_counts[new_path] = 1
                else: # modification to existing file
                        commit_counts [old_path] += 1
            except Exception as e: 
                # print("something went wrong with: " + str(modification))
                pass
            
    return commit_counts

start = time.time()
commit_counts = count_file_modifications(REPOSITORY_URL)
print('==========')
print(end - start)
print('==========')
end = time.time()
sorted(commit_counts.items(), key=lambda x:x[1], reverse=True)

[('lib/arel/visitors/to_sql.rb', 227),
 ('test/test_select_manager.rb', 132),
 ('Gemfile.lock', 129),
 ('lib/arel/select_manager.rb', 118),
 ('test/visitors/test_to_sql.rb', 101),
 ('arel.gemspec', 94),
 ('guides/source/5_0_release_notes.md', 90),
 ('guides/source/active_storage_overview.md', 87),
 ('lib/arel/nodes.rb', 76),
 ('lib/arel/table.rb', 75),
 ('.rubocop.yml', 71),
 ('activestorage/CHANGELOG.md', 71),
 ('History.txt', 69),
 ('test/test_table.rb', 68),
 ('guides/source/action_cable_overview.md', 64),
 ('lib/arel/visitors/dot.rb', 56),
 ('activerecord/lib/active_record/connection_adapters/mysql/database_statements.rb',
  50),
 ('railties/lib/rails/commands/server/server_command.rb', 49),
 ('activejob/lib/active_job/exceptions.rb', 47),
 ('guides/source/5_1_release_notes.md', 45),
 ('lib/active_storage/service/disk_service.rb', 45),
 ('actionpack/lib/action_dispatch/system_test_case.rb', 44),
 ('test/attributes/test_attribute.rb', 42),
 ('actioncable/test/client_test.rb', 41),
 

In [27]:
def module_from_path(path):
    return 

def module_activity(url):
    module_activity = defaultdict(int)

    for path, count in commit_counts.items():
        if ".rb" in str(path):
            l2_module = path
            module_activity[l2_module] += count

    return module_activity

module_activity = module_activity(DEVOPS_REPOSITORY_URL)
sorted(module_activity.items(), key=lambda x: x[1], reverse=True)

[('app/minitwit.rb', 28),
 ('api/minitwit_sim_api.rb', 17),
 ('app/models.rb', 12),
 ('api/models.rb', 9),
 ('app/models/message.rb', 8),
 ('app/controllers/user_controller.rb', 7),
 ('app/bin/flag_tool.rb', 6),
 ('app/controllers/login_controller.rb', 6),
 ('app/controllers/register_controller.rb', 6),
 ('app/controllers/message_controller.rb', 5),
 ('app/models/user.rb', 5),
 ('app/controllers/application_controller.rb', 4),
 ('app/models/follower.rb', 4),
 ('stalker/lib/stalker.rb', 3),
 ('api/models/message.rb', 2),
 ('api/models/user.rb', 2),
 ('stalker/lib/stalker/writer.rb', 2),
 ('api/models/follower.rb', 1),
 ('app/config/app_environment.rb', 1),
 ('stalker/lib/stalker/connector.rb', 1),
 ('stalker/lib/stalker/formatter.rb', 1),
 ('stalker/lib/stalker/version.rb', 1),
 ('stalker/test/stalker_test.rb', 1),
 ('stalker/test/test_helper.rb', 1)]

In [28]:
most_active_modules = sorted(module_activity.items(), key=lambda x: x[1], reverse=True)

top_most_active_modules= [each[0] for each in most_active_modules][:5]
top_most_active_modules

['app/minitwit.rb',
 'api/minitwit_sim_api.rb',
 'app/models.rb',
 'api/models.rb',
 'app/models/message.rb']

In [None]:
def system_module(m):
    return m in top_most_active_modules

def module_size(m):
    return 30*module_activity[m]
