In [1]:
# Install a pip package in the current Jupyter kernel
import sys
# pip is invoked through sys.executable (the interpreter running this kernel)
# so the packages are installed into the environment the notebook itself uses.
!{sys.executable} -m pip install pygithub
!{sys.executable} -m pip install -U python-dotenv

# Load the IPython extension shipped with python-dotenv and run its %dotenv magic
# (presumably this reads a local .env file into the process environment — confirm
# against the python-dotenv documentation).
%load_ext dotenv
%dotenv
import os
# GitHub credentials are taken from the environment; os.environ.get yields None
# for a variable that is not set.
git_user = os.environ.get("GITHUB_USER")
git_password = os.environ.get("GITHUB_PASSWORD")


Requirement already up-to-date: python-dotenv in /anaconda3/lib/python3.6/site-packages


In [8]:
from github import Github

# Log in with the username/password pair read from the environment.
# NOTE(review): GitHub's REST API is documented as no longer accepting account
# passwords; a personal access token may be needed here — confirm.
g = Github(git_user, git_password)

#ignore_repos=["CIS-2014-PingAccess-3.0-SDK-Demo","accounts","ldap-backup","suspend-resume","transactrules-accounts","transactrules-core"]

# Repository names that are left out of the analysis even when they are Java.
ignore_repos = ["CIS-2014-PingAccess-3.0-SDK-Demo"]

# Repositories of the authenticated user that will be analysed.
parse_repos = []

for repo in g.get_user().get_repos():
    # Only repositories whose primary language is reported as Java qualify.
    if repo.language != "Java":
        continue
    if repo.name in ignore_repos:
        continue
    parse_repos.append(repo)


In [9]:
import base64

def is_start_block_comment(line):
    """Return True when the first non-whitespace characters of `line` are "/*"."""
    without_indent = line.lstrip()
    return without_indent.startswith("/*")

def is_end_block_comment(line):
    """Return True when `line` contains the block-comment terminator "*/" anywhere."""
    terminator_at = line.find("*/")
    return terminator_at >= 0

def is_line_comment(line):
    """Return True when the first non-whitespace characters of `line` are "//"."""
    without_indent = line.lstrip()
    return without_indent.startswith("//")

class Feature:
    """A named counter that is incremented for every line matching a predicate.

    Attributes:
        name: Label of the feature.
        criteria: Callable taking a line and returning a truthy value on a match.
        count: Number of lines that have matched so far; starts at 0.
    """

    def __init__(self, name, criteria):
        self.name = name
        self.criteria = criteria
        self.count = 0

    def apply(self, line):
        """Evaluate the predicate on `line` and bump `count` by one on a match."""
        if not self.criteria(line):
            return
        self.count += 1

class FeatureCounter:
    """Applies a set of features to the code lines of files accepted by a filter.

    Attributes:
        features: Feature objects (anything with an `apply(line)` method) that
            are run against every code line, in insertion order.
        file_name_filter: Callable taking a file name and returning a truthy
            value when the file should be analysed.
    """

    def __init__(self, file_name_filter):
        self.features = []
        self.file_name_filter = file_name_filter

    def applies_to(self, file_name):
        """Return the filter's verdict for `file_name`."""
        return self.file_name_filter(file_name)

    def apply_line(self, line):
        """Pass a single line to every registered feature."""
        for feature in self.features:
            feature.apply(line)

    def add_feature(self, feature):
        """Register `feature` so it is applied to subsequent lines."""
        self.features.append(feature)

    def display(self):
        """Print the name and current count of every registered feature."""
        for feature in self.features:
            print(feature.name, feature.count)

    def apply_file(self, file_content):
        """Decode a file and apply the features to each of its code lines.

        Lines that are blank or whitespace-only, lines that start with "//",
        and lines that lie entirely inside a "/* ... */" block comment are
        skipped. This includes the line that closes a block comment and
        one-line comments such as "/** doc */". When text follows the closing
        "*/" on the same line, the original, unstripped line is still passed
        to the features.

        Only block comments that open at the start of a line (ignoring
        indentation) are recognised; a "/*" that follows code on the same line
        is not detected.

        Args:
            file_content: Object whose `content` attribute holds the file text
                as base64-encoded UTF-8.

        Raises:
            UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
        """
        text = base64.b64decode(file_content.content).decode('utf-8')

        in_block_comment = False

        for line in text.splitlines():
            remainder = line.strip()

            if in_block_comment or is_start_block_comment(remainder):
                # On the opening line skip the "/*" itself, otherwise "/*/"
                # would be read as a comment that opens and closes at once.
                comment_text = remainder if in_block_comment else remainder[2:]

                if not is_end_block_comment(comment_text):
                    in_block_comment = True
                    continue

                in_block_comment = False
                # Only what follows the closing "*/" can still be code.
                remainder = comment_text.partition("*/")[2].strip()

            if remainder and not is_line_comment(remainder):
                self.apply_line(line)

def dir_count_feature(directory_content, ref, feature_counter):
    """Recursively walk a repository directory and count features in its files.

    Args:
        directory_content: Directory entry exposing `repository` and `path`.
        ref: Git reference (the caller passes a commit SHA) at which the
            directory listing is fetched.
        feature_counter: Counter handed on to `file_count_feature` for each file.
    """
    repository = directory_content.repository
    entries = repository.get_contents(directory_content.path, ref)

    for entry in entries:
        kind = entry.type
        if kind == "dir":
            # Descend into the sub-directory at the same ref.
            dir_count_feature(entry, ref, feature_counter)
        elif kind == "file":
            file_count_feature(entry, feature_counter)

def file_count_feature(file_content, feature_counter):
    """Run `feature_counter` over `file_content` if its filter accepts the file name."""
    if not feature_counter.applies_to(file_content.name):
        return
    feature_counter.apply_file(file_content)

def repo_count_feature(repo, ref, feature_counter):
    """Count features across a whole repository as it exists at `ref`.

    Args:
        repo: Repository object exposing `get_contents(path, ref)`.
        ref: Git reference (the caller passes a commit SHA) to inspect.
        feature_counter: Counter that accumulates the results.
    """
    # The empty path requests the listing of the repository root.
    root_entries = repo.get_contents("", ref)

    for entry in root_entries:
        entry_type = entry.type
        if entry_type == "dir":
            dir_count_feature(entry, ref, feature_counter)
        elif entry_type == "file":
            file_count_feature(entry, feature_counter)




In [None]:
def create_java_rest_counter():
    """Build a fresh FeatureCounter for REST-related features in ".java" files.

    Returns:
        A FeatureCounter restricted to file names ending in ".java" with three
        zeroed features, in this order: "line counter" (matches every line),
        "controller" (line contains "@RestController") and "REST operation"
        (line contains both "@RequestMapping" and "method").
    """
    def is_java_file(filename):
        return filename.endswith(".java")

    def any_line(line):
        return True

    def declares_controller(line):
        return "@RestController" in line

    def declares_rest_operation(line):
        return ("@RequestMapping" in line) and ("method" in line)

    specs = (
        ("line counter", any_line),
        ("controller", declares_controller),
        ("REST operation", declares_rest_operation),
    )

    counter = FeatureCounter(is_java_file)
    for label, predicate in specs:
        counter.add_feature(Feature(label, predicate))

    return counter

import pandas as pd

# One row per (repository, commit); each row is [repo name, commit date, *feature counts].
feature_list = []
feature_columns = ['repo','date']

# This counter is only used to read the feature names for the column headers;
# a fresh counter is created for every commit inside the loop below.
feature_counter = create_java_rest_counter()

for feature in feature_counter.features:
    feature_columns.append(feature.name)

for repo in parse_repos:
    for commit in repo.get_commits():
        print(f"- processing {repo.name} {commit.commit.author.date} ({commit.commit.author.name}) {commit.commit.message}")
        # Start from zeroed counts, then walk the whole repository tree as of this commit.
        feature_counter = create_java_rest_counter()
        repo_count_feature(repo, commit.sha, feature_counter)
        
        features = []
        
        features.append(repo.name)
        features.append(commit.commit.author.date)
        
        # Counts are appended in the same order as the names in feature_columns.
        for feature in feature_counter.features:
            features.append(feature.count)

        feature_list.append(features)
        
        #print(f" {commit.commit.author.name} => {commit.commit.author.date} : {commit.commit.message} ({commit.sha})")
        #feature_counter.display()
        
# Build the frame once from the accumulated rows.
df = pd.DataFrame(feature_list, columns=feature_columns)

#writer = pd.ExcelWriter('output.xlsx')
#df.to_excel(writer,'Sheet1')
#writer.save()

# Write the result to output.csv (path relative to the working directory), without the index column.
df.to_csv('output.csv', encoding='utf-8', index=False)

- processing accounts 2017-11-17 10:55:15 (igormusic) Fixed code gen and unit tests
- processing accounts 2017-11-15 13:28:01 (igormusic) Map Long vs. String - pending fixing of the unit tests
- processing accounts 2017-11-13 01:17:45 (igormusic) Implemented Java runtime code gen
- processing accounts 2017-11-12 22:57:19 (igormusic) Implemented Java runtime code gen
- processing accounts 2017-11-12 18:46:17 (igormusic) Implemented Java runtime code gen
- processing accounts 2017-08-15 20:34:58 (Igor Music) Scripting - work in progress
- processing accounts 2017-08-11 20:02:48 (Igor Music) scripting
- processing accounts 2017-08-07 19:46:23 (Igor Music) Added Axon code - work in progress
- processing accounts 2017-08-07 14:17:08 (Igor Music) Added unit tests
- processing accounts 2017-08-05 15:23:25 (Igor Music) Added schedules
- processing accounts 2017-08-03 11:48:01 (Igor Music) Changed Positions to Map
- processing accounts 2017-08-02 13:41:20 (Igor Music) Removed code generation ap