In [6]:
from github import Github, GithubException, Repository, ContentFile
import re
from typing import List
import pandas as pd
from pandas import DataFrame as df
import numpy as np
import csvtotable

In [9]:
# Read the access token from the GITHUB_TOKEN environment variable so that no
# real key ever has to be typed into (and later scrubbed from) this notebook
# before committing to GitHub. Falls back to the "dummy" placeholder when the
# variable is not set.
import os

github = Github(os.environ.get("GITHUB_TOKEN", "dummy"))

In [163]:
# Resolve the repositories step by step: organisation "hmrc" -> team 1996975
# -> the repositories returned for that team.
hmrc_organization = github.get_organization("hmrc")
mtd_team = hmrc_organization.get_team(1996975)
mtd_team_repos = mtd_team.get_repos()

In [175]:
class Program:
    """Collect error summaries for every versioned folder of a set of repos.

    After construction, ``self.data`` holds one ``ErrorSummary`` per
    (repository, version folder) pair that could be read.
    """

    # Versioned source folders have paths such as "app/v1", "app/v2", ...
    _VERSION_PATH_REGEX = re.compile(r"app/v\d+")
    _APP_PREFIX = "app/"

    # Header row of the table produced by dataAsArray().
    _COLUMN_NAMES = (
        "repo_name",
        "microservice_version",
        "has_paths",
        "error_name",
        "error_code",
        "error_message",
    )

    @staticmethod
    def getMicroserviceVersions(repo: "Repository.Repository"):
        """Return the version folder names (e.g. ``"v1"``) found under ``app``.

        Args:
            repo: Repository whose ``app`` folder is listed.

        Returns:
            The part after ``"app/"`` of every entry whose path starts with
            ``app/v<digits>``, in listing order.

        Raises:
            GithubException: Propagated from ``repo.get_contents`` (for
                example when the repository has no ``app`` folder).
        """
        return [
            item.path[len(Program._APP_PREFIX):]
            for item in repo.get_contents("app")
            if Program._VERSION_PATH_REGEX.match(item.path) is not None
        ]

    @staticmethod
    def getErrorSummaries(mtd_team_repos: List["Repository.Repository"]):
        """Build an ``ErrorSummary`` for every version folder of every repo.

        Args:
            mtd_team_repos: Iterable of repositories to inspect.

        Returns:
            List of ``ErrorSummary`` objects, in repo order then version order.

        Raises:
            GithubException: For any GitHub failure other than a 404.
            Exception: Anything raised while building an ``ErrorSummary``.
        """
        error_summaries = []
        for mtd_team_repo in mtd_team_repos:
            try:
                # getMicroserviceVersions already lists "app", so no separate
                # existence probe is needed (saves one API call per repo).
                for microservice_version in Program.getMicroserviceVersions(mtd_team_repo):
                    error_summaries.append(ErrorSummary(mtd_team_repo, microservice_version))
            except GithubException as ex:
                # A 404 is taken to mean a requested folder is missing from
                # this repo; the rest of that repo is skipped. Summaries that
                # were already appended for it are kept.
                if ex.status != 404:
                    raise
        return error_summaries

    def dataAsArray(self):
        """Flatten ``self.data`` into a 2-D array with a header row.

        Returns:
            ``numpy.ndarray`` of shape ``(1 + number_of_errors, 6)``. Row 0 is
            the column names; each following row describes one error. The
            result is always 2-D, even when there are no errors at all, so
            callers can safely split it into ``array[0]`` / ``array[1:]``.
        """
        rows = [np.asarray(Program._COLUMN_NAMES)]
        for item in self.data:
            for error in item.errors:
                # Each row is converted on its own, so a boolean has_paths is
                # stored in its string form alongside the string fields.
                rows.append(np.array([
                    item.repo_name,
                    item.microservice_version,
                    item.has_paths,
                    error.error_name,
                    error.error_code,
                    error.error_message,
                ]))
        # Stack once at the end instead of re-copying the table per row.
        return np.vstack(rows)

    def __init__(self, repos: List["Repository.Repository"]):
        """Fetch and store the error summaries for ``repos``."""
        self.data = Program.getErrorSummaries(repos)

In [176]:
class ErrorSummary:
    """Errors declared under ``app/<version>/models/errors`` of one repository.

    Attributes set by ``__init__``:
        repo_name: ``repo.name``.
        microservice_version: The version folder name that was inspected.
        has_paths: Whether the error case class has a ``paths`` parameter.
        errors: List of ``Error`` objects, one per matched error object.
    """

    # Block comments, lines containing "//", and package/import lines.
    _UNWANTED_CONTENT_REGEX = re.compile(
        r"\/\*\n(.*\*.*\n)*.*\*\/|.*\/\/.*\n|package.*\n|import.*\n"
    )
    # Parameter list of "case class MtdError(...)" or "case class Error(...)".
    _CASE_CLASS_PARAMS_REGEX = re.compile(
        r"(?<=case class MtdError\().*?(?=\))|(?<=case class Error\().*?(?=\))",
        re.DOTALL,
    )
    # "object Name extends SomethingError(...)"; the match ends at the first
    # ")" after the opening parenthesis.
    _ERROR_OBJECT_REGEX = re.compile(
        r"object *[A-Za-z]* *extends *[A-Za-z]*Error\(.*?\)",
        re.DOTALL,
    )

    @staticmethod
    def getErrorContent(raw_error_content: "ContentFile.ContentFile"):
        """Return the UTF-8 text of a file with comments/package/imports removed.

        Args:
            raw_error_content: Object exposing ``decoded_content`` as bytes.

        Returns:
            The decoded text with every match of the unwanted-content pattern
            deleted.
        """
        decoded_error_content = raw_error_content.decoded_content.decode("utf-8")
        return ErrorSummary._UNWANTED_CONTENT_REGEX.sub("", decoded_error_content)

    @staticmethod
    def _getErrorModelContents(repo, microservice_version):
        """Yield the trimmed text of each entry in the version's errors folder."""
        errors_folder = "app/" + microservice_version + "/models/errors"
        for item in repo.get_contents(errors_folder):
            yield ErrorSummary.getErrorContent(item)

    @staticmethod
    def hasPaths(repo: "Repository.Repository", microservice_version: str):
        """Tell whether the error case class declares a ``paths`` parameter.

        Args:
            repo: Repository to read from.
            microservice_version: Version folder name, e.g. ``"v1"``.

        Returns:
            ``True`` if any comma-separated parameter of the single matching
            case class contains the text ``paths``, else ``False``.

        Raises:
            RuntimeError: If one file declares more than one matching case
                class, or if the folder as a whole does not contain exactly
                one.
            GithubException: Propagated from ``repo.get_contents``.
        """
        matching_params = []
        for content in ErrorSummary._getErrorModelContents(repo, microservice_version):
            found = ErrorSummary._CASE_CLASS_PARAMS_REGEX.findall(content)
            if len(found) > 1:
                raise RuntimeError(
                    "[ErrorSummary][hasPaths] - more than one error case class "
                    "declared in a single file of version " + microservice_version
                )
            matching_params += found
        if len(matching_params) != 1:
            raise RuntimeError(
                "[ErrorSummary][hasPaths] - expected exactly one error case class "
                "for version " + microservice_version
                + ", found " + str(len(matching_params))
            )
        return any("paths" in param for param in matching_params[0].split(","))

    @staticmethod
    def getErrors(repo: "Repository.Repository", microservice_version: str):
        """Parse every error object declared in the version's errors folder.

        Args:
            repo: Repository to read from.
            microservice_version: Version folder name, e.g. ``"v1"``.

        Returns:
            Non-empty list of ``Error`` objects in file order.

        Raises:
            RuntimeError: If no error object is found in any file.
            GithubException: Propagated from ``repo.get_contents``.
        """
        errors = []
        for content in ErrorSummary._getErrorModelContents(repo, microservice_version):
            for raw_error_string in ErrorSummary._ERROR_OBJECT_REGEX.findall(content):
                # Declarations may span several lines; join them before parsing.
                errors.append(Error(raw_error_string.replace("\n", "")))
        if not errors:
            raise RuntimeError(
                "[ErrorSummary][getErrors] - no error objects found for version "
                + microservice_version
            )
        return errors

    def __init__(self, repo: "Repository.Repository", microservice_version: str):
        """Read and parse the error definitions of one repo version."""
        self.repo_name = repo.name
        self.microservice_version = microservice_version
        self.has_paths = ErrorSummary.hasPaths(repo, microservice_version)
        self.errors = ErrorSummary.getErrors(repo, microservice_version)

In [191]:
class Error:
    """One error object parsed from a single-line declaration string.

    The input is expected to look like
    ``object SomeError extends MtdError("SOME_CODE", "Some message")``.

    Attributes set by ``__init__``:
        error_name: Text between ``object`` and ``extends``, spaces removed.
        error_code: First run of ``A-Z``/``_`` enclosed in double quotes that
            follows a space, ``(``, ``=`` or ``,``.
        error_message: Second double-quoted string, without the quotes.
    """

    _NAME_REGEX = re.compile(r"(?<=object).*(?=.*extends.*Error\(.*?\))")
    _CODE_REGEX = re.compile(r'(?<=( |\(|=|,)")[A-Z_]*(?=")')
    _QUOTED_STRING_REGEX = re.compile(r'"[^,]*?.*?"')

    @staticmethod
    def getErrorName(raw_error_string: str):
        """Return the object name declared in ``raw_error_string``.

        Raises:
            ValueError: If the string is not an ``object ... extends ...Error(...)``
                declaration.
        """
        error_name = Error._NAME_REGEX.search(raw_error_string)
        if error_name is None:
            raise ValueError("[Error][getErrorName] - no error name in: " + raw_error_string)
        return error_name.group().replace(" ", "")

    @staticmethod
    def getErrorCode(raw_error_string: str):
        """Return the upper-case error code quoted in ``raw_error_string``.

        Raises:
            ValueError: If no quoted upper-case code is present.
        """
        error_code = Error._CODE_REGEX.search(raw_error_string)
        if error_code is None:
            raise ValueError("[Error][getErrorCode] - no error code in: " + raw_error_string)
        return error_code.group()

    @staticmethod
    def getErrorMessage(raw_error_string: str):
        """Return the second double-quoted string of ``raw_error_string``.

        Raises:
            ValueError: If fewer than two quoted strings are present.
        """
        quoted_strings = Error._QUOTED_STRING_REGEX.findall(raw_error_string)
        if len(quoted_strings) < 2:
            raise ValueError("[Error][getErrorMessage] - no error message in: " + raw_error_string)
        # Index 0 is the error code; index 1 is the message.
        return quoted_strings[1].replace('"', "")

    def __init__(self, raw_error_string: str):
        """Parse name, code and message out of ``raw_error_string``."""
        self.error_name = Error.getErrorName(raw_error_string)
        self.error_code = Error.getErrorCode(raw_error_string)
        self.error_message = Error.getErrorMessage(raw_error_string)

In [192]:
# Building the Program fetches and parses the error summaries for every repo
# in mtd_team_repos; the result is available as ans.data.
ans = Program(repos=mtd_team_repos)

In [193]:
# Row 0 of the array is the header, the remaining rows are the records.
# np.atleast_2d keeps the header/records split valid even if the array comes
# back as a single 1-D header row (i.e. when no errors were collected).
data_as_array = np.atleast_2d(ans.dataAsArray())
# The original referenced an undefined name `data` here; the table lives in
# `data_as_array`.
data_as_df = df(data=data_as_array[1:], columns=data_as_array[0])

In [5]:
# Write the table to file.csv in the working directory, without the
# DataFrame's index column.
data_as_df.to_csv(path_or_buf="file.csv", index=False)