# Machine learning - Predict error class probability based on the dev-team

In [2]:
import re
from datetime import datetime
import matplotlib.pyplot as plt
from dateutil.parser import parse
import numpy as np
import json
import sys
from random import randint
from collections import defaultdict
from pprint import pprint
from math import floor

# Configuration
# Name used to build the path of the PHP error log that gets parsed.
file_owner = 'casper'
# A project name is the path segment directly after one of these folders.
root_folder1, root_folder2 = 'htdocs', 'Documents'
# Figure width setting (not referenced by the code visible in this cell).
fig_width = 28

class Project(object):
    """Error bookkeeping for one project.

    Attributes are plain instance attributes so the object can be dumped
    through its ``__dict__``.
    """

    def __init__(self, name):
        """Create an empty record for the project called *name*."""
        self.name = name
        # Total number of errors registered through add_error().
        self.error_count = 0
        # Random value between 50 and 250 (both inclusive), drawn once here.
        self.user_story_count = randint(50, 250)
        # Maps an error class to how often it was seen; unseen classes read 0.
        self.error_classes = defaultdict(int)

    def add_error(self, error):
        """Register one occurrence of the error class *error*."""
        self.error_count = self.error_count + 1
        self.error_classes[error] = self.error_classes[error] + 1

    def get_error_classes(self):
        """Return the mapping of error class to occurrence count."""
        return self.error_classes

    def get_error_count(self):
        """Return the total number of registered errors."""
        return self.error_count

    def get_errors(self):
        """Return the error classes seen so far as a new list."""
        return [error for error in self.error_classes]
    
        
class Member():
    """A team member, the projects they worked on and their error statistics.

    On construction the member's projects are looked up by name through the
    module-level ``get_project_by_name``. Probabilities are expressed as
    percentages (0-100).
    """

    # Names of the projects each known member worked on, in lookup order.
    _PROJECT_NAMES_BY_MEMBER = {
        "Alex": ("VdValk", "oppstap", "greuter"),
        "Dennis": ("oppstap", "botblecms"),
        "Casper": (
            "VdValk",
            "oppstap",
            "greuter",
            "botblecms",
            "ariez",
            "aogbay_2",
            "hoteltotaal",
            "aogbay",
            "snippets",
        ),
        "Sjoerd": ("VdValk", "oppstap", "ariez", "aogbay_2"),
    }

    def __init__(self, name):
        """Create the member called *name* and resolve their projects."""
        self.name = name
        self.projects = self.get_projects()
        # Filled by calculate_error_stats(): error class -> 'NN.NN' string.
        self.error_stats = {}

    def get_project_mean_error_probability(self, project):
        """Return each error class's share of *project*'s errors, in percent.

        The result maps every error class known to the project to
        ``count / project.error_count * 100``; classes with a zero count
        (or a project without any errors) map to 0.
        """
        error_probabilities = {}
        for error, occurrences in project.get_error_classes().items():
            # Guard the division so an inconsistent/empty project yields 0
            # instead of raising ZeroDivisionError.
            if occurrences and project.error_count:
                error_probabilities[error] = occurrences / project.error_count * 100
            else:
                error_probabilities[error] = 0
        return error_probabilities

    def get_mean_error_probability(self, error):
        """Return the mean percentage of *error* over this member's projects.

        Projects that never saw *error* contribute 0. A member without any
        projects yields 0.0 rather than dividing by zero.
        """
        if not self.projects:
            return 0.0
        error_probability = 0
        for project in self.projects:
            project_probabilities = self.get_project_mean_error_probability(project)
            error_probability += project_probabilities.get(error, 0)
        return error_probability / len(self.projects)

    def calculate_error_stats(self, error_list):
        """Store and return the mean probability of every error in *error_list*.

        Values are formatted as strings with two decimals.
        """
        for error in error_list:
            self.error_stats[error] = format(self.get_mean_error_probability(error), '.2f')
        return self.error_stats

    def get_projects(self):
        """Return the known projects this member worked on.

        Unknown member names yield an empty list. Project names for which
        ``get_project_by_name`` finds nothing (returns ``None``) are skipped,
        so later statistics never have to deal with ``None`` entries.
        """
        names = self._PROJECT_NAMES_BY_MEMBER.get(self.name, ())
        looked_up = (get_project_by_name(name) for name in names)
        return [project for project in looked_up if project is not None]

    def get_error_stats(self):
        """Pretty-print the stored error statistics (returns nothing)."""
        pprint(self.error_stats)

    def get_total_error_count(self):
        """Pretty-print the summed error count of all projects (returns nothing)."""
        pprint(sum(project.get_error_count() for project in self.projects))

# restricted_days_list = ['27-Mar-2016', '30-Apr-2016']

# Every Project created while parsing the log ends up in this list.
project_list = []

# Substrings that identify an error class in a log line. The order matters:
# get_error_class() returns the first entry that occurs in the line.
error_class_list = [
    "No such file or directory",
    "Illegal string offset",
    "not found",
    "Failed opening required",
    "does not exist",
    "Uncaught TypeError",
    "Call to undefined function",
    "foreign key constraint",
    "Trying to get property of non-object",
    "Integrity constraint violation",
    "syntax error",
    "Access denied for user",
    "Undefined offset",
    "Uncaught ReflectionException",
]

# Path of the PHP error log to parse; the file owner's name is part of the file name.
infile = ''.join((r'../include/logs/php_error_', file_owner, '.log'))

def find_between(s, first, last):
    """Return the part of *s* between *first* and the next *last*.

    The search for *last* starts right after the first occurrence of
    *first*. An empty string is returned when either marker is missing.
    """
    marker_pos = s.find(first)
    if marker_pos == -1:
        return ""
    begin = marker_pos + len(first)
    finish = s.find(last, begin)
    if finish == -1:
        return ""
    return s[begin:finish]
        
def get_project_name(line):
    """Return the project name found in *line*, or None.

    The name is the path segment that directly follows ``root_folder1 + "/"``;
    if that yields nothing, the segment after ``root_folder2 + "/"`` is used.
    """
    candidates = [
        find_between(line, root_folder + "/", "/")
        for root_folder in (root_folder1, root_folder2)
    ]
    # First non-empty candidate wins; None when both lookups came back empty.
    return next((candidate for candidate in candidates if candidate), None)
    
def get_error_class(line):
    """Return the first entry of ``error_class_list`` contained in *line*.

    Falls back to 'Unknown' when no known error class occurs in the line.
    """
    matching = (known for known in error_class_list if known in line)
    return next(matching, 'Unknown')

def get_date(new_date, old_date, date_type):
    """Pick the earlier or later of two dates.

    Both arguments are converted with ``str()`` and parsed using the
    '%d-%b-%Y' format. With ``date_type == 'first'`` the earlier datetime
    is returned, with ``'last'`` the later one; any other value gives None.
    When both dates are equal the parsed *old_date* is returned.
    """
    date_format = '%d-%b-%Y'
    candidate = datetime.strptime(str(new_date), date_format)
    current = datetime.strptime(str(old_date), date_format)

    if date_type == 'first':
        return candidate if candidate < current else current
    if date_type == 'last':
        return candidate if candidate > current else current
    return None
    

    

def get_project_by_name(name):
    """Return the first project in ``project_list`` named *name*, or None."""
    return next(
        (candidate for candidate in project_list if candidate.name == name),
        None,
    )
    
    
def add_project(project_obj):
    """Add *project_obj* to ``project_list`` unless its name is already present.

    Returns the name of *project_obj*. The name is taken from the argument
    itself rather than from a module-level variable, so the function also
    works when it is called outside the log-parsing loop.
    """
    already_added = any(project.name == project_obj.name for project in project_list)
    if not already_added:
        project_list.append(project_obj)
    return project_obj.name

        
def make_project(name):
    """Create and return a new Project called *name*."""
    return Project(name)


def obj_dict(obj):
    """Return the attribute dictionary of *obj* (used as ``json.dumps`` default)."""
    attributes = obj.__dict__
    return attributes

# Parse the error log: every line that contains a '[' followed by at least
# one more character and that mentions a project path is counted as one
# error of that project.
with open(infile) as f:
    f = f.readlines()

    for idx, line in enumerate(f):
        match = re.findall(r"([[].+)", line)
        if not match:
            continue

        project_name = get_project_name(line)
        if not project_name:
            continue

        existing_project = get_project_by_name(project_name)
        error_class = get_error_class(line)
        if not existing_project:
            # First error seen for this project: create and register it.
            project = make_project(project_name)
            add_project(project)
            project.add_error(error_class)
        else:
            existing_project.add_error(error_class)
                        
                            

# JSON dump of all parsed projects; only referenced by the commented-out print below.
json_string = json.dumps(project_list, indent=4, default=obj_dict)
# print(json_string);

# Build the four known team members, in the same order as before.
dennis, alex, casper, sjoerd = [
    Member(member_name) for member_name in ("Dennis", "Alex", "Casper", "Sjoerd")
]

# Compute the per-member statistics for every known error class.
for team_member in (dennis, alex, casper, sjoerd):
    team_member.calculate_error_stats(error_class_list)

# The team whose combined error probabilities are reported.
dev_team = [dennis, alex]

def get_error_occurence_probability(team, errors):
    """Pretty-print the team-wide mean probability of each error class.

    For every entry of *errors* the members' mean error probabilities are
    added up and divided by the team size; the result is stored as a string
    with one decimal. Nothing is returned. An empty team prints an empty dict.
    """
    team_size = len(team)
    averaged = {}
    for error in errors:
        total = 0
        for member in team:
            total += member.get_mean_error_probability(error)
        # No entry is recorded for an empty team, so nothing is divided by zero.
        if team_size:
            averaged[error] = format(total / team_size, '.1f')
    pprint(averaged)
    
# Print the mean probability of every known error class for the selected dev team.
get_error_occurence_probability(dev_team, error_class_list)

{'Access denied for user': '0.0',
 'Call to undefined function': '0.0',
 'Failed opening required': '9.2',
 'Illegal string offset': '0.0',
 'Integrity constraint violation': '0.0',
 'No such file or directory': '9.2',
 'Trying to get property of non-object': '0.0',
 'Uncaught ReflectionException': '0.0',
 'Uncaught TypeError': '0.0',
 'Undefined offset': '0.0',
 'does not exist': '8.3',
 'foreign key constraint': '0.0',
 'not found': '48.3',
 'syntax error': '15.3'}
