In [9]:
#import library
from pathlib import Path
from os import listdir
import json
import pandas as pd
import numpy as np


class ScoringObject(object):
    """
    Score one team's dabble competition submissions against the true results.

    Constructing an instance performs the whole workflow: it reads the JSON
    scoring configuration, loads the solution file and every submission file
    of the team, and (when the configuration asks for it) computes the
    requested scikit-learn metrics and prints the resulting score list.

    Class attributes (defaults only; ``__init__`` rebinds each of them on the
    instance so no state is shared between ScoringObject instances):
        configData: dictionary of configuration data
        list_submissionData: list of ``[file path, DataFrame]`` submissions
        compTrueRes: DataFrame holding the true competition results
        scores: list of ``[file path, metric name, value]`` entries

    Instance attributes:
        configPath: ``pathlib.Path`` of the configuration file
        TeamID: team identifier, used to locate the team's submission folder
    """

    # Class-level defaults kept for backward compatibility. They are never
    # mutated: every instance gets its own fresh objects in __init__.
    configData = dict()
    list_submissionData = list()
    compTrueRes = pd.DataFrame()
    scores = list()

    # (config section, config key, sklearn.metrics function name), listed in
    # the order in which scores are appended for each submission.
    # NOTE: 'Clustering'/'adjusted_rand_score' is intentionally absent; its
    # scoring was disabled (commented out) in the original implementation.
    _METRICS = (
        ('Classification', 'accuracy', 'accuracy_score'),
        ('Classification', 'average_precision', 'average_precision_score'),
        ('Classification', 'f1', 'f1_score'),
        ('Classification', 'log_loss', 'log_loss'),
        ('Classification', 'precision', 'precision_score'),
        ('Classification', 'recall', 'recall_score'),
        ('Classification', 'roc_auc', 'roc_auc_score'),
        ('Regression', 'mean_absolute_error', 'mean_absolute_error'),
        ('Regression', 'mean_squared_error', 'mean_squared_error'),
        ('Regression', 'median_absolute_error', 'median_absolute_error'),
        ('Regression', 'r2', 'r2_score'),
    )

    # f1 averaging modes taken from the config; anything else means 'binary'.
    _F1_AVERAGES = ('micro', 'macro', 'weighted', 'samples')

    def __init__(self, configPath='', TeamID=''):
        """
        Load configuration and data, then run the scoring that is required.

        Parameters:
            configPath: path of the JSON scoring configuration file
            TeamID: name of the team folder below the configured
                ``submission_dir``

        Problems with the configuration are reported with a printed
        "Error: ..." message and leave the instance with empty results.
        """
        self.configPath = Path(configPath)
        self.TeamID = TeamID

        # Per-instance state. Without this, appends would land on the shared
        # class-level lists and leak between teams.
        self.configData = dict()
        self.list_submissionData = list()
        self.compTrueRes = pd.DataFrame()
        self.scores = list()

        if not self.configPath.is_file():
            print("Error: Config File does not exist")
            return

        # load_scoring_config already printed the reason when it returns False
        if not self.load_scoring_config():
            return

        self.load_scoring_data()

        score_metrics = self.configData['score_metrics']

        if self._is_required(score_metrics.get('score_answers')):
            self.score_answers()
            print(self.scores)

        if self._is_required(score_metrics.get('score_code')):
            # TODO: call self.score_code() once it is implemented
            print(2)

    @staticmethod
    def _is_required(section):
        """Return True when a config section is a dict with a truthy 'required'."""
        return isinstance(section, dict) and bool(section.get('required'))

    def _metric_config(self, section, key):
        """Return the config dict of one metric, or None when it is not configured."""
        group = self.configData['score_metrics'].get(section)
        if not isinstance(group, dict):
            return None
        return group.get(key)

    def load_scoring_config(self):
        """
        Read the JSON configuration file into ``self.configData``.

        Returns:
            True when the loaded configuration passes
            ``check_scoring_config``, False otherwise.
        """
        with open(str(self.configPath)) as json_data:
            self.configData = json.load(json_data)

        # make sure that scoring config data is present
        return self.check_scoring_config()

    def load_scoring_data(self):
        """
        Load the true results and all submission files of the team.

        The solution is read from ``score_metrics['solution_file']`` into
        ``self.compTrueRes``. Submissions are read from
        ``<submission_dir>/<TeamID>/Submission/`` into
        ``self.list_submissionData`` as ``[file path, DataFrame]`` pairs,
        sorted by path so the scoring order is deterministic.

        Raises:
            KeyError: if 'solution_file' or 'submission_dir' is missing from
                the configuration.
        """
        score_metrics = self.configData['score_metrics']

        # load competition data
        solution_file = score_metrics['solution_file']
        if Path(solution_file).is_file():
            self.compTrueRes = pd.read_csv(solution_file)
        else:
            print("Error: solution_file path or name not correct")

        # load team submissions data; check the folder that is actually read
        submission_path = Path(score_metrics['submission_dir']) / self.TeamID / "Submission"
        if not submission_path.is_dir():
            print("Error: submission directory not found: " + str(submission_path))
            return

        # Rebuild (rather than extend) the list so repeated calls do not
        # duplicate entries; skip sub-directories, read_csv cannot load them.
        self.list_submissionData = [
            [str(entry), pd.read_csv(entry)]
            for entry in sorted(submission_path.iterdir())
            if entry.is_file()
        ]

    def check_scoring_config(self):
        """
        Check that the configuration contains the 'score_metrics' section.

        Returns:
            True when the section is present; otherwise prints an error
            message and returns False.
        """
        if not isinstance(self.configData, dict) or 'score_metrics' not in self.configData:
            print("Error: score_metrics not in Config File")
            return False
        return True

    def score_answers(self):
        """
        Compare every loaded submission against the true competition results.

        For each metric in ``_METRICS`` whose config entry has a truthy
        'required' and a 'type' equal to 'scikit-learn' (case-insensitive),
        the matching ``sklearn.metrics`` function is called with the true
        results and the submission, and ``[file path, metric name, value]``
        is appended to ``self.scores``. Metrics that are not configured are
        treated as not required.
        """
        # Resolve the enabled metrics once instead of once per submission.
        enabled = []
        for section, key, func_name in self._METRICS:
            metric_cfg = self._metric_config(section, key)
            if not self._is_required(metric_cfg):
                continue
            if str(metric_cfg.get('type', '')).lower() != 'scikit-learn':
                continue

            label = key
            kwargs = {}
            if key == 'f1':
                # f1 is reported as 'f1_<average>'; unknown modes fall back to binary
                average = str(metric_cfg.get('average', '')).lower()
                if average not in self._F1_AVERAGES:
                    average = 'binary'
                label = 'f1_' + average
                kwargs = {'average': average}

            enabled.append((label, func_name, kwargs))

        if not enabled or not self.list_submissionData:
            return

        # Imported lazily so scikit-learn is only needed when something is scored.
        from sklearn import metrics as sk_metrics

        # DataFrame.as_matrix() was removed from pandas; .values is the
        # equivalent ndarray accessor.
        yTrue = self.compTrueRes.values

        for submission_path, submission in self.list_submissionData:
            yPred = submission.values
            for label, func_name, kwargs in enabled:
                score = getattr(sk_metrics, func_name)
                s = score(yTrue, yPred, **kwargs)
                self.scores.append([submission_path, label, s])

    def score_code(self):
        """Placeholder for running submitted code and scoring its answers."""
        # TODO: not implemented yet
        print(3)

In [6]:
# Build the scoring object for Team0; when the config requires answer scoring,
# the constructor prints the list of computed scores.
dabbleScoreObj = ScoringObject(
    configPath="/home/john/Projects/Dabble/Configs/config_comp0.json",
    TeamID="Team0",
)

[['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'accuracy', 0.98241590214067276], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'average_precision', 0.98574595674529486], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'f1_binary', 0.97641025641025636], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'log_loss', 0.60733322406953139], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'precision', 1.0], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'recall', 0.95390781563126248], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'roc_auc', 0.9769539078156313], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'mean_absolute_error', 0.017584097859327217], ['/home/john/Projects/Dabble/TeamCode/Team0/Submission/Team0_sub1.csv', 'mean_squared_error', 0.017584097859327217], ['/home/john/Projects/Dabble/Te

In [8]:
# Same competition config, scored for Team1 (rebinds dabbleScoreObj).
dabbleScoreObj = ScoringObject(
    configPath="/home/john/Projects/Dabble/Configs/config_comp0.json",
    TeamID="Team1",
)

[['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'accuracy', 0.99159021406727832], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'average_precision', 0.99318284887818453], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'f1_binary', 0.98885511651469093], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'log_loss', 0.29046371585934161], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'precision', 1.0], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'recall', 0.97795591182364727], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'roc_auc', 0.98897795591182369], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'mean_absolute_error', 0.0084097859327217118], ['/home/john/Projects/Dabble/TeamCode/Team1/Submission/Team0_sub1.csv', 'mean_squared_error', 0.0084097859327217118], ['/home/john/Projects/Dabble

In [10]:
# Same competition config, scored for Team2 (rebinds dabbleScoreObj).
dabbleScoreObj = ScoringObject(
    configPath="/home/john/Projects/Dabble/Configs/config_comp0.json",
    TeamID="Team2",
)

[['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'accuracy', 0.99617737003058104], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'average_precision', 0.9957605179778517], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'f1_binary', 0.994994994994995], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'log_loss', 0.13203079569330681], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'precision', 0.99399999999999999], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'recall', 0.99599198396793587], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'roc_auc', 0.99614185106925834], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'mean_absolute_error', 0.0038226299694189602], ['/home/john/Projects/Dabble/TeamCode/Team2/Submission/Team0_sub1.csv', 'mean_squared_error', 0.0038226299694189602], ['/home/john/Pr