In [42]:
import math
import pandas as pd
import matplotlib.pyplot as plt
import time
from scipy.io import loadmat

In [92]:
import json
import random
import numpy as np
import scipy.stats as st

class DecisionModeler:
    '''
    Grid-search utilities for a two-criterion (k1, k2) decision model.

    Workflow, as used by the methods below:
        1. generate_2_param_model() tabulates model probabilities over a
           grid of dp and (k1, k2) values and stores them as JSON.
        2. search_dp() loads that table and finds the dp whose grid points
           lie closest to a set of observed probability vectors.
    '''

    def __init__(self):
        pass

    @staticmethod
    def _report_progress(counter, total, reported):
        '''
        Print a progress line when a new 10% step is reached.

        Parameters:
            counter: number of items processed so far
            total: total number of items
            reported: last 10% step already printed (0..10)

        Returns:
            The last 10% step printed after this call.
        '''
        # Integer arithmetic on purpose: accumulating 0.1 in a float drifts
        # (0.2 + 0.1 > 0.3), which makes thresholds fire late or never.
        step = counter * 10 // total
        if step > reported:
            print(f'{float(step * 10)}% evaluated.')
            return step
        return reported

    def p_cc2a(self, dp, k1, k2):
        '''
        Evaluate the model probabilities for one (dp, k1, k2) combination.

        With Phi the standard normal CDF:
            a_given_a = Phi(k1 + dp/2)      b_given_a = 1 - Phi(k2 + dp/2)
            a_given_b = Phi(k1 - dp/2)      b_given_b = 1 - Phi(k2 - dp/2)

        Returns:
            Tuple (pAA, pBB, pAB, pBA); each entry is a sum of two products
            of the terms above.
        '''
        a_given_a = st.norm.cdf(k1 + dp/2)
        b_given_a = 1 - st.norm.cdf(k2 + dp/2)
        a_given_b = st.norm.cdf(k1 - dp/2)
        b_given_b = 1 - st.norm.cdf(k2 - dp/2)

        pAA = a_given_a*b_given_a + b_given_a*a_given_a
        pBB = a_given_b*b_given_b + b_given_b*a_given_b
        # NOTE(review): pAB and pBA are the same two products in a different
        # order, so they are always equal as written - confirm this is the
        # intended model.
        pAB = a_given_a*b_given_b + b_given_a*a_given_b
        pBA = a_given_b*b_given_a + b_given_b*a_given_a

        return pAA, pBB, pAB, pBA

    def generate_2_param_model(self, dp_vals, k_vals, p_function, filepath):
        '''
        Tabulate p_function over a grid and write the table to a JSON file.

        Parameters:
            dp_vals: iterable of dp values (must support len())
            k_vals: iterable of criterion values; every ordered pair
                (k1, k2) with k2 >= k1 is evaluated
            p_function: callable (dp, k1, k2) -> JSON-serialisable result
            filepath: destination path, overwritten if it exists

        The JSON object maps each dp (json turns the key into a string) to
        the list of p_function results, in (k1, k2) iteration order.
        '''
        # The admissible (k1, k2) pairs do not depend on dp: build them once
        # instead of re-filtering k_vals inside every dp iteration.
        k_pairs = [(k1, k2) for k1 in k_vals for k2 in k_vals if k2 >= k1]

        model = dict()
        counter = reported = 0
        total = len(dp_vals)

        for dp in dp_vals:
            model[dp] = [p_function(dp, k1, k2) for k1, k2 in k_pairs]

            counter += 1
            reported = self._report_progress(counter, total, reported)

        with open(filepath, 'w') as file:
            json.dump(model, file)
        # Report success only once the file has been closed (and flushed).
        print('Successfully written data to file.')

    def get_distance(self, pHuman, pModel):
        '''
        Return the squared Euclidean distance between two vectors.

        Components are paired with zip(), so extra trailing components of the
        longer vector are ignored.
        '''
        return sum((pH - pM)**2 for pH, pM in zip(pHuman, pModel))

    def get_chi_squared(self, pHuman, pModel, n=18):
        '''
        Return a chi-squared statistic comparing two probability vectors.

        For each pair (pH, pM) with pooled value p_avg = (pH + pM) / 2 the
        contribution is
            n * ((pH - p_avg)**2 + (pM - p_avg)**2) / (p_avg * (1 - p_avg)).

        Parameters:
            pHuman, pModel: iterables of probabilities, paired with zip()
            n: multiplier applied to every term; defaults to the previously
                hard-coded 18 (presumably the number of trials behind each
                proportion - TODO confirm)

        Returns:
            The summed statistic, or np.inf as soon as one pooled value is
            exactly 0 or 1, where the denominator vanishes.
        '''
        chi_squared = 0

        for pH, pM in zip(pHuman, pModel):
            p_avg = (pH + pM) / 2
            # p_avg == 1 makes (1 - p_avg) zero just like p_avg == 0 makes
            # p_avg zero; both would divide by zero (ZeroDivisionError for
            # Python floats, nan/inf for NumPy floats).
            if p_avg == 0 or p_avg == 1:
                return np.inf
            chi_squared += n*((pH-p_avg)**2 + (pM - p_avg)**2)/p_avg/(1-p_avg)

        return chi_squared

    def search_dp(self, pHuman_vals, model_input):
        '''
        Find the dp whose precomputed model points best match the data.

        Routine:
            Scan dp values, (k1,k2) vals
            Minimize euclidean distance
            Keep track of chi-squared and best dp

        Parameters:
            pHuman_vals: sequence of observed probability vectors, one
                4-component vector per observation
            model_input: path of a JSON file as written by
                generate_2_param_model()

        Returns:
            (best_dp, chi_squared): the dp with the smallest summed distance
            and the sum of the per-observation chi-squared values recorded
            for that dp (np.inf if any of them is np.inf).

        Raises:
            ValueError: if no dp yields a comparable summed distance (for
                example the model file is empty).
        '''
        # Load the whole table up front so the file is not held open during
        # the (long) search.
        with open(model_input, 'rb') as file:
            model = json.load(file)

        n_points = len(pHuman_vals)
        min_dist = np.inf
        min_chi_squared = None
        best_dp = 0
        best = None

        now = time.time()
        ev_count = 0
        counter = reported = 0
        total = len(model)

        for dp, model_points in model.items():
            # Fresh arrays for every dp: the best ones are stored by
            # reference below, so re-filling shared buffers in place would
            # overwrite the stored best results.
            dist_arr = np.full(n_points, np.inf)
            chi_squared_arr = np.full(n_points, np.inf)
            best_pts = np.full((n_points, 4), np.nan)

            for pModel in model_points:
                for index, pHuman in enumerate(pHuman_vals):
                    dist = self.get_distance(pHuman, pModel)
                    chi_squared = self.get_chi_squared(pHuman, pModel)
                    if dist < dist_arr[index]:
                        dist_arr[index] = dist
                        best_pts[index] = pModel

                    # Smallest chi-squared seen for this observation; it is
                    # tracked independently and need not belong to the
                    # closest point stored in best_pts.
                    chi_squared_arr[index] = min(chi_squared_arr[index], chi_squared)
                    ev_count += 1

            total_dist = sum(dist_arr)
            if total_dist < min_dist:
                best = best_pts
                min_dist = total_dist
                min_chi_squared = chi_squared_arr
                best_dp = float(dp)

            counter += 1
            reported = self._report_progress(counter, total, reported)

        print(f'Completed in {round((time.time()-now)/60, 2)} minutes with {ev_count} evaluations.')

        if min_chi_squared is None:
            raise ValueError(
                f'No best fit found in {model_input!r}: the model is empty or '
                'every summed distance was inf/nan.'
            )

        print(f"Best dp: {best_dp}\tChi-squared: {sum([c for c in min_chi_squared if c != np.inf])}")
        print(f"Degrees of freedom: {len(pHuman_vals) - np.count_nonzero(min_chi_squared==np.inf)}")
        print("Closest points:")
        print(best)
        # Report the statistic of the best dp, not of the last dp scanned.
        return best_dp, sum(min_chi_squared)
                                       

In [45]:
points

array([[0.        , 0.        , 0.05555556, 0.22222222],
       [0.05555556, 0.05555556, 0.22222222, 0.27777778],
       [0.        , 0.16666667, 0.16666667, 0.22222222],
       [0.05555556, 0.05555556, 0.16666667, 0.33333333],
       [0.        , 0.        , 0.33333333, 0.38888889],
       [0.05555556, 0.05555556, 0.16666667, 0.5       ],
       [0.        , 0.        , 0.44444444, 0.66666667],
       [0.        , 0.05555556, 0.44444444, 0.38888889],
       [0.        , 0.11111111, 0.5       , 0.22222222],
       [0.        , 0.        , 0.22222222, 0.38888889],
       [0.        , 0.        , 0.11111111, 0.22222222],
       [0.05555556, 0.05555556, 0.22222222, 0.38888889],
       [0.        , 0.16666667, 0.16666667, 0.27777778],
       [0.05555556, 0.05555556, 0.22222222, 0.33333333],
       [0.        , 0.        , 0.38888889, 0.38888889],
       [0.05555556, 0.05555556, 0.27777778, 0.55555556],
       [0.05555556, 0.05555556, 0.55555556, 0.83333333],
       [0.05555556, 0.11111111,

In [None]:
# Fit dp for the data in one .mat file against the precomputed model table.
# Requires 'cc2a_reduced.json', which generate_2_param_model() writes.
modeler = DecisionModeler()

# NOTE(review): dp_vals is not used anywhere below in this cell.
dp_vals = np.linspace(0, 2.5, 101)

dat = loadmat('data/raw/8/LJJ.mat')
types = ['AA', 'BB', 'AB', 'BA']
num_ratings = 3

# For each trial type, join the first row of every per-rating entry
# ('p_<type>_<rating>Diff', ratings 1..num_ratings) into one long vector.
points = []
for t in types:
    per_rating = [dat[f'p_{t}_{n+1}Diff'][0] for n in range(num_ratings)]
    points.append(np.concatenate(per_rating))

# Transpose so that each row holds one (AA, BB, AB, BA) vector.
points = np.array(points).T

modeler.search_dp(points, model_input='cc2a_reduced.json')

10.0% evaluated.
20.0% evaluated.
30.0% evaluated.
40.0% evaluated.
50.0% evaluated.


In [55]:
# Precompute the model table over the dp / criterion grid and store it as
# JSON; search_dp() reads this file back through its model_input argument.
modeler = DecisionModeler()

dp_vals = np.linspace(start=0, stop=2.5, num=101)
k_vals = np.linspace(start=-3, stop=3, num=301)
modeler.generate_2_param_model(
    dp_vals=dp_vals,
    k_vals=k_vals,
    p_function=modeler.p_cc2a,
    filepath='cc2a_reduced.json',
)

10.0% evaluated.
20.0% evaluated.
30.0% evaluated.
40.0% evaluated.
50.0% evaluated.
60.0% evaluated.
70.0% evaluated.
80.0% evaluated.
90.0% evaluated.
100.0% evaluated.
Successfully written data to file.
