# An implementation of the Ant miner paper for Emotion Recognition

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

In [None]:
# Load the dataset from CSV. Later cells rely on a 'class' column holding the
# emotion label; the remaining columns are presumably numeric features named
# 'f<index>' (that is how Rule.new_match_set looks them up) — TODO confirm.
df = pd.read_csv("Resources/Ravdess Dataset.csv")
df

In [None]:
# Number of quantile levels evaluated as candidate split points per column.
n_candidate_thresholds = 10

In [None]:
# Candidate split points: n_candidate_thresholds evenly spaced quantile levels
# between 0 (column minimum) and 1 (column maximum). The result has one row per
# quantile level and one column per numeric column of df.
# numeric_only=True keeps a non-numeric label column out of the computation;
# pandas >= 2.0 no longer skips such columns by default and raises instead.
candidate_thresholds = df.quantile(
    np.linspace(0, 1, n_candidate_thresholds), numeric_only=True
)
candidate_thresholds

In [None]:
# Distinct class labels found in the 'class' column.
emotions = df['class'].unique()
emotions

In [None]:
# All column labels of df, including the 'class' label column.
cols = df.columns

In [None]:
# Empty per-threshold count tables with one column per non-label column of df:
# GE_df collects the ">= threshold" counts, LT_df the "< threshold" counts.
GE_df = pd.DataFrame(columns=cols.drop('class'))
LT_df = GE_df.copy(deep=True)
GE_df

In [None]:
# Count tensors indexed as (candidate threshold, non-label column, emotion).
# GE holds ">= threshold" counts and LT holds "< threshold" counts; LT is an
# independent copy, not a view of GE.
GE = np.zeros((n_candidate_thresholds, len(cols) - 1, len(emotions)))
LT = GE.copy()
GE.shape

In [None]:
# Fill the count tensors: for every emotion and every candidate quantile row,
# count how many samples of that emotion lie on each side of the threshold.
for e, emotion in enumerate(emotions):
    # The per-emotion feature table does not depend on the threshold, so it is
    # built once per emotion rather than twice per (emotion, threshold) pair.
    emotion_features = df.loc[df['class'] == emotion].drop('class', axis=1)
    for i in range(len(candidate_thresholds)):
        thresholds = candidate_thresholds.iloc[i]
        GE_df.loc[i] = emotion_features.ge(thresholds).sum()
        LT_df.loc[i] = emotion_features.lt(thresholds).sum()
    # Copy the finished (threshold x column) tables into this emotion's slice.
    GE[:, :, e] = GE_df.to_numpy()
    LT[:, :, e] = LT_df.to_numpy()

In [None]:
# Shuffle all rows, then keep the first 75% for training and the rest for
# testing. Positional slicing replaces np.split, which on a DataFrame goes
# through DataFrame.swapaxes (deprecated in recent pandas releases).
# NOTE(review): the shuffle is unseeded, so the split differs on every run.
shuffled = df.sample(frac=1)
split_at = int(0.75 * len(df))
train, test = shuffled.iloc[:split_at], shuffled.iloc[split_at:]
train_len = len(train)
test_len = len(test)
total_len = train_len + test_len
test_len, train_len, total_len

# Entropy values
H (stored in the variable `entropies` below) will be a matrix of size (1 x features). It is computed in a vectorized way to save time: this would take about 10 minutes in a plain for loop, whereas it now takes about 10 seconds. <br>
Strictly speaking it is not fully vectorized, because the counting step still loops over the 6 emotions. Since H is computed only once, that cost was accepted.

In [None]:
# Collapse the emotion axis: total number of samples on each side of every
# (threshold, column) split, regardless of class.
sums_GE = np.sum(GE, axis=2)
sums_LT = np.sum(LT, axis=2)
#sums_GE

In [None]:
# Repeat the totals along a new emotion axis so they have the same shape as
# the count tensors and can be used in an element-wise division.
repeated_sums_GE = np.repeat(sums_GE[:, :, np.newaxis], GE.shape[2], axis=2)
repeated_sums_LT = np.repeat(sums_LT[:, :, np.newaxis], LT.shape[2], axis=2)
#repeated_sums_GE[:, :, 1]

In [None]:
# Class proportions on each side of every split. Where a side contains no
# samples the division is skipped and the proportion stays at 0.
P_GE = np.divide(GE, repeated_sums_GE, out=np.zeros_like(GE), where=repeated_sums_GE!=0)
P_LT = np.divide(LT, repeated_sums_LT, out=np.zeros_like(LT), where=repeated_sums_LT!=0)
#P_GE

In [None]:
# log2 of the proportions; entries with proportion 0 are left at 0 so that
# they contribute nothing to the entropy sum below.
logP_GE = np.log2(P_GE, out=np.zeros_like(P_GE), where=P_GE>0)
logP_LT = np.log2(P_LT, out=np.zeros_like(P_LT), where=P_LT>0)
#logP_GE

In [None]:
# Shannon entropy (base 2) of the class distribution on each side of every
# split. NOTE: this rebinds GE and LT from 3-D count tensors to 2-D entropy
# matrices of shape (thresholds, columns); the counts are no longer available
# under these names after this cell.
GE = -np.sum(np.multiply(P_GE, logP_GE), axis=2)
LT = -np.sum(np.multiply(P_LT, logP_LT), axis=2)

In [None]:
# Entropy of each candidate split: the two side entropies weighted by the
# fraction of samples falling on each side.
# NOTE(review): yields NaN for any split where both sides are empty (0 / 0).
epv = sums_LT * LT / (sums_GE + sums_LT) + sums_GE * GE / (sums_GE + sums_LT)
epv.shape

In [None]:
# For every column pick the candidate threshold with the smallest epv, then
# keep the smaller of the two side entropies at that threshold. The result is
# a 1-D array with one entropy value per non-label column.
entropies = np.minimum(
                GE[np.argmin(epv, axis=0), np.array(range(len(cols) - 1))],
                LT[np.argmin(epv, axis=0), np.array(range(len(cols) - 1))]
            )
#entropies

In [None]:
# Heat map of the "< threshold" side entropies. The last two columns are
# dropped and the remainder is reshaped to 65 x 100, so this only works when
# exactly 6500 values remain (hard-coded to this dataset's dimensions).
vis = LT[:, :-2]
vis = vis.reshape(65,100)
plt.imshow(vis)
plt.colorbar()
plt.show()

In [None]:
# Heat map of the ">= threshold" side entropies. The last two columns are
# dropped and the remainder is reshaped to 65 x 100, so this only works when
# exactly 6500 values remain (hard-coded to this dataset's dimensions).
vis = GE[:, :-2]
vis = vis.reshape(65,100)
plt.imshow(vis)
plt.colorbar()
plt.show()

In [None]:
# Heat map of the per-column best-split entropies. The last two entries are
# dropped and the rest is reshaped to 65 x 10, so this only works when exactly
# 650 values remain (hard-coded to this dataset's dimensions).
vis = entropies[:-2]
vis = vis.reshape(65,10)
plt.imshow(vis)
plt.colorbar()
plt.show()

In [None]:
%load_ext line_profiler
def _split_entropy(counts):
    """Return (entropy, total) per row of a (thresholds, classes) count matrix.

    Rows whose total is zero get entropy 0, and classes with zero count
    contribute nothing to the sum.
    """
    totals = counts.sum(axis=1)
    row_totals = totals[:, np.newaxis]
    probs = np.divide(counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)
    log_probs = np.log2(probs, out=np.zeros_like(probs), where=probs > 0)
    return -np.sum(probs * log_probs, axis=1), totals


def calc_threshold_and_operator(dataset):
    """Pick the best candidate threshold and comparison operator for one feature.

    Every candidate threshold of the feature (taken from the module-level
    ``candidate_thresholds`` table) splits ``dataset`` into a ">=" side and a
    "<" side. The threshold whose split has the lowest size-weighted class
    entropy wins; the operator names the side with the lower entropy, with
    "<" winning ties.

    Args:
        dataset: DataFrame whose first column is the feature and which also
            has a 'class' column with labels drawn from the module-level
            ``emotions``.

    Returns:
        ``(threshold, operator)`` where ``threshold`` is a float and
        ``operator`` is either ``'>='`` or ``'<'``.
    """
    feature = dataset.columns[0]
    thresholds = candidate_thresholds[feature].to_numpy(dtype=float)
    values = dataset[feature].to_numpy(dtype=float)
    labels = dataset['class'].to_numpy(dtype=object)

    # One-hot class membership, shape (rows, emotions).
    class_names = np.asarray(emotions, dtype=object)
    is_class = (labels[:, np.newaxis] == class_names[np.newaxis, :]).astype(float)

    # Side membership for every (threshold, row) pair. A NaN value lands on
    # neither side, matching pandas' comparison semantics.
    ge_mask = values[np.newaxis, :] >= thresholds[:, np.newaxis]
    lt_mask = values[np.newaxis, :] < thresholds[:, np.newaxis]

    # Per-class counts on each side, shape (thresholds, emotions).
    ge_entropy, ge_total = _split_entropy(ge_mask.astype(float) @ is_class)
    lt_entropy, lt_total = _split_entropy(lt_mask.astype(float) @ is_class)

    # Size-weighted entropy of each candidate split.
    covered = ge_total + lt_total
    epv = lt_total * lt_entropy / covered + ge_total * ge_entropy / covered

    # Scalar index, so the returned threshold is a plain float rather than a
    # one-element Series/array converted implicitly.
    vbest = int(np.argmin(epv))
    operator = '>=' if ge_entropy[vbest] < lt_entropy[vbest] else '<'
    return float(thresholds[vbest]), operator

In [None]:
# Labels of every column except the 'class' label column.
features = df.drop('class', axis=1).columns
# log2 of the number of classes: the largest entropy a class distribution
# over len(emotions) classes can have.
logK = np.log2(len(emotions))

In [None]:
class Rule:
    """A classification rule: a conjunction of threshold terms predicting one emotion.

    Attributes:
        terms: maps a feature index to an ``(operator, threshold)`` pair,
            e.g. ``{1: ('>=', 13), 2: ('<', 0.2)}``.
        emotion: the class label the rule predicts (``None`` until assigned).
        numerosity: number of terms added through ``addTerm``.
        train_set: DataFrame of training rows the rule is evaluated against.
        match_set: rows of ``train_set`` that satisfy every term so far.
    """

    # Rules are mutable and compare by their terms, so they are deliberately
    # unhashable.
    __hash__ = None

    def __init__(self, train_set):
        self.terms = {}
        self.emotion = None
        self.numerosity = 0
        self.train_set = train_set
        self.match_set = train_set

    def __eq__(self, other):
        """Two rules are equal when they consist of exactly the same terms.

        The predicted emotion is not compared, so rules can be checked for
        duplication before an emotion has been assigned.
        """
        if not isinstance(other, Rule):
            return NotImplemented
        return self.terms == other.terms

    def new_match_set(self, term):
        """Return the rows of the current match set that also satisfy ``term``.

        ``term`` is a dict with a ``'feature'`` index and a ``'value'`` pair
        ``(operator, threshold)``; the feature is read from column
        ``'f<feature>'``. Any operator other than ``'>='`` is treated as
        ``'<'``. The rule itself is not modified.
        """
        operator, threshold = term['value']
        column = self.match_set['f' + str(term['feature'])]
        if operator == '>=':
            return self.match_set.loc[column >= threshold]
        return self.match_set.loc[column < threshold]

    def unmatch_set(self):
        """Return the training rows that are not in the match set.

        Rows are identified by index label rather than by value, so training
        rows that merely share identical values are all kept.
        """
        covered = self.train_set.index.isin(self.match_set.index)
        return self.train_set.loc[~covered]

    def correct_set(self):
        """Return the matched rows whose class equals the rule's emotion."""
        return self.match_set.loc[self.match_set['class'] == self.emotion]

    def exists(self, feature):
        """Return True if the rule already has a term on ``feature``."""
        return feature in self.terms

    def addTerm(self, term, new_matches):
        """Add ``term`` to the rule and adopt ``new_matches`` as its match set."""
        self.terms[term['feature']] = term['value']
        self.match_set = new_matches
        self.numerosity += 1

    def computeTerm(self, attribute):
        """Placeholder; not implemented."""
        pass

    def quality(self):
        """Return sensitivity * specificity of the rule on its training set.

        Matched rows count as predicted positives and unmatched rows as
        predicted negatives. When the training set holds no row of the rule's
        emotion, sensitivity is taken as 0.0; when it holds no row of any
        other emotion, specificity is taken as 1.0 (there is nothing the rule
        could wrongly cover). Both cases would otherwise divide by zero.
        """
        matches = self.match_set
        unmatches = self.unmatch_set()
        TP = len(matches.loc[matches['class'] == self.emotion])
        FP = len(matches.loc[matches['class'] != self.emotion])
        FN = len(unmatches.loc[unmatches['class'] == self.emotion])
        TN = len(unmatches.loc[unmatches['class'] != self.emotion])

        sens = TP / (TP + FN) if TP + FN else 0.0
        spec = TN / (FP + TN) if FP + TN else 1.0

        return sens * spec

    def prune(self):
        """Placeholder; not implemented."""
        pass

        

In [None]:
def normalize_probs(probs):
    """Divide ``probs`` by the sum of its entries so that they add up to one."""
    total = np.sum(probs)
    return probs / total

In [None]:
class Ant:
    """A single ant that incrementally builds one Rule.

    Attributes:
        index: identifier given by the caller.
        rule: the Rule under construction, created on ``train_set``.
        n_tries: consecutive failed attempts allowed before the ant stops
            trying to extend its rule.
        min_coverage: minimum number of rows the rule must still match after
            a term is added.
    """

    def __init__(self, index, train_set, n_tries, min_coverage):
        self.index = index
        self.rule = Rule(train_set)
        self.n_tries = n_tries
        self.min_coverage = min_coverage

    def traverse(self, T):
        """Grow the rule term by term until no further term can be added.

        Features are drawn with probability proportional to
        ``(logK - entropies) * T``, where ``T`` is the pheromone level per
        feature. A drawn feature becomes a term only if the rule still
        matches at least ``min_coverage`` rows afterwards; a feature that was
        added is never drawn again. Construction stops after ``n_tries``
        consecutive rejected draws, or once no feature with a positive
        probability is left (the original code crashed in
        ``np.random.choice`` in that situation and looped forever when
        ``n_tries`` was 0).
        """
        Eta = logK - entropies
        Probs = np.multiply(Eta, T)

        # A zero (or NaN) total means there is nothing left to draw from.
        while np.sum(Probs) > 0:
            Probs = normalize_probs(Probs)

            term_added = False
            for _ in range(self.n_tries):
                # Without a size argument choice() returns a scalar index.
                chosen = int(np.random.choice(len(Probs), p=Probs))
                v, op = calc_threshold_and_operator(self.rule.match_set[['f' + str(chosen), 'class']])
                term = {'feature': chosen, 'value': (op, v)}
                new_cov = self.rule.new_match_set(term)
                if len(new_cov) >= self.min_coverage:
                    self.rule.addTerm(term, new_cov)
                    print("Added " + str(term))

                    # Exclude the feature from all later draws.
                    Probs[chosen] = 0
                    term_added = True
                    break

            if not term_added:
                return

    def set_emotion(self):
        """Assign the rule the most frequent class among its matched rows."""
        matches = self.rule.match_set
        self.rule.emotion = matches['class'].mode()[0]

        

In [None]:
class World:
    """Runs the ant colony: dispatches ants, tracks pheromones, collects rules.

    Attributes:
        min_coverage: minimum rows a rule must match (passed to every Ant).
        max_uncovered: rule discovery stops once at most this many training
            rows remain uncovered.
        n_ants: maximum number of ants dispatched per generation.
        max_dups: a generation ends early once more than this many ants
            produced a rule identical to an earlier one.
        chances_before_brutally_murdering_ant: failed attempts each ant is
            allowed before it stops extending its rule.
        train_set: training rows not yet covered by a discovered rule.
        pheromone_map: one pheromone level per feature, initialised uniformly.
        discovered_rule_list: best rule of each finished generation, in order.
    """

    def __init__(
        self,
        min_coverage = 10,
        max_uncovered = 10,
        n_ants = 500,
        max_dups = 1,
        chances_before_brutally_murdering_ant = 10,
        train_set = None,
    ):
        self.min_coverage = min_coverage
        self.max_uncovered = max_uncovered
        self.n_ants = n_ants
        self.max_dups = max_dups
        self.chances_before_brutally_murdering_ant = chances_before_brutally_murdering_ant
        # The default is resolved per instance: a copy of the module-level
        # ``train`` as it is when the World is created, not a single object
        # captured when the class was defined.
        self.train_set = train.copy(deep=True) if train_set is None else train_set
        self.pheromone_map = np.ones_like(entropies) * (1 / len(features))
        self.discovered_rule_list = []

    def deposit_pheromones(self, new_pheromones):
        """Add ``new_pheromones`` element-wise to the pheromone map."""
        self.pheromone_map = self.pheromone_map + new_pheromones

    def evaporate_pheromones(self):
        """Renormalise the pheromone map so that it sums to one."""
        self.pheromone_map = self.pheromone_map / np.sum(self.pheromone_map)

    def plot_pheromones(self, a):
        """Save a heat map of the pheromone map under ``Plots/``.

        The last entry is dropped and the rest reshaped to 31 x 21, so this
        only works when the map has exactly 652 entries. The file name encodes
        the number of rules discovered so far and the ant index ``a``.
        NOTE(review): the ``Plots`` directory is not created here.
        """
        vis = self.pheromone_map[:-1]
        vis = vis.reshape(31,21)
        plt.imshow(vis)
        plt.colorbar()
        plt.savefig('Plots/Generation' + str(len(self.discovered_rule_list)) + 'ant' + str(a))
        plt.clf()

    def dispatch_ants(self):
        """Run one generation of ants and return the best rule they found.

        Each ant builds a rule guided by the current pheromone map. A rule
        whose terms equal those of an earlier rule in this generation only
        increments the duplicate counter; a new rule is scored, reinforces the
        pheromone of the features it uses in proportion to its quality, and is
        recorded. The generation ends after ``n_ants`` ants or as soon as more
        than ``max_dups`` duplicates were seen.

        Raises:
            ValueError: if no rule was built (``n_ants`` is less than 1).
        """
        prev_rules = []
        prev_qualities = []
        dups = 0
        for a in range(self.n_ants):
            ant = Ant(a, self.train_set, self.chances_before_brutally_murdering_ant, self.min_coverage)
            print("initialized ant " + str(a))
            ant.traverse(self.pheromone_map)

            # Compare the terms themselves; comparing Rule objects directly
            # would only test identity unless Rule defines equality.
            if any(ant.rule.terms == prev_rule.terms for prev_rule in prev_rules):
                dups += 1
            else:
                ant.set_emotion()
                Q = ant.rule.quality()

                # Only the features used by the rule receive new pheromone.
                mask = np.zeros_like(entropies)
                mask[list(ant.rule.terms)] = 1

                self.deposit_pheromones(np.multiply(self.pheromone_map, mask) * Q)
                self.evaporate_pheromones()

                if a%20 == 0:
                    self.plot_pheromones(a)

                prev_rules.append(ant.rule)
                prev_qualities.append(Q)

            if dups > self.max_dups:
                break

        if not prev_rules:
            raise ValueError("no rule was constructed; n_ants must be at least 1")

        return prev_rules[int(np.argmax(prev_qualities))]

    def discover_rules(self):
        """Discover rules until at most ``max_uncovered`` training rows remain.

        After every generation the best rule is appended to
        ``discovered_rule_list`` and the rows it classifies correctly are
        removed from ``train_set`` by index label, so rows that merely share
        identical values with other rows are not dropped along with them.
        The search stops early if the best rule classifies no row correctly,
        since the training set could then never shrink.
        """
        while len(self.train_set) > self.max_uncovered:
            best_rule = self.dispatch_ants()
            corrects = best_rule.correct_set()
            if corrects.empty:
                break
            self.discovered_rule_list.append(best_rule)
            still_uncovered = ~self.train_set.index.isin(corrects.index)
            self.train_set = self.train_set.loc[still_uncovered]

        

In [None]:
# World with only two ants per generation; every other setting keeps its default.
world = World(n_ants=2)

In [None]:
# Run generations of ants until at most world.max_uncovered training rows remain.
world.discover_rules()

# From here it's just random testing

In [None]:
# Draw 1000 indices from {0, 1, 2, 3} with index 0 given probability 0 and
# tabulate how often each index came up.
np.unique(np.random.choice(4, 1000, p=[0, 0.33, 0.33, 0.34]), return_counts=True)

In [None]:
# Scratch experiment: turn (logK - H) and random pheromone levels T into a
# probability table.
# NOTE(review): H is not defined anywhere in the visible notebook; the
# H.shape[0] / H.shape[1] accesses imply a 2-D array — presumably left over
# from an earlier version. Confirm before running this cell.
x = len(features) - 0
Eta = logK - H
# Scale each column of Eta by x times its column sum.
denom = x * np.sum(Eta, axis=0)
Eta = np.divide(Eta, denom)

T = np.random.rand(H.shape[0], H.shape[1])
Probs = np.multiply(Eta, T)
# Same column-wise scaling for the combined table.
denom = x * np.sum(Probs, axis=0)
Probs = np.divide(Probs, denom)
np.sum(Probs)

In [None]:
# Scratch experiment: draw one cell of the flattened Probs table and convert
# the flat index back to (column, row) coordinates.
# NOTE(review): the row width 652 is hard-coded; presumably the number of
# feature columns — confirm against Probs.shape[1].
flattened = Probs.flatten()
num = np.random.choice(len(flattened), 1, p=flattened)
col = num % 652
row = num // 652
int(col), int(row)