# An implementation of the Ant miner paper for Emotion Recognition

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy

In [None]:
# Load the data set from CSV and preview its first rows.
df = pd.read_csv("Resources/Ravdess Dataset.csv")
df.head()

In [None]:
# Level of discretization of the continuous data:
# the number of intervals requested for every feature when it is binned.
bins = 3

In [None]:
# Histogram of 'f0' after equal-width binning (pd.cut) into `bins` intervals.
plt.hist(pd.cut(df['f0'], bins=bins, labels=range(bins)), bins = bins)

In [None]:
# Histogram of 'f0' after quantile binning (pd.qcut) into `bins` intervals, for comparison.
plt.hist(pd.qcut(df['f0'], q=bins, labels=range(bins)), bins = bins)

In [None]:
# Empty frame that will collect the discretized version of every feature column.
discreet = pd.DataFrame()

In [None]:
# First pass: quantile-bin every column except 'class'.
# duplicates='drop' discards repeated bin edges, so a column can end up with
# fewer than `bins` intervals.
for column in df.drop('class', axis=1).columns:
    #discreet[column] = pd.cut(df[column], bins=bins, labels=range(bins))
    discreet[column] = pd.qcut(df[column], q=bins, duplicates='drop')

In [None]:
# Number of distinct values found in each column of `discreet`.
temp_bins = discreet.nunique()

In [None]:
# Replace every feature column (all columns except 'class') by its integer
# quantile-bin code. labels=False makes qcut return the codes 0..k-1 directly,
# where k is the number of intervals left after duplicates='drop'; this avoids
# having to guess k up front, which raises whenever the guess is off.
for column in df.drop('class', axis=1).columns:
    discreet[column] = pd.qcut(df[column], q=bins, labels=False, duplicates='drop')

In [None]:
# Convert every bin code to float.
discreet = discreet.astype(float)

In [None]:
# Replace missing bin codes with 0.
discreet = discreet.fillna(0)

In [None]:
# Store the bin codes as integers.
discreet = discreet.astype(int)

In [None]:
# Re-attach the target column, copied unchanged from the raw data.
discreet['class'] = df['class']

In [None]:
# Distinct class labels present in the data (displayed as the cell output).
emotions = discreet['class'].unique()
emotions

In [None]:
# Shuffle all rows, then keep the first 75% for training and the rest for testing.
# Positional slicing is used instead of np.split, which routes DataFrames through
# the deprecated DataFrame.swapaxes.
shuffled = discreet.sample(frac=1)
split_at = int(0.75 * len(shuffled))
train, test = shuffled.iloc[:split_at], shuffled.iloc[split_at:]
train_len = len(train)
test_len = len(test)
total_len = train_len + test_len
test_len, train_len, total_len

# Entropy values
H will be a matrix of size (bins x features). It is computed in a vectorized way to save time: a plain for loop would take about 10 minutes, whereas this takes about 10 seconds. <br>
Strictly speaking it is not fully vectorized, because we still loop over the 6 emotions, but H is computed only once, so that is acceptable.

In [None]:
# Count tensor of shape (bins, number of non-class columns, number of emotions).
H = np.zeros((bins, len(train.columns)-1, len(emotions)))

In [None]:
# The most compute intensive step:
# for every emotion, count how many training rows fall into each bin of each feature.
for e, emotion in enumerate(emotions):
    counts = train.loc[train['class'] == emotion].drop('class', axis=1).apply(pd.Series.value_counts)
    # Force exactly one row per bin code, in bin order, so the slab always has
    # shape (bins, features) even if some bin never occurs for this emotion.
    H[:, :, e] = counts.reindex(range(bins)).fillna(0).to_numpy()
#H

In [None]:
# Total count per (bin, feature) cell, summed over the emotion axis.
sums = np.sum(H, axis=2)
#sums

In [None]:
# Repeat the totals along a new last axis so they have the same shape as H.
repeated_sums = np.repeat(sums[:, :, np.newaxis], H.shape[2], axis=2)
#repeated_sums[:, :, 1]

In [None]:
# Relative frequency of each emotion per (bin, feature): count / total.
# Cells whose total is 0 are left at 0 instead of dividing by zero.
P = np.divide(H, repeated_sums, out=np.zeros_like(H), where=repeated_sums!=0)

In [None]:
# log2(P), left at 0 wherever P is 0 so those entries add nothing to the entropy sum.
logP = np.log2(P, out=np.zeros_like(P), where=P>0)
#logP

In [None]:
# Overwrite H with the entropy -sum(P * log2 P) taken over the emotion axis;
# from here on H is a 2-D (bins x features) array.
H = -np.sum(np.multiply(P, logP), axis=2)
#H

In [None]:
# Sanity check: indices of any NaN entries in H.
np.where(np.isnan(H))

In [None]:
# Show H as a heat map: drop the last two feature columns and fold the
# remaining values into a 30 x 65 image.
# NOTE(review): reshape(30, 65) needs exactly 1950 values, i.e. it is tied to
# the current bins / feature count — confirm before changing either.
vis = H[:, :-2]
vis = vis.reshape(30,65)
plt.imshow(vis)
plt.colorbar()
plt.show()

In [None]:
# Names of all feature columns (everything except 'class').
features = discreet.drop('class', axis=1).columns
# log2 of the number of classes; Ant.traverse uses it as `logK - H`.
logK = np.log2(len(emotions))

In [None]:
#Hyperparameters
# NOTE(review): World() is later constructed with its own default arguments,
# not with these values — confirm which settings are intended.
min_coverage = 10
max_uncovered = 10
n_ants = 3000
max_dups = 2

In [None]:
class Rule:
    """A conjunction of ``feature == bin`` terms together with the emotion it predicts.

    The rule keeps the rows of ``train_set`` that still satisfy all of its terms
    in ``match_set``; both are pandas DataFrames with a 'class' column and
    feature columns named 'f<index>'.
    """

    # Rules compare by value (see __eq__) and are mutable, so they are unhashable.
    __hash__ = None

    def __init__(self, train_set):
        # feature index -> bin value that feature must have
        self.terms = {}
        # predicted class label; None until it is assigned from outside
        self.emotion = None
        # number of terms added so far
        self.numerosity = 0
        # rows the rule is evaluated against
        self.train_set = train_set
        # rows of train_set satisfying every term (all of them while the rule is empty)
        self.match_set = train_set

    def __eq__(self, other):
        """Two rules are equal when they consist of exactly the same terms.

        The emotion is deliberately ignored: rules are compared right after
        construction, before an emotion has been assigned.
        """
        if not isinstance(other, Rule):
            return NotImplemented
        return self.terms == other.terms

    def match(self, instance):
        """Return True unless some key of ``instance`` contradicts a term.

        ``instance`` maps feature index -> bin value; keys without a term are ignored.
        """
        return all(
            self.terms[key] == value
            for key, value in instance.items()
            if key in self.terms
        )

    def new_match_set(self, term):
        """Return the rows of ``match_set`` that would remain after adding ``term``.

        ``term`` is a dict with keys 'feature' (index) and 'class' (bin value).
        """
        return self.match_set.loc[self.match_set['f' + str(term['feature'])] == term['class']]

    def unmatch_set(self):
        """Return the rows of ``train_set`` that are not in ``match_set``.

        Rows are identified by index label (``match_set`` is always a row subset
        of ``train_set``), so rows that merely share the same values are kept.
        """
        return self.train_set.loc[~self.train_set.index.isin(self.match_set.index)]

    def correct_set(self):
        """Return the matched rows whose class equals the rule's emotion."""
        return self.match_set.loc[self.match_set['class'] == self.emotion]

    def exists(self, feature):
        """Return True if the rule already has a term for ``feature``."""
        return feature in self.terms

    def addTerm(self, term, new_matches):
        """Add ``term`` and adopt ``new_matches`` as the new match set."""
        self.terms[term['feature']] = term['class']
        self.match_set = new_matches
        self.numerosity += 1

    def quality(self):
        """Return sensitivity * specificity of the rule on ``train_set``.

        A ratio whose denominator is empty (no positives, or no negatives, in
        ``train_set``) counts as 0.0 instead of raising ZeroDivisionError.
        """
        matches = self.match_set
        unmatches = self.unmatch_set()
        TP = int((matches['class'] == self.emotion).sum())
        FP = len(matches) - TP
        FN = int((unmatches['class'] == self.emotion).sum())
        TN = len(unmatches) - FN

        sens = TP / (TP + FN) if TP + FN else 0.0
        spec = TN / (FP + TN) if FP + TN else 0.0

        return sens * spec

    def prune(self):
        """Placeholder: rule pruning is not implemented."""
        pass

        

In [None]:
def normalize_probs(probs):
    """Scale ``probs`` so that all of its entries add up to 1."""
    total = np.sum(probs)
    return probs / total

In [None]:
class Ant:
    """Builds one candidate Rule by sampling terms from heuristic * pheromone weights."""

    def __init__(self, index, train_set, n_tries, min_coverage):
        # identifier of this ant
        self.index = index
        # the rule under construction, starting without terms
        self.rule = Rule(train_set)
        # how many consecutive rejected draws end the construction
        self.n_tries = n_tries
        # minimum number of rows the rule must still match after adding a term
        self.min_coverage = min_coverage

    def traverse(self, T):
        """Grow ``self.rule`` term by term.

        ``T`` is the pheromone map, an array with the same (bins x features)
        layout as the module-level entropy matrix ``H``. Terms are drawn with
        probability proportional to ``(logK - H) * T``. A drawn term is accepted
        only if the rule still matches at least ``min_coverage`` rows with it;
        construction stops after ``n_tries`` rejected draws in a row, or once
        every feature already has a term.
        """
        n_features = len(features)
        x = n_features - self.rule.numerosity
        Eta = logK - H
        Eta = np.divide(Eta, x * np.sum(Eta, axis=0))

        Probs = np.multiply(Eta, T)
        Probs = np.divide(Probs, x * np.sum(Probs, axis=0))

        while True:
            flattened = Probs.flatten()
            for _ in range(self.n_tries):
                # Draw a scalar index; row-major flattening means
                # index = bin * n_features + feature.
                chosen = int(np.random.choice(flattened.size, p=flattened))
                term = {'feature': chosen % n_features, 'class': chosen // n_features}
                new_cov = self.rule.new_match_set(term)
                if len(new_cov) < self.min_coverage:
                    continue

                self.rule.addTerm(term, new_cov)
                print("Added " + str(term))

                # A feature may appear only once per rule.
                Probs[:, term['feature']] = 0
                if not Probs.any():
                    # Every feature is used; renormalizing would divide by zero.
                    return
                Probs = normalize_probs(Probs)
                break
            else:
                # No acceptable term within n_tries draws.
                return

    def set_emotion(self):
        """Label the rule with the most frequent class among the rows it matches."""
        self.rule.emotion = self.rule.match_set['class'].mode()[0]

        

In [None]:
class World:
    """Drives rule discovery: dispatches ants, maintains the pheromone map and
    collects the best rule of every round.

    Parameters:
        min_coverage: minimum number of rows a rule must match (passed to each Ant).
        max_uncovered: discovery stops once at most this many training rows remain.
        n_ants: maximum number of ants dispatched per round.
        max_dups: a round ends as soon as more than this many ants rebuilt an
            already seen rule.
        chances_before_brutally_murdering_ant: rejected draws an ant tolerates
            in a row (passed to Ant as n_tries).
        train_set: training DataFrame; defaults to a deep copy of the
            module-level ``train`` taken when the World is created.
    """

    def __init__(
        self,
        min_coverage = 10,
        max_uncovered = 10,
        n_ants = 500,
        max_dups = 1,
        chances_before_brutally_murdering_ant = 10,
        train_set = None,
    ):

        self.min_coverage = min_coverage
        self.max_uncovered = max_uncovered
        self.n_ants = n_ants
        self.max_dups = max_dups
        self.chances_before_brutally_murdering_ant = chances_before_brutally_murdering_ant
        # Resolve the default here: a default written as train.copy(...) in the
        # signature is evaluated only once, when the class is defined.
        self.train_set = train.copy(deep=True) if train_set is None else train_set
        # Uniform starting pheromone, one cell per (bin, feature).
        self.pheromone_map = np.ones_like(H) * (1 / (bins * len(features)))
        # Best rule of every finished round, in discovery order.
        self.discovered_rule_list = []

    def deposit_pheromones(self, new_pheromones):
        """Add ``new_pheromones`` element-wise to the pheromone map."""
        self.pheromone_map = self.pheromone_map + new_pheromones

    def evaporate_pheromones(self):
        """Rescale the pheromone map so that it sums to 1."""
        self.pheromone_map = self.pheromone_map / np.sum(self.pheromone_map)

    def plot_pheromones(self, a):
        """Save a heat map of the pheromone map for ant number ``a``.

        NOTE(review): dropping the last two columns and reshape(30, 65) require
        exactly 1950 remaining values — confirm against bins / feature count.
        The 'Plots' directory must already exist.
        """
        vis = self.pheromone_map[:, :-2]
        vis = vis.reshape(30,65)
        plt.imshow(vis)
        plt.colorbar()
        plt.savefig('Plots/Generation' + str(len(self.discovered_rule_list)) + 'ant' + str(a))
        plt.clf()

    def dispatch_ants(self):
        """Run one round of ants and return the best rule they built."""
        prev_rules = []
        prev_qualities = []
        dups = 0
        for a in range(self.n_ants):
            ant = Ant(a, self.train_set, self.chances_before_brutally_murdering_ant, self.min_coverage)
            print("initialized ant " + str(a))
            ant.traverse(self.pheromone_map)

            # Compare the terms themselves: two separately built Rule objects
            # are never the same object.
            if any(ant.rule.terms == prev_rule.terms for prev_rule in prev_rules):
                dups += 1
            else:
                ant.set_emotion()
                Q = ant.rule.quality()

                # Mark the (bin, feature) cells used by this rule.
                mask = np.zeros_like(H)
                mask[list(ant.rule.terms.values()), list(ant.rule.terms)] = 1

                # Reinforce the used cells in proportion to the rule quality,
                # then renormalize, which lowers all other cells.
                self.deposit_pheromones(np.multiply(self.pheromone_map, mask) * Q)
                self.evaporate_pheromones()

                if a%20 == 0:
                    self.plot_pheromones(a)

                prev_rules.append(ant.rule)
                prev_qualities.append(Q)

            # Use this world's own limit, not a same-named module-level variable.
            if dups > self.max_dups:
                break

        imax = np.argmax(prev_qualities)

        return prev_rules[imax]

    def discover_rules(self):
        """Discover rules until at most ``max_uncovered`` training rows remain.

        Each round's best rule is appended to ``discovered_rule_list`` and the
        rows it classifies correctly are removed from the training set.
        """
        while len(self.train_set) > self.max_uncovered:
            best_rule = self.dispatch_ants()
            self.discovered_rule_list.append(best_rule)
            corrects = best_rule.correct_set()
            if corrects.empty:
                # Nothing would be removed; stop instead of looping forever.
                break
            # Remove the covered rows by index label (they are a row subset of
            # train_set), so unrelated rows with identical values are kept.
            self.train_set = self.train_set.drop(index=corrects.index)

In [None]:
# Create the world with its default settings.
world = World()

In [None]:
# Run rule discovery (this is the long-running step).
world.discover_rules()

In [None]:
# Persist the discovered rule list to 'rules.npy'.
np.save('rules.npy', world.discovered_rule_list)

In [None]:
# Read the saved rules back; allow_pickle=True permits loading pickled Python
# objects, so only load files from a trusted source.
np.load('rules.npy', allow_pickle=True)

In [None]:
# Over all discovered rules: rows matched with the right class, rows matched at
# all, and the former as a percentage of the latter.
n_corrects = sum(len(rule.correct_set()) for rule in world.discovered_rule_list)
n_matches = sum(len(rule.match_set) for rule in world.discovered_rule_list)

n_corrects, n_matches, 100*n_corrects/n_matches

In [None]:
from collections import defaultdict

In [None]:
def predict(row):
    """Predict the emotion of one sample by majority vote of the matching rules.

    ``row`` maps feature index -> bin value (anything ``dict()`` accepts, e.g.
    a pandas Series). Every rule in ``world.discovered_rule_list`` that matches
    casts one vote for its emotion; on a tie the emotion that received its
    first vote earliest wins. Returns 'unable to predict' if no rule matches.
    """
    instance = dict(row)

    votes = {}
    for rule in world.discovered_rule_list:
        if rule.match(instance):
            votes[rule.emotion] = votes.get(rule.emotion, 0) + 1

    if votes:
        return max(votes, key=votes.get)
    return 'unable to predict'

In [None]:
# Work on a copy whose feature columns are labelled 0..n-1, matching the integer
# feature indices stored in Rule.terms.
disc2 = discreet.copy(deep=True)
disc2.columns = range(len(discreet.columns))
# 'class' was appended last to `discreet`, so it sits at the final position;
# derive that position instead of hard-coding the feature count.
disc2.rename(columns={len(discreet.columns) - 1: 'class'}, inplace=True)
#disc2

In [None]:
# Placeholder value for the prediction column, overwritten below.
disc2['preds'] = 'nothing'

In [None]:
# Spot check: predict row 1143 from its feature columns only.
disc2.at[1143, 'preds'] = predict(disc2.drop(['preds', 'class'], axis=1).iloc[1143])

In [None]:
# Show the prediction stored for that row.
disc2['preds'][1143]

In [None]:
# Predict every row. The feature-only frame is built once, instead of
# re-dropping the two non-feature columns (a full copy) on every iteration.
feature_rows = disc2.drop(['preds', 'class'], axis=1)
for i in range(len(disc2)):
    disc2.at[i, 'preds'] = predict(feature_rows.iloc[i])
#disc2

In [None]:
# Rows whose prediction equals the true class, and rows for which a prediction was made.
correct = disc2.loc[disc2['class'] == disc2['preds']]
predicted = disc2.loc[disc2['preds'] != 'unable to predict']
# Displayed: #correct, #predicted, accuracy among predicted rows, accuracy over all rows.
len(correct), len(predicted), len(correct)/len(predicted), len(correct)/len(disc2)

In [None]:
# Number of rows per class in the whole data set.
disc2['class'].value_counts()

In [None]:
# Number of correctly predicted rows per class.
correct['class'].value_counts()

# From here it's just random testing

In [None]:
# Scratch check: draw 1000 samples where outcome 0 has probability 0 and count each outcome.
np.unique(np.random.choice(4, 1000, p=[0, 0.33, 0.33, 0.34]), return_counts=True)

In [None]:
# Scratch copy of the probability computation in Ant.traverse for a rule
# without terms, using a random pheromone map T.
x = len(features) - 0
Eta = logK - H
denom = x * np.sum(Eta, axis=0)
Eta = np.divide(Eta, denom)

T = np.random.rand(H.shape[0], H.shape[1])
Probs = np.multiply(Eta, T)
denom = x * np.sum(Probs, axis=0)
Probs = np.divide(Probs, denom)
# Displays the total probability mass.
np.sum(Probs)

In [None]:
# Draw one index from the flattened probabilities and decode it into
# (feature column, bin row).
# NOTE(review): 652 is hard-coded here, whereas Ant.traverse uses len(features).
flattened = Probs.flatten()
num = np.random.choice(len(flattened), 1, p=flattened)
col = num % 652
row = num // 652
int(col), int(row)