In [1]:
import os
import numpy as np
import pandas as pd
import csv
from csv import reader
from csv import writer
import custom_models as cm
from sklearn import tree
from scipy.stats import uniform, norm
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE, RandomOverSampler
from sklearn.feature_selection import SelectFromModel, SelectKBest, VarianceThreshold, chi2, f_classif, mutual_info_classif
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from XLB import *
import xlb_hyperparamsearch as xlbh
from apyori import apriori

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the training and validation splits; extract_data yields two values per
# file, bound here as the x_* / y_* pair.
x_train, y_train = extract_data("FinalTrainingSet.csv")
x_val, y_val = extract_data("Validation Set.csv")

# Min-max scaling: the scaler is fitted on the training split only and then
# applied unchanged to the validation split.
scaler = MinMaxScaler()
x_train = scaler.fit_transform(x_train)
x_val = scaler.transform(x_val)

# Two-stage feature selection, both stages fitted on the training split:
#   feat_sel   - VarianceThreshold with its default settings
#   feat_sel_2 - SelectKBest keeping the num_features best chi2-scored columns
# The validation split is pushed through the already-fitted stages in order.
num_features = 69
feat_sel = VarianceThreshold()
feat_sel_2 = SelectKBest(chi2, k=num_features)
x_train = feat_sel_2.fit_transform(feat_sel.fit_transform(x_train), y_train)
x_val = feat_sel_2.transform(feat_sel.transform(x_val))

# One seed shared by both oversamplers below.
rand_seed = 3454132

# SMOTE-resampled copy of the selected training data.
oversampler = SMOTE(sampling_strategy="not majority", random_state=rand_seed)
x_smote, y_smote = oversampler.fit_resample(x_train, y_train)

# Randomly oversampled copy of the same data; note that `oversampler` is
# rebound here and refers to the RandomOverSampler afterwards.
oversampler = RandomOverSampler(sampling_strategy="not majority", random_state=rand_seed)
x_os, y_os = oversampler.fit_resample(x_train, y_train)

In [3]:
# Build `features`, the list of column names matching the columns of x_train,
# and append the numbered theme as an extra column of x_train.

# Header row and raw data rows of the training CSV. The context manager closes
# the handle even if parsing raises, and newline="" is the mode the csv module
# documents for files handed to csv.reader.
with open("FinalTrainingSet.csv", newline="") as f:
    # Not named `reader`, so the `from csv import reader` import stays usable.
    csv_rows = csv.reader(f)
    features = next(csv_rows)
    row = list(csv_rows)
row_count = len(row)

csv_temp = pd.read_csv("FinalTrainingSet.csv")
Theme_numbered = np.asarray(csv_temp['Theme(Numbered)'].tolist())

# Column indices chosen by SelectKBest. These index the OUTPUT of feat_sel
# (VarianceThreshold), not the original columns.
selected_feats = feat_sel_2.get_support(True)

# Append the theme column only while x_train still has exactly the selected
# feature columns, so re-running this cell does not stack duplicate columns.
if x_train.shape[1] == len(selected_feats):
    x_train = np.append(x_train, Theme_numbered.reshape(Theme_numbered.shape[0], 1), axis=1)

# Drop the non-feature columns from the header list.
# NOTE(review): this assumes extract_data keeps the remaining columns in CSV
# header order - confirm against XLB.extract_data.
features.remove("Row Labels")
features.remove("Theme")
features.remove("Theme(Numbered)")

# Translate SelectKBest's indices back to original column positions via the
# columns VarianceThreshold kept. When VarianceThreshold removed nothing this
# is the identity mapping; otherwise it prevents the names from being shifted.
kept_by_variance = feat_sel.get_support(True)
features = [features[kept_by_variance[ind]] for ind in selected_feats]
features.append('Theme_numbered')

# Mirror the appended column in the DataFrame view of the CSV.
new_column = pd.DataFrame({'Theme_numbered': Theme_numbered})
csv_temp = csv_temp.merge(new_column, left_index=True, right_index=True)
column = csv_temp.Theme_numbered

In [4]:
# Hyperparameter search for the APyori adapter over the prepared x_train.
num_folds = 5
model = cm.APyoriAdapter(params={})

# Label columns and one numeric value per label (same order, six of each).
# NOTE(review): label_supp presumably holds each label's support in the
# training data - confirm where these numbers come from.
emotions = ["IsCalm", "IsCheerful", "IsBravery", "IsFearful", "IsLove", "IsSadness"]
label_supp = [0.3117, 0.1372, 0.1397, 0.2469, 0.0673, 0.0973]

# Settings handed to the search; the *_lo / *_hi pairs bracket min_support and
# min_confidence, and col_names is the feature-name list built earlier.
parameters = dict(
    num_features=69,
    thresh_mean=0.5,
    thresh_std=0.15,
    min_support_lo=0.16,
    min_support_hi=0.22,
    min_confidence_lo=0.036,
    min_confidence_hi=0.739,
    col_names=features,
    label_names=emotions,
    label_support=label_supp,
)

# The search returns the chosen hyperparameters, a result object and a model;
# `model` is rebound to the returned one.
hyperparams, result, model = xlbh.hyperparameter_search(
    num_folds=num_folds,
    model=model,
    parameters=parameters,
    data=x_train,
)

In [7]:
# Show every entry of hyperparams, one "name : value" line each.
for k, v in hyperparams.items():
    print(k, ":", v)

thresholds : [0.48050678 0.32807564 0.43900294 0.74845356 0.57791156 0.42964089
 0.33966644 0.44123084 0.69838258 0.40328533 0.5417637  0.37862079
 0.55270803 0.3801969  0.7039169  0.55340509 0.59058057 0.54016036
 0.56674747 0.45998384 0.55135309 0.54082865 0.43359724 0.64295728
 0.42960654 0.45020311 0.28487405 0.62473996 0.35172048 0.63670339
 0.49226847 0.52469802 0.39120118 0.35770744 0.61915086 0.50177643
 0.6104294  0.52046027 0.73731138 0.47526139 0.63841276 0.56156508
 0.54770146 0.39352515 0.50360072 0.59354345 0.53135825 0.71596815
 0.86144481 0.66277257 0.39689212 0.35072324 0.2807026  0.51430314
 0.51943596 0.4366115  0.39053333 0.68778331 0.66910001 0.32893095
 0.99886982 0.37818672 0.65605119 0.5021598  0.62549403 0.43635437
 0.53362056 0.45122747 0.36371422]
min_support : 0.19855729431471736
min_confidence : 0.24891280707884905
col_names : ['Tempo', 'Arousal', 'IsMajor', 'IsMinor', 'IsDissonant', 'Derivative of Root Mean Square Overall Standard Deviation', 'Derivative o

In [5]:
# Train the adapter with the tuned hyperparameters and export its rules.

# Seed NumPy's global RNG right before the model is built and trained.
np.random.seed(69420)
model = cm.APyoriAdapter(hyperparams)
ruleset = model.train(x_train)

# Open the output only after training has succeeded: mode "w" truncates the
# file on open, so opening first would wipe an existing rules.txt whenever
# training raised.
with open("rules.txt", "w") as fOut:
    # One rule per paragraph, separated by a blank line.
    fOut.write("{}\n".format("\n\n".join(str(rule) for rule in ruleset)))

print("Average Interestingness: {:.2f}".format(model.evaluate()))

Average Interestingness: 0.00
