# Classification report

### Target dataset

In [195]:
import os

weka_path = 'weka/no-bow'
# Get project root: the parent directory of the current working directory.
# os.getcwd() replaces the IPython-only `!pwd` shell capture, so this cell is
# plain Python and does not spawn a shell. `path` is kept as a one-element
# list so that anything indexing `path[0]` keeps working.
path = [os.getcwd()]
# Reports live under '<project root>/reports/<weka_path>'.
report_path = '{}/{}/{}'.format('/'.join(path[0].split('/')[:-1]), 'reports', weka_path)
report_path

'/home/gianlu/Documents/Dev/irony-detection/reports/weka/no-bow'

Read report

In [196]:
def read_report(path_file):
	"""Return the lines of the text file at ``path_file``, each stripped.

	The result is a list with one string per line of the file, in file order;
	leading and trailing whitespace (including the newline) is removed.
	"""
	with open(path_file) as handle:
		return [raw_line.strip() for raw_line in handle]

def extract_name(file_path):
	"""Derive the feature label from a report file path.

	The label is the part of the file name that follows the first '-' and
	precedes the first '.', e.g. 'dir/labeled_matrix-emot.txt' -> 'emot'.
	An empty string is returned when that part of the name contains no '-'.
	"""
	base_name = file_path.rpartition('/')[2]
	stem = base_name.partition('.')[0]
	# Everything after the first dash; any later dashes are kept as they are.
	return stem.partition('-')[2]

Evaluate report

In [197]:
def extract_report_data(file_content):
	"""Assemble the metrics and the confusion matrix parsed from one report.

	``file_content`` is the report as a list of lines. The accuracy is the
	second-to-last token of line index 3 divided by 100; the 'True', 'False'
	and 'weighted avg' rows are read from line indices 15, 16 and 17.

	Returns a dict with a 'report' entry (the accuracy plus one metrics dict
	per row) and a 'confusion-matrix' entry.
	"""
	# Entries are evaluated top to bottom: accuracy, False, True, weighted avg.
	metrics = {
		'accuracy': float(file_content[3].split()[-2]) / 100,
		'False': extract_report_type(file_content, 16),
		'True': extract_report_type(file_content, 15),
		'weighted avg': extract_report_type(file_content, 17),
	}
	return {
		'report': metrics,
		'confusion-matrix': extract_confusion_matrix(file_content),
	}

def extract_report_type(file_content, line):
	"""Parse the metrics row at index ``line`` of a report and attach its support.

	The support is hard-coded: 30000 for line 16, 10000 for line 15 and 40000
	for any other line. For those other lines the first two tokens are dropped
	before parsing (presumably the two-word row label - confirm against the
	report format).

	Returns the dict built by ``extract_line`` with an extra float 'support'.
	"""
	tokens = file_content[line].split()
	# Fixed per-row supports, keyed by line index.
	class_support = {16: 30000, 15: 10000}
	if line in class_support:
		support = class_support[line]
	else:
		tokens = tokens[2:]
		support = 40000
	metrics = extract_line(tokens)
	metrics['support'] = float(support)
	return metrics
	
def extract_line(line):
	"""Convert one tokenised report row into a metrics dict.

	``line`` is a sequence of tokens; positions 2, 3 and 4 are parsed as
	floats and returned under 'precision', 'recall' and 'f1-score'.
	"""
	return {
		'precision': float(line[2]),
		'recall': float(line[3]),
		'f1-score': float(line[4]),
	}

def extract_dict(path_file):
	"""Build the summary dict for the report stored at ``path_file``.

	Returns a dict with the parsed report under 'overall', the feature label
	taken from the file name under 'features', and the fixed classifier name
	'BayesianNetworks' under 'classifier'.
	"""
	report_lines = read_report(path_file)
	return {
		'overall': extract_report_data(report_lines),
		'features': extract_name(path_file),
		'classifier': 'BayesianNetworks',
	}

Confusion matrix

In [198]:
import numpy as np
def extract_confusion_matrix(file_content):
	"""Parse the 2x2 confusion matrix of a report and return it rotated by 180 degrees.

	The first two whitespace-separated tokens of line indices 22 and 23 are
	taken as the two matrix rows; any further tokens on those lines are
	ignored. Both the row order and the column order are then reversed, so
	``[[a, b], [c, d]]`` becomes ``[[d, c], [b, a]]``.

	NOTE(review): presumably this moves the class listed first in the report
	to the last position (e.g. to get a [False, True] label order) - confirm
	against the report format.

	Returns:
		A nested list ``[[float, float], [float, float]]``.

	Raises:
		IndexError: if ``file_content`` has fewer than 24 lines.
		ValueError: if a matrix line has fewer than two tokens or a token is
			not numeric.
	"""
	top_left, top_right, *_ = file_content[22].split()
	bottom_left, bottom_right, *_ = file_content[23].split()
	# Builtin float instead of np.float: that alias was deprecated in NumPy 1.20
	# and removed in 1.24, where it raises AttributeError.
	matrix = np.array([[top_left, top_right], [bottom_left, bottom_right]], dtype=float)
	# Reversing both axes is the same as swapping the rows and then the columns.
	return matrix[::-1, ::-1].tolist()

Save JSON report

In [199]:
import json
def dump_dict_report(dict):
	"""Write a report dict to '<report_path>/json/<features>$BayesianNetworks.json'.

	``dict`` must provide a 'features' entry, which is used for the file name;
	the whole mapping is serialised as JSON. The target file is opened in 'w'
	mode, and ``report_path`` is read from the notebook's global namespace.
	"""
	# NOTE: the parameter name shadows the builtin `dict`; it is kept because
	# callers may pass it by keyword.
	json_name = '{}${}.json'.format(dict['features'], 'BayesianNetworks')
	target = '{}/json/{}'.format(report_path, json_name)
	with open(target, 'w') as handle:
		json.dump(dict, handle)


Read all files

In [200]:
import os
# Sample report name and path. Both `file` and `path_file` are rebound by the
# `for file in ...` loop in the "Evaluate all reports" cell.
file = 'labeled_matrix-emot.txt'
# NOTE(review): this path has no 'text/' segment, whereas the evaluation loop
# reads from '<report_path>/text/' - confirm which location is intended.
path_file = '{}/{}'.format(report_path, file)

# Names of the entries in '<report_path>/text'; `file_list` is not referenced
# again in the visible cells.
file_list = os.listdir('{}/text'.format(report_path))

Evaluate all reports

In [201]:
# Convert every entry of '<report_path>/text' into a JSON report.
# NOTE(review): os.listdir returns every directory entry, in arbitrary order;
# this assumes the folder holds only report files - confirm.
for file in os.listdir('{}/text'.format(report_path)):
	# Full path of the current text report
	path_file = '{}/text/{}'.format(report_path, file)
	# Parse the report into a dict (metrics, confusion matrix, features, classifier)
	report_dict = extract_dict(path_file)
	# Write the dict as JSON under '<report_path>/json'
	dump_dict_report(report_dict)