In [67]:
# Import needed functions
from csv import reader
from math import sqrt
from math import e
from math import pi

In [111]:
def load_csv(filename):
	'''
	Load a csv file and return its data rows as a list of lists

	The first row is treated as the header (column names) and is discarded.

	Parameters
	filename (str) : path of the csv file to read (decoded as utf8)

	Returns
	dataset (list) : list of rows, each row being a list of strings as produced by csv.reader
	'''
	# newline='' is what the csv module asks for, so that newlines embedded in
	# quoted fields reach the reader untouched
	with open(filename, 'r', encoding="utf8", newline='') as file:
		csv_reader = reader(file)
		# skip the header row; the default value keeps an empty file from raising StopIteration
		next(csv_reader, None)
		dataset = list(csv_reader)
	return dataset

def convert_categorical_to_numerical(dataset):
	'''
	Converts the categorical values (first and last column of every row) to numerical values, in place

	Matching ignores letter case and surrounding whitespace, so "No", "NO" and " no " are all treated alike.
	- first column : "Female" -> 0, anything else -> 1
	- last column  : "No" -> 0, anything else -> 1

	Parameters
	dataset (list) : list of rows (lists); every row is modified in place

	Returns
	dataset (list) : the same list of rows, where the first and last value of each row is now 0 or 1
	'''
	def matches(value, category):
		# str() keeps non-string values (e.g. numbers) from raising on .strip()
		return str(value).strip().lower() == category

	for row in dataset:
		# Female->0, Male->1
		row[0] = 0 if matches(row[0], "female") else 1
		# No->0, Yes->1
		row[-1] = 0 if matches(row[-1], "no") else 1
	return dataset

def convert_string_to_int(dataset):
	'''
	Cast every value of the dataset that is not already an int to int, in place

	Values that int() rejects with a ValueError (e.g. non-numeric text) are left unchanged.

	Returns
	dataset (list) : the same list of rows, with every convertible value replaced by an int
	'''
	for row in dataset:
		for position, value in enumerate(row):
			# nothing to do for values that are already int
			if isinstance(value, int):
				continue
			try:
				row[position] = int(value)
			except ValueError:
				# not parseable as an integer: keep the original value
				pass
	return dataset

def split_data_by_labels(dataset):
	'''
	Group the rows of the dataset by their class label

	The label is taken from the last column of each row and is removed from the stored row.

	Returns
	labels_list (list) : distinct labels in order of first appearance
		eg: ['a', 'b']

	seperated_dataset (list) : one list of label-free rows per label, in the same order as labels_list
		eg: [[1, 2, 'a'], [3, 4, 'b'], [5, 6, 'a']] gives [[[1, 2], [5, 6]], [[3, 4]]]
	'''
	labels_list, seperated_dataset = [], []

	for row in dataset:
		label_value = row[-1]
		if label_value in labels_list:
			# label already known: reuse its group
			group_index = labels_list.index(label_value)
		else:
			# first time this label is seen: open a new group at the end
			labels_list.append(label_value)
			seperated_dataset.append([])
			group_index = len(seperated_dataset) - 1
		seperated_dataset[group_index].append(row[:-1])

	return labels_list, seperated_dataset

def calculate_mean(list_numbers):
	'''
	Calculate the mean value from a list of numbers

	Parameters
	list_numbers (list) : the numbers to average

	Returns
	mean (float) : mean value

	Raises
	ZeroDivisionError : if list_numbers is empty
	'''
	# use the builtin sum instead of a local variable that shadows it
	return sum(list_numbers) / len(list_numbers)
 
def calculate_standard_deviation(numbers):
	'''
	Calculate the sample standard deviation (n - 1 in the denominator) from a list of numbers

	Returns
	standard_deviation (float) : standard deviation value
	'''
	centre = calculate_mean(numbers)
	# accumulate the squared distance of every number from the mean
	squared_distance_total = 0
	for value in numbers:
		distance = value - centre
		squared_distance_total += distance ** 2
	# sample variance, then its square root
	return sqrt(squared_distance_total / (len(numbers) - 1))

def calculate_details(seperated_dataset):
	'''
	Calculate mean, standard deviation & row count for every column, separately for each label group

	Returns
	value_result (list) : per label group, a list of [mean, std_dev, count] for each column
		eg: [[[0.66, 0.47, 4], [8.72, 10.72, 4]], [[0.56, 0.50, 5], [9.12, 11.96, 5]]]
	'''
	value_result = []
	for group in seperated_dataset:
		# turn the rows of this group into one list of values per column
		columns = []
		for row_number, row in enumerate(group):
			for column_index, value in enumerate(row):
				if row_number == 0:
					# the first row decides how many columns there are
					columns.append([value])
				else:
					columns[column_index].append(value)

		# a group without any column contributes nothing to the result
		if columns:
			value_result.append([])
		for column in columns:
			value_result[-1].append([calculate_mean(column), calculate_standard_deviation(column), len(group)])
	return value_result

def calculate_gaussian_probability(x, mean, std_dev):
	'''
	Evaluate the Gaussian (normal) probability density with the given mean and standard deviation at x

	Returns
	gaussian_probability (float) : Gaussian Probability of x
	'''
	# normalising factor 1 / (std_dev * sqrt(2 * pi))
	normaliser = 1 / (std_dev * sqrt(2 * pi))
	# distance of x from the mean, measured in standard deviations
	z_score = (x - mean) / std_dev
	return normaliser * (e ** (-(1 / 2) * z_score ** 2))

def calculate_probabilities_by_class(row, labels_list, dataset_details):
	'''
	Calculate the (unnormalised) probability of a row for each label

	For every label the result is the class prior (rows of that class / all rows) multiplied by
	the Gaussian probability of each value in the row, using that class's mean and standard deviation.

	Parameters
	row (list) : values of one sample, without the label
	labels_list (list) : the labels; position i corresponds to dataset_details[i]
	dataset_details (list) : per label, a list of [mean, std_dev, count] for each column

	Returns
	probabilities_list (list) : list of probabilities for each label, in the order of labels_list
	'''
	# the total row count is the same for every label, so compute it once
	total_count = sum(class_details[0][-1] for class_details in dataset_details)

	probabilities_list = []
	for label_index in range(len(labels_list)):
		class_details = dataset_details[label_index]
		# start from the class prior ...
		probability = class_details[0][-1] / total_count
		# ... and multiply in the likelihood of every column value
		for column_index, value in enumerate(row):
			column_mean = class_details[column_index][0]
			column_std_dev = class_details[column_index][1]
			probability *= calculate_gaussian_probability(value, column_mean, column_std_dev)
		probabilities_list.append(probability)
	return probabilities_list

def predict(row, labels_list, dataset_details):
	'''
	Predict the label for a given row.

	Parameters
	row (list) : values of one sample, without the label
	labels_list (list) : the candidate labels
	dataset_details (list) : statistics handed on to calculate_probabilities_by_class

	Returns
	predicted_label (int) : the label with the highest probability; -1 if no probability is greater than -1
	'''
	probabilities_list = calculate_probabilities_by_class(row, labels_list, dataset_details)
	highest_probability, predicted_label = -1, -1
	# walk over the labels together with their probabilities; the strict
	# comparison keeps the earliest label when probabilities are tied
	for label, probability in zip(labels_list, probabilities_list):
		if probability > highest_probability:
			highest_probability, predicted_label = probability, label
	return predicted_label

def predict_all(dataset_without_labels, labels_list, dataset_details):
	'''
	Predict the label of every row of the dataset using Naive Bayes classification algorithm.

	Returns
	predicted_labels (list) : the list of predicted labels, one per row and in row order
	'''
	return [predict(row, labels_list, dataset_details) for row in dataset_without_labels]

def calculate_accuracy(predicted_labels, actual_labels):
	'''
	Calculate the fraction of predicted labels that equal the actual labels

	Returns
	accuracy (float) : number of matching pairs divided by the number of predicted labels
	'''
	# count the positions where prediction and truth agree
	correct_count = sum(1 for predicted, actual in zip(predicted_labels, actual_labels) if predicted == actual)
	return correct_count / len(predicted_labels)

def zeroR(dataset, labels_list):
	'''
	Predict all labels for dataset using ZeroR classification algorithm,
	i.e. predict the most frequent label for every row.

	The label counts and the chosen label are printed.

	Returns
	predicted_labels (list) : the list of predicted labels
	'''
	# for each known label, count the rows whose last column equals it
	label_count_list = [
		sum(1 for row in dataset if row[-1] == label)
		for label in labels_list
	]

	# pick the label with the highest count; on a tie the earliest label wins
	most_frequent_label, most_frequent_label_count = -1, -1
	for label, label_count in zip(labels_list, label_count_list):
		if label_count > most_frequent_label_count:
			most_frequent_label, most_frequent_label_count = label, label_count

	print("label_count_list: ", label_count_list)
	print("most_frequent_label: ", most_frequent_label)
	return [most_frequent_label] * len(dataset)

def build_frequency_table(column_value_list, labels):
	'''
	Count how often each label occurs together with each column value

	Returns
	frequency_table (dict of dict) : maps column value -> {label: number of occurrences}
		eg: {2: {0: 5, 1: 9}, 0: {0: 27, 1: 21}, 5: {1: 1}}
	'''
	frequency_table = {}
	for feature_value, label in zip(column_value_list, labels):
		# fetch the counter of this column value, creating it on first sight
		label_counts = frequency_table.setdefault(feature_value, {})
		label_counts[label] = label_counts.get(label, 0) + 1
	return frequency_table

def calculate_error(column_value_list, labels):
	"""
	Calculate the error rate obtained when one feature alone is used to predict the label.

	For every distinct feature value the most frequent label is taken as the prediction;
	every sample that carries a different label counts as an error.

	Parameters:
		column_value_list (list): A list of feature values.
		labels (list): A list of corresponding class labels.

	Returns:
		The number of errors divided by the number of samples.
	"""
	total_errors = 0
	for label_counts in build_frequency_table(column_value_list, labels).values():
		# all samples of this feature value minus those with the majority label
		total_errors += sum(label_counts.values()) - max(label_counts.values())

	# Return the error rate
	return total_errors / len(column_value_list)

def oneR_predict(column_value, best_predictor_frequency_table):
	'''
	Return the label that occurs most often for the given column value of the best predictor

	Returns
		predicted_label (int) : predicted label
	'''
	label_counts = best_predictor_frequency_table[column_value]
	# max keeps the first entry among equal counts
	predicted_label, _ = max(label_counts.items(), key=lambda entry: entry[1])
	return predicted_label

def OneR_classification(dataset_without_labels, train_labels):
	"""
	Implements the OneR classification algorithm, which selects a single feature to make predictions.

	The feature (column) with the lowest error rate is chosen, and every row is then
	labelled with the most frequent label seen for its value of that feature.

	Parameters:
		dataset_without_labels (list of lists): A list of samples, where each sample is a list of feature values.
		train_labels (list): A list of labels, one per sample.

	Returns:
		predicted_labels (list) : the list of predicted labels
	"""
	def column_of(column_index):
		# all values of one column, in row order
		return [row[column_index] for row in dataset_without_labels]

	# find the column with the lowest error rate; the first one wins on a tie
	best_predictor_index, lowest_error_rate = None, 9999
	for column_index in range(len(dataset_without_labels[0])):
		error_rate = calculate_error(column_of(column_index), train_labels)
		if error_rate < lowest_error_rate:
			best_predictor_index, lowest_error_rate = column_index, error_rate

	# frequency table of the winning column
	best_predictor_column_values = column_of(best_predictor_index)
	best_predictor_frequency_table = build_frequency_table(best_predictor_column_values, train_labels)

	# predict the label of every row from its value in the winning column
	return [
		oneR_predict(column_value, best_predictor_frequency_table)
		for column_value in best_predictor_column_values
	]

In [149]:
# Load the data, turn it into integers and compute the per-class statistics
# used by the Naive Bayes predictions below.
dataset = load_csv("dataset.csv")
dataset = convert_categorical_to_numerical(dataset)
dataset = convert_string_to_int(dataset)
labels_list, seperated_dataset = split_data_by_labels(dataset)
dataset_details = calculate_details(seperated_dataset)

# predict one row of data only
predicted_label = predict(dataset[0][:-1], labels_list, dataset_details)

# get dataset without the labels
dataset_without_labels = [row[:-1] for row in dataset]
# get a list of actual labels of the dataset
actual_labels = [row[-1] for row in dataset]

# NOTE: every classifier below is fitted on and evaluated against the same rows,
# so the accuracies printed here are training accuracies.
# predict the labels with Naive Bayes classification algorithm
predicted_labels_naive_bayes = predict_all(dataset_without_labels, labels_list, dataset_details)
# predict the labels with ZeroR classification algorithm
predicted_labels_zeroR = zeroR(dataset, labels_list)
# predict the labels with OneR classification algorithm
predicted_labels_oneR = OneR_classification(dataset_without_labels, actual_labels)

# calculate accuracy of each classification algorithm
accuracy_naive_bayes = calculate_accuracy(predicted_labels_naive_bayes, actual_labels)
accuracy_zeroR = calculate_accuracy(predicted_labels_zeroR, actual_labels)
accuracy_oneR = calculate_accuracy(predicted_labels_oneR, actual_labels)
print("accuracy_naive_bayes: ", accuracy_naive_bayes)
print("accuracy_zeroR: ", accuracy_zeroR)
print("accuracy_oneR: ", accuracy_oneR)
print(predicted_labels_naive_bayes)


label_count_list:  [48, 52]
most_frequent_label:  1
accuracy_naive_bayes:  0.58
accuracy_zeroR:  0.52
accuracy_oneR:  0.68
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0]


In [154]:
# Experiment: repeat the Naive Bayes evaluation on features rescaled with StandardScaler.
from sklearn.preprocessing import StandardScaler

dataset = load_csv("dataset.csv")
dataset = convert_categorical_to_numerical(dataset)
dataset = convert_string_to_int(dataset)
dataset_without_labels = [row[:-1] for row in dataset]
# get a list of actual labels of the dataset
actual_labels = [row[-1] for row in dataset]

# create an instance of the StandardScaler class
scaler = StandardScaler()

# fit the scaler to the data and transform it
dataset_without_labels_scaled = scaler.fit_transform(dataset_without_labels)
# convert the scaler's result into plain nested lists, the format the helper functions work on
dataset_without_labels_scaled = dataset_without_labels_scaled.tolist()

# re-attach the label as the last column of every scaled row,
# because split_data_by_labels reads the label from the last column
dataset_with_labels_scaled = []
for i in range(len(dataset_without_labels_scaled)):
    temp_row = dataset_without_labels_scaled[i].copy()
    temp_row.append(actual_labels[i])
    dataset_with_labels_scaled.append(temp_row)

labels_list, seperated_dataset_scaled = split_data_by_labels(dataset_with_labels_scaled)
dataset_details_scaled = calculate_details(seperated_dataset_scaled)

# predict the labels with Naive Bayes classification algorithm
predicted_labels_naive_bayes_scaled = predict_all(dataset_without_labels_scaled, labels_list, dataset_details_scaled)
# calculate accuracy of the Naive Bayes predictions on the scaled data
# NOTE(review): a per-column linear rescaling presumably divides every class's Gaussian
# density by the same factor, so the predicted labels (and the accuracy) should not
# change compared with the unscaled run - confirm
accuracy_naive_bayes_scaled = calculate_accuracy(predicted_labels_naive_bayes_scaled, actual_labels)
print(accuracy_naive_bayes_scaled)

0.58


In [147]:
# Experiment: repeat the Naive Bayes evaluation on features rescaled with MinMaxScaler.
from sklearn.preprocessing import MinMaxScaler

dataset = load_csv("dataset.csv")
dataset = convert_categorical_to_numerical(dataset)
dataset = convert_string_to_int(dataset)
dataset_without_labels = [row[:-1] for row in dataset]
# get a list of actual labels of the dataset
actual_labels = [row[-1] for row in dataset]

# create an instance of the MinMaxScaler class
scaler = MinMaxScaler()

# fit the scaler to the data and transform it
dataset_without_labels_scaled = scaler.fit_transform(dataset_without_labels)
# convert the scaler's result into plain nested lists, the format the helper functions work on
dataset_without_labels_scaled = dataset_without_labels_scaled.tolist()

# re-attach the label as the last column of every scaled row,
# because split_data_by_labels reads the label from the last column
dataset_with_labels_scaled = []
for i in range(len(dataset_without_labels_scaled)):
    temp_row = dataset_without_labels_scaled[i].copy()
    temp_row.append(actual_labels[i])
    dataset_with_labels_scaled.append(temp_row)

labels_list, seperated_dataset_scaled = split_data_by_labels(dataset_with_labels_scaled)
dataset_details_scaled = calculate_details(seperated_dataset_scaled)

# predict the labels with Naive Bayes classification algorithm
predicted_labels_naive_bayes_scaled = predict_all(dataset_without_labels_scaled, labels_list, dataset_details_scaled)
# calculate accuracy of the Naive Bayes predictions on the scaled data
accuracy_naive_bayes_scaled = calculate_accuracy(predicted_labels_naive_bayes_scaled, actual_labels)
print(accuracy_naive_bayes_scaled)

0.58


In [151]:
# Print the per-class [mean, std_dev, count] statistics of the scaled and of the unscaled data for comparison
print(dataset_details_scaled)
print(dataset_details)

[[[0.6666666666666666, 0.47639306734033077, 48], [0.09093137254901956, 0.12612131724931921, 48], [0.10416666666666667, 0.15233899171176235, 48], [0.234375, 0.252151645102029, 48], [0.10677083333333333, 0.13883439385987795, 48], [0.10984848484848485, 0.15345311234094494, 48], [0.18489583333333334, 0.15893090156301007, 48], [0.1128472222222222, 0.1839364542126907, 48], [0.028617216117216113, 0.02355676282006951, 48]], [[0.5576923076923077, 0.5015060275070906, 52], [0.09547511312217191, 0.1406498028172087, 52], [0.16695804195804193, 0.1524182941768658, 52], [0.3389423076923077, 0.30652277614006007, 52], [0.18990384615384615, 0.21507292512580922, 52], [0.16258741258741263, 0.20885171039410946, 52], [0.23557692307692307, 0.23698195067812577, 52], [0.15705128205128208, 0.19571516122629867, 52], [0.054628064243448855, 0.13712504106303772, 52]]]
[[[0.6666666666666666, 0.47639306734033077, 48], [8.729166666666666, 10.720311966192133, 48], [2.2916666666666665, 3.3514578176587717, 48], [1.875, 2.

In [144]:
# Display the per-class statistics computed from the scaled data
dataset_details_scaled

[[[0.6666666666666666, 0.47639306734033077, 48],
  [0.09093137254901956, 0.12612131724931921, 48],
  [0.10416666666666667, 0.15233899171176235, 48],
  [0.234375, 0.252151645102029, 48],
  [0.10677083333333333, 0.13883439385987795, 48],
  [0.10984848484848485, 0.15345311234094494, 48],
  [0.18489583333333334, 0.15893090156301007, 48],
  [0.1128472222222222, 0.1839364542126907, 48],
  [0.028617216117216113, 0.02355676282006951, 48]],
 [[0.5576923076923077, 0.5015060275070906, 52],
  [0.09547511312217191, 0.1406498028172087, 52],
  [0.16695804195804193, 0.1524182941768658, 52],
  [0.3389423076923077, 0.30652277614006007, 52],
  [0.18990384615384615, 0.21507292512580922, 52],
  [0.16258741258741263, 0.20885171039410946, 52],
  [0.23557692307692307, 0.23698195067812577, 52],
  [0.15705128205128208, 0.19571516122629867, 52],
  [0.054628064243448855, 0.13712504106303772, 52]]]

In [None]:
# Scratch cell: check that ndarray.tolist() turns a numpy array into a plain Python list
import numpy as np

# create a numpy array
arr = np.array([1, 2, 3, 4, 5])

# convert to a list
lst = arr.tolist()

print(lst)  # Output: [1, 2, 3, 4, 5]


In [125]:
# Scratch cell: using the copy() method so that appending to the new list leaves the old list untouched
old_list = [1, 2, 3]
new_list = old_list.copy()
new_list.append(4)

print(old_list)  # Output: [1, 2, 3]
print(new_list)  # Output: [1, 2, 3, 4]

[1, 2, 3]
[1, 2, 3, 4]


In [89]:
# Scratch cell: same counting logic as build_frequency_table, tried out on a tiny example
column_value_list = [1, 2, 3, 2,2]
labels = [0,1,0,0, 1]
frequency_table = {}
for feature_value, label in zip(column_value_list, labels):
    # first time this feature value is seen: create its label counter
    if feature_value not in frequency_table:
        frequency_table[feature_value] = {}
    # first time this label is seen for the feature value: start counting at 0
    if label not in frequency_table[feature_value]:
        frequency_table[feature_value][label] = 0
    frequency_table[feature_value][label] += 1

print(frequency_table)
# most_common_label = max(label_counts, key=label_counts.get)
# print(most_common_label)


{1: {0: 1}, 2: {1: 2, 0: 1}, 3: {0: 1}}


In [100]:
# Scratch cell: find the key with the largest value in a dictionary
my_dict = {"apple": 2, "banana": 3, "orange": 3}

# on a tie max() keeps the first key it meets, here "banana"
max_key = max(my_dict, key=my_dict.get)

print(max_key)   # Output: "banana"


banana


In [87]:
# Display the current value of max_key (notebook state: depends on which cell assigned it last)
max_key

'c'

In [80]:

# Test calculating class probabilities
# Small hand-written dataset: two numeric features followed by the class label (0 or 1)
dataset = [[3.393533211,2.331273381,0],
	[3.110073483,1.781539638,0],
	[1.343808831,3.368360954,0],
	[3.582294042,4.67917911,0],
	[2.280362439,2.866990263,0],
	[7.423436942,4.696522875,1],
	[5.745051997,3.533989803,1],
	[9.172168622,2.511101045,1],
	[7.792783481,3.424088941,1],
	[7.939820817,0.791637231,1]]
# the conversion steps are skipped here: the values above are already numeric
# dataset = convert_categorical_to_numerical(dataset)
# dataset = convert_string_to_int(dataset)
labels_list, seperated_dataset = split_data_by_labels(dataset)
dataset_details = calculate_details(seperated_dataset)
predicted_label = predict(dataset[0][:-1], labels_list, dataset_details)
# use row[:-1] for every row to pass in the dataset without the labels
dataset_without_labels = [row[:-1] for row in dataset]
actual_labels = [row[-1] for row in dataset]
predicted_labels = predict_all(dataset_without_labels, labels_list, dataset_details)
predicted_labels_zeroR = zeroR(dataset, labels_list)

# OneR_classification returns the list of predicted labels, which is printed here
oneR_result = OneR_classification(dataset_without_labels, actual_labels)
print(oneR_result)

label_count_list:  [5, 5]
most_frequent_label:  0
row:  {3.393533211: {0: 1}, 3.110073483: {0: 1}, 1.343808831: {0: 1}, 3.582294042: {0: 1}, 2.280362439: {0: 1}, 7.423436942: {1: 1}, 5.745051997: {1: 1}, 9.172168622: {1: 1}, 7.792783481: {1: 1}, 7.939820817: {1: 1}}
error rate:  0.0
row:  {2.331273381: {0: 1}, 1.781539638: {0: 1}, 3.368360954: {0: 1}, 4.67917911: {0: 1}, 2.866990263: {0: 1}, 4.696522875: {1: 1}, 3.533989803: {1: 1}, 2.511101045: {1: 1}, 3.424088941: {1: 1}, 0.791637231: {1: 1}}
error rate:  0.0
{'best_feature': 0, 'error_rate': 0.0}


In [60]:
# Display the accuracy of the ZeroR predictions against the actual labels
calculate_accuracy(predicted_labels_zeroR, actual_labels)

0.5

In [57]:
# Display the actual labels currently held in the notebook
actual_labels

[0, 0, 0, 0, 0, 0, 1, 1, 1, 1]

In [6]:
# Display the first row without its label, next to a literal list of the two labels
dataset[0][:-1],[0,1]

([3.393533211, 2.331273381], [0, 1])

In [7]:
# Display the per-class [mean, std_dev, count] statistics currently held in the notebook
dataset_details

[[[2.85742739175, 1.0275719447560472, 4],
  [3.04008827075, 1.275505774560249, 4]],
 [[6.725604049666667, 2.441618256572654, 6],
  [2.9707216929999998, 1.3016622439989782, 6]]]

In [56]:
# Total number of rows over all classes: the count is the last entry of each class's first column statistics
sum([dataset_details[k][0][-1] for k in range(len(dataset_details))])

10

In [10]:
# Check the type of the first stored label
type(labels_list[0])

int

In [60]:
# Display the dataset currently held in the notebook
dataset

[[0, 4, 3, 2, 1, 0, 2, 2, 1, 0],
 [0, 8, 0, 2, 0, 1, 0, 0, 4, 0],
 [1, 5, 0, 0, 0, 14, 2, 0, 15, 0],
 [1, 7, 0, 3, 0, 0, 5, 0, 0, 0],
 [1, 3, 2, 1, 0, 2, 1, 0, 6, 1],
 [0, 23, 1, 1, 0, 0, 1, 1, 3, 0],
 [0, 6, 3, 2, 1, 1, 1, 1, 3, 1],
 [1, 6, 0, 2, 0, 1, 1, 0, 5, 0],
 [0, 1, 7, 3, 0, 7, 1, 0, 2, 1],
 [1, 3, 2, 2, 1, 0, 2, 2, 8, 1],
 [1, 6, 0, 3, 0, 2, 1, 2, 4, 1],
 [1, 6, 0, 1, 3, 4, 2, 7, 3, 0],
 [1, 6, 0, 1, 0, 0, 0, 3, 6, 0],
 [0, 3, 1, 0, 0, 1, 2, 3, 4, 0],
 [0, 7, 3, 5, 0, 1, 1, 0, 3, 0],
 [1, 11, 2, 0, 0, 16, 1, 1, 12, 1],
 [1, 6, 0, 0, 2, 0, 1, 0, 8, 1],
 [1, 19, 5, 8, 0, 7, 1, 0, 20, 1],
 [1, 3, 0, 1, 1, 0, 3, 0, 10, 0],
 [1, 1, 3, 1, 3, 1, 2, 2, 2, 0],
 [1, 3, 2, 1, 1, 1, 4, 1, 1, 0],
 [1, 4, 2, 1, 1, 1, 1, 1, 4, 1],
 [1, 9, 1, 3, 0, 2, 1, 4, 2, 1],
 [0, 5, 3, 5, 2, 1, 2, 1, 0, 1],
 [1, 7, 2, 1, 2, 1, 2, 0, 2, 1],
 [0, 9, 0, 0, 0, 0, 1, 4, 1, 0],
 [1, 6, 2, 5, 1, 3, 3, 1, 10, 1],
 [1, 18, 2, 6, 2, 4, 5, 2, 9, 0],
 [1, 25, 4, 2, 0, 7, 2, 0, 17, 0],
 [1, 10, 12, 0, 0, 2, 2, 0, 11

In [48]:
# Display the per-class [mean, std_dev, count] statistics currently held in the notebook
dataset_details

[[[0.6666666666666666, 0.47639306734033077, 48],
  [8.729166666666666, 10.720311966192133, 48],
  [2.2916666666666665, 3.3514578176587717, 48],
  [1.875, 2.017213160816232, 48],
  [0.8541666666666666, 1.1106751508790236, 48],
  [2.4166666666666665, 3.3759684715007894, 48],
  [1.4791666666666667, 1.2714472125040805, 48],
  [1.3541666666666667, 2.2072374505522894, 48],
  [5.208333333333333, 4.287330833252652, 48]],
 [[0.5576923076923077, 0.5015060275070906, 52],
  [9.115384615384615, 11.95523323946274, 52],
  [3.673076923076923, 3.353202471891047, 52],
  [2.7115384615384617, 2.4521822091204806, 52],
  [1.5192307692307692, 1.7205834010064738, 52],
  [3.576923076923077, 4.594737628670407, 52],
  [1.8846153846153846, 1.8958556054250062, 52],
  [1.8846153846153846, 2.3485819347155843, 52],
  [9.942307692307692, 24.956757473472866, 52]]]

In [51]:
# Row count of the second class, read from the last entry of its first column statistics
dataset_details[1][0][-1] 

52

In [10]:
# Display the rows grouped by label (labels removed), as returned by split_data_by_labels
seperated_dataset

[[[0, 4, 3, 2, 1, 0, 2, 2, 1],
  [0, 8, 0, 2, 0, 1, 0, 0, 4],
  [1, 5, 0, 0, 0, 14, 2, 0, 15],
  [1, 7, 0, 3, 0, 0, 5, 0, 0],
  [0, 23, 1, 1, 0, 0, 1, 1, 3],
  [1, 6, 0, 2, 0, 1, 1, 0, 5],
  [1, 6, 0, 1, 3, 4, 2, 7, 3],
  [1, 6, 0, 1, 0, 0, 0, 3, 6],
  [0, 3, 1, 0, 0, 1, 2, 3, 4],
  [0, 7, 3, 5, 0, 1, 1, 0, 3],
  [1, 3, 0, 1, 1, 0, 3, 0, 10],
  [1, 1, 3, 1, 3, 1, 2, 2, 2],
  [1, 3, 2, 1, 1, 1, 4, 1, 1],
  [0, 9, 0, 0, 0, 0, 1, 4, 1],
  [1, 18, 2, 6, 2, 4, 5, 2, 9],
  [1, 25, 4, 2, 0, 7, 2, 0, 17],
  [0, 8, 0, 3, 0, 0, 1, 0, 3],
  [1, 26, 0, 0, 0, 13, 0, 11, 4],
  [1, 4, 3, 5, 0, 2, 0, 0, 5],
  [1, 6, 1, 0, 0, 0, 1, 0, 7],
  [1, 3, 2, 0, 1, 6, 1, 1, 1],
  [1, 2, 2, 3, 2, 0, 3, 0, 3],
  [0, 10, 3, 0, 0, 6, 3, 0, 2],
  [1, 1, 4, 0, 0, 0, 0, 0, 17],
  [1, 11, 1, 3, 2, 3, 1, 0, 4],
  [1, 6, 1, 2, 0, 3, 1, 0, 4],
  [1, 3, 4, 2, 2, 1, 3, 2, 2],
  [0, 5, 1, 0, 3, 2, 0, 0, 5],
  [1, 8, 4, 4, 0, 2, 1, 0, 3],
  [1, 9, 5, 7, 0, 2, 0, 0, 10],
  [0, 6, 2, 8, 4, 8, 1, 0, 4],
  [1, 10, 3, 2, 0, 2, 2, 