In [1]:
import numpy as np
from my_io import read_dataset_to_X_and_y
from normalization import zero_mean_unit_variance
from copy import deepcopy

In [2]:
class UniSet():
    """A dataset ("universe") together with the relations computed on it.

    The samples and labels come from ``read_dataset_to_X_and_y``; the two
    relation slots are created empty (``None``) and are expected to be
    filled in by later processing steps.
    """

    def __init__(self, file, range_feature, range_label, normalization=False):
        """Read ``file`` and record its samples, labels and basic sizes.

        Args:
            file: Dataset location, forwarded to ``read_dataset_to_X_and_y``.
            range_feature: Forwarded unchanged to the reader.
            range_label: Forwarded unchanged to the reader.
            normalization: Only the literal ``True`` makes the samples go
                through ``zero_mean_unit_variance``.
        """
        # Side effect: re-seeds NumPy's global random generator.
        np.random.seed(1)
        features, targets = read_dataset_to_X_and_y(
            file, range_feature, range_label,
            shuffle=False, about_nan='class_mean')

        sample_count, feature_count = features.shape[0], features.shape[1]
        data = features.astype(float)
        if normalization is True:
            data = zero_mean_unit_variance(data)
        distinct = np.unique(targets)

        self.number_of_feature = feature_count
        self.size_of_universal = sample_count
        self.universal = data
        self.label = targets
        self.diffrent_label = distinct
        self.number_of_diffrent_label = distinct.shape[0]
        # Filled in later, once the relations have been computed.
        self.relation = None
        self.equivalence_relation = None


# Load the whole dataset once; normalization keeps its default (False).
# NOTE(review): (4, 14) and (1, 2) are passed as range_feature / range_label —
# presumably column ranges in the CSV; confirm against read_dataset_to_X_and_y.
uni_total = UniSet('dataset/hcvdat0.csv', (4, 14), (1, 2))

In [3]:
def split_train_test(
        universe: UniSet, train_size: float) -> tuple[UniSet, UniSet]:
    """Split a universe into a leading train part and a trailing test part.

    The first ``int(size * train_size)`` samples go to the train set and the
    remainder to the test set; no shuffling is done here. Samples and labels
    are cut at the same index so they stay aligned.

    Args:
        universe: The universe to split; it is not modified.
        train_size: Fraction of samples (between 0 and 1) for the train set.

    Returns:
        A ``(train, test)`` pair of ``UniSet`` objects.

    Raises:
        ValueError: If ``train_size`` is outside ``[0, 1]``, which would
            otherwise produce a negative test-set size.
    """
    if not 0 <= train_size <= 1:
        raise ValueError(
            f'train_size must be within [0, 1], got {train_size!r}')

    cut = int(universe.size_of_universal * train_size)

    # Deep copies carry over every other attribute (feature count, label
    # set, ...). NOTE(review): any relation already stored on `universe` is
    # copied as-is and will not match the reduced sample set.
    train = deepcopy(universe)
    test = deepcopy(universe)

    train.size_of_universal = cut
    train.universal = universe.universal[0:cut]
    # assumes label is indexed by sample along its first axis — TODO confirm
    train.label = universe.label[0:cut]

    test.size_of_universal = universe.size_of_universal - cut
    test.universal = universe.universal[cut:]
    test.label = universe.label[cut:]
    return train, test


# Alternative 80/20 split, currently disabled:
# uni_train, uni_test = split_train_test(uni_total, 0.8)
# With train_size=1 every sample lands in uni_train and uni_test is empty.
uni_train, uni_test = split_train_test(uni_total, 1)

In [4]:
def similarity(sample1: np.ndarray, sample2: np.ndarray) -> float:
    """Return the mean per-feature ratio similarity of two samples.

    For each feature the score is ``min(a / b, b / a)``; the result is the
    average of these scores over all features. A feature that is zero in
    both samples counts as a perfect match (1.0) instead of producing
    ``0 / 0 = NaN``, so a sample is always fully similar to itself.

    Args:
        sample1: 1-D feature vector.
        sample2: 1-D feature vector of the same length as ``sample1``.

    Returns:
        The average per-feature score as a float.

    Raises:
        ZeroDivisionError: If the samples have no features.
    """
    first = np.asarray(sample1, dtype=float)
    second = np.asarray(sample2, dtype=float)
    number_of_feature = first.shape[0]

    # Division by zero is expected here (a single zero yields 0 vs. inf and
    # the minimum picks 0), so silence the warnings instead of emitting one
    # per pair of samples.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = np.minimum(first / second, second / first)

    # 0 / 0 is NaN; two equal (zero) values are treated as identical.
    both_zero = (first == 0) & (second == 0)
    ratios = np.where(both_zero, 1.0, ratios)

    return float(ratios.sum()) / number_of_feature


def find_relation(universal: UniSet) -> np.ndarray:
    """Build the pairwise similarity matrix of a universe.

    Entry ``[i, j]`` holds ``similarity`` of sample ``i`` and sample ``j``,
    for every ordered pair of the first ``size_of_universal`` samples.
    """
    indices = range(universal.size_of_universal)
    rows = [
        [similarity(universal.universal[row], universal.universal[col])
         for col in indices]
        for row in indices]
    return np.array(rows)


# Pairwise similarity matrix of the training universe
# (one similarity() call per ordered pair of samples).
uni_train.relation = find_relation(uni_train)

In [5]:
def max_min(sample1: np.ndarray, sample2: np.ndarray) -> float:
    """Return the largest of the position-wise minima of two vectors.

    The two inputs are stacked as rows, the minimum is taken down each
    column, and the maximum of those minima is returned.
    """
    stacked = np.concatenate(
        (np.atleast_2d(sample1), np.atleast_2d(sample2)), axis=0)
    column_minima = stacked.min(axis=0)
    return column_minima.max()


def composition_RoR(relation: np.ndarray) -> np.ndarray:
    """Return the max-min composition of a square relation with itself.

    ``result[x, y] = max over k of min(relation[x, k], relation[k, y])``.

    The second operand is taken from column ``y`` (``relation[k, y]``), so
    the result is also correct for relations that are not symmetric. For a
    symmetric relation this equals pairing row ``x`` with row ``y``.

    Args:
        relation: Square 2-D membership matrix.

    Returns:
        A matrix of the same shape and dtype as ``relation``.

    Raises:
        ValueError: If ``relation`` is not a square 2-D array.
    """
    relation = np.asarray(relation)
    if relation.ndim != 2 or relation.shape[0] != relation.shape[1]:
        raise ValueError(
            f'relation must be a square 2-D array, got shape {relation.shape}')

    result = np.empty_like(relation)
    for row in range(relation.shape[0]):
        # Broadcasting pairs relation[row, k] with every relation[k, col];
        # working one row at a time keeps the temporary at n x n elements.
        paired = np.minimum(relation[row][:, None], relation)
        result[row] = paired.max(axis=0)
    return result


def union_two_relation(
        relation1: np.ndarray, relation2: np.ndarray) -> np.ndarray:
    """Return the union of two relations as their entry-wise maximum.

    Both inputs are placed side by side along a third axis and the maximum
    is taken across that axis.
    """
    layers = np.concatenate(
        (np.atleast_3d(relation1), np.atleast_3d(relation2)), axis=2)
    return layers.max(axis=2)


def make_transitive(relation: np.ndarray) -> np.ndarray:
    """Iterate ``R <- union(R, R o R)`` until the relation stops changing.

    Works on a copy of ``relation``; the input array is not modified. The
    1-based pass counter is printed after every pass.

    Returns:
        The relation reached when one more pass changes nothing.
    """
    current = np.copy(relation)
    previous = None
    passes = 0
    while True:
        # On the first pass `previous` is None, so the comparison is true
        # for every element and the loop body runs at least once for a
        # non-empty relation.
        if not (current != previous).any():
            break
        previous = np.copy(current)
        composed = composition_RoR(previous)
        current = union_two_relation(previous, composed)
        passes += 1
        print(passes)
    return current


# Close the similarity relation under max-min composition; make_transitive
# prints its pass counter on every iteration (the numbers shown below).
uni_train.equivalence_relation = make_transitive(uni_train.relation)

1
2
3
4
5
6


In [6]:
def is_reflexive(relation: np.ndarray) -> bool:
    """Return whether a fuzzy relation is reflexive.

    A fuzzy relation is reflexive when every element is related to itself
    with full membership, i.e. every diagonal entry equals 1. A merely
    non-zero diagonal (or a NaN one) is not enough.

    Args:
        relation: Square membership matrix.

    Returns:
        ``True`` if every diagonal entry is 1 (within floating-point
        tolerance), ``False`` otherwise.
    """
    diagonal = np.asarray(relation).diagonal()
    return bool(np.isclose(diagonal, 1.0).all())


def is_symmetric(relation: np.ndarray) -> bool:
    """Return whether every entry of the relation equals its mirror entry."""
    mirrored = relation.transpose()
    return np.all(np.equal(relation, mirrored))


def is_transitive(relation: np.ndarray) -> bool:
    """Return whether adding ``R o R`` to the relation leaves it unchanged.

    The relation is composed with itself, united with the original, and
    the result is compared entry by entry against the original.
    """
    composed = composition_RoR(relation)
    extended = union_two_relation(relation, composed)
    return np.all(extended == relation)


def is_equivalece(relation: np.ndarray) -> bool:
    """Return whether the relation is reflexive, symmetric and transitive.

    All three checks are always evaluated (no short-circuiting) and then
    combined with a bitwise AND.
    """
    reflexive = is_reflexive(relation)
    symmetric = is_symmetric(relation)
    transitive = is_transitive(relation)
    return reflexive & symmetric & transitive


# Sanity check: the closure computed above should pass all three tests
# (reflexive, symmetric, transitive).
print('Is our equivalence relation, equivalence?',
    f'{is_equivalece(uni_train.equivalence_relation)}')

Is our equivalence relation, equivalence? True


In [7]:
def find_similarity_class(
        universal: np.ndarray, target_sample: int, alpha: float) -> np.ndarray:
    """Return the indices of all samples related to a target at level alpha.

    Args:
        universal: 2-D relation matrix (despite the parameter name, this is
            indexed as an array, not used as a ``UniSet``).
        target_sample: Column index of the sample whose class is wanted.
        alpha: Membership threshold; a sample belongs to the class when
            ``universal[sample, target_sample] >= alpha``.

    Returns:
        Ascending integer index array of the class members. It is an
        integer array even when empty, so it can always be used to index
        another array.
    """
    memberships = np.asarray(universal)[:, target_sample]
    return np.flatnonzero(memberships >= alpha)


def find_cluster(relation: np.ndarray, alpha: float) -> list:
    """Collect the alpha-level similarity classes of a relation.

    Samples are visited in index order. Each sample not already covered by
    an earlier class gets its class from ``find_similarity_class``; the
    members of that class are then flagged so they are skipped later.

    Returns:
        A list of index arrays, one per class, in order of discovery.
    """
    sample_count = relation.shape[0]
    visited = np.zeros(sample_count)
    clusters = []
    for sample in range(sample_count):
        if visited[sample] != 0:
            continue
        members = find_similarity_class(relation, sample, alpha)
        visited[members] = 1
        clusters.append(members)
    return clusters


# Cut the equivalence relation at alpha = 0.70: samples whose membership
# is at least 0.70 end up in the same class.
A = find_cluster(uni_train.equivalence_relation, 0.70)
print(A)

[array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 1