### IMPORTAÇÕES 

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sympy as symbols
from pandas.api.types import is_numeric_dtype

### DATASET 

In [10]:
input_file = 'C:\\JupyterLab\\PAPL-UFPE\\ML-Fuzzy\\Testes\\iris.data'
output_file = 'iris.csv'
iris_data = pd.read_csv(input_file, header=None, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'])
iris_data.to_csv(output_file, index=False)

# normalização 
for index in iris_data.columns: 
    if is_numeric_dtype(iris_data[index][0]):
        iris_data[index] = iris_data[index]/max(iris_data[index])
        
iris_data = np.array(iris_data)
iris_data

array([[0.6455696202531644, 0.7954545454545454, 0.20289855072463767,
        0.08, 'Iris-setosa'],
       [0.620253164556962, 0.6818181818181818, 0.20289855072463767, 0.08,
        'Iris-setosa'],
       [0.5949367088607594, 0.7272727272727273, 0.18840579710144928,
        0.08, 'Iris-setosa'],
       [0.5822784810126581, 0.7045454545454545, 0.21739130434782608,
        0.08, 'Iris-setosa'],
       [0.6329113924050632, 0.8181818181818181, 0.20289855072463767,
        0.08, 'Iris-setosa'],
       [0.6835443037974683, 0.8863636363636362, 0.24637681159420288,
        0.16, 'Iris-setosa'],
       [0.5822784810126581, 0.7727272727272726, 0.20289855072463767,
        0.12, 'Iris-setosa'],
       [0.6329113924050632, 0.7727272727272726, 0.21739130434782608,
        0.08, 'Iris-setosa'],
       [0.5569620253164557, 0.6590909090909091, 0.20289855072463767,
        0.08, 'Iris-setosa'],
       [0.620253164556962, 0.7045454545454545, 0.21739130434782608, 0.04,
        'Iris-setosa'],
       [0.68

In [11]:
# tirando a parte textual de classificação da espécie
iris_data_without_last_column = iris_data[:, :-1]
iris_data_without_last_column

array([[0.6455696202531644, 0.7954545454545454, 0.20289855072463767,
        0.08],
       [0.620253164556962, 0.6818181818181818, 0.20289855072463767, 0.08],
       [0.5949367088607594, 0.7272727272727273, 0.18840579710144928,
        0.08],
       [0.5822784810126581, 0.7045454545454545, 0.21739130434782608,
        0.08],
       [0.6329113924050632, 0.8181818181818181, 0.20289855072463767,
        0.08],
       [0.6835443037974683, 0.8863636363636362, 0.24637681159420288,
        0.16],
       [0.5822784810126581, 0.7727272727272726, 0.20289855072463767,
        0.12],
       [0.6329113924050632, 0.7727272727272726, 0.21739130434782608,
        0.08],
       [0.5569620253164557, 0.6590909090909091, 0.20289855072463767,
        0.08],
       [0.620253164556962, 0.7045454545454545, 0.21739130434782608, 0.04],
       [0.6835443037974683, 0.8409090909090908, 0.21739130434782608,
        0.08],
       [0.6075949367088607, 0.7727272727272726, 0.2318840579710145, 0.08],
       [0.607594936

### CÓDIGO

> Aqui ele está mais objetivo, com mais comentários e alterações consulte os documentos de teste

In [12]:
# valor definido pois conheço o dataset
K = 3

# fuzziness value 
m = [1.1, 1.6, 2]
# valores da lista indicados pelo professor

# membership
U = np.random.rand(iris_data_without_last_column.shape[0], K)
U # dados já foram normalizados

def calculando_centroides(iris_data_without_last_column, K, U, m):
    centroides = np.zeros((K, iris_data_without_last_column.shape[1]))
    for i in range(K):
        U_corrigido = np.tile(U[:, i][:, np.newaxis], (1, iris_data_without_last_column.shape[1]))
        m_expandido = np.tile(m[i], (iris_data_without_last_column.shape[0], iris_data_without_last_column.shape[1]))
        centroides[i, :] = np.sum((U_corrigido ** m_expandido) * iris_data_without_last_column, axis=0) / np.sum(U_corrigido ** m_expandido)
    return centroides

centroides = calculando_centroides(iris_data_without_last_column, K, U, m)

def matriz_dg(iris_data_without_last_column, centroides):
    num_points = iris_data_without_last_column.shape[0]
    num_centroides = centroides.shape[0]

    distancias = np.zeros((num_points, num_centroides))

    for i in range(num_points):
        for j in range(num_centroides):
            distancias[i, j] = np.linalg.norm(iris_data_without_last_column[i] - centroides[j])

    return distancias

d_matriz = matriz_dg(iris_data_without_last_column, centroides)

# Exibe a matriz de distâncias
print("Matriz de Distâncias:")
print(d_matriz)

def fun_objetivo(U, matriz_dg, m):
    c, n = U.shape
    objetivo = 0

    for i in range(c):
        for j in range(n):
            objetivo += U[i, j] ** m * matriz_dg[i, j] ** 2

    return objetivo

def membership_c(iris_data_without_last_column, centroides, K, m):
    U_novo = np.zeros_like(d_matriz)

    for i in range(K):
        U_novo[:, i] = d_matriz[:, i]

    for j in range(len(m)):
        U_novo = 1/(U_novo ** (2/(m[j]-1)) * np.sum((1/U_novo) ** (2/(m[j]-1)), axis=1)[:, np.newaxis])

    return U_novo

U_novo = membership_c(iris_data_without_last_column, centroides, K, m)
N = iris_data_without_last_column.shape[0] # número de dados
velho_obj = fun_objetivo(U_novo, d_matriz, m)

maximo = 100

# Inicializar valores de pertinência e centroides
U = np.random.rand(iris_data_without_last_column.shape[0], K)
centroides = calculando_centroides(iris_data_without_last_column, K, U, m)

# Calcular matriz de distâncias inicial
J_anterior = matriz_dg(iris_data_without_last_column, centroides)

for iteration in range(maximo):
    # atualizar valores 
    U_novo = membership_c(iris_data_without_last_column, centroides, K, m)
    centroides = calculando_centroides(iris_data_without_last_column, K, U_novo, m)
    
    nova_J = matriz_dg(iris_data_without_last_column, centroides)

    novo_objetivo = fun_objetivo(U_novo, nova_J, m)

    # verificando a convergência
    if (np.all(nova_J <= J_anterior) or 
        np.all(np.abs(novo_objetivo - velho_obj) < 1e-6) or
        iteration == maximo - 1):
        break

Matriz de Distâncias:
[[0.78085891 0.77851384 0.77599968]
 [0.67679259 0.67443937 0.67186401]
 [0.69512082 0.69276757 0.69036215]
 [0.67246674 0.67011275 0.66744807]
 [0.79187462 0.78953319 0.78704236]
 [0.88116182 0.87896862 0.87552347]
 [0.72522356 0.722961   0.72006281]
 [0.75668263 0.75433606 0.75170142]
 [0.61967821 0.61731633 0.61475922]
 [0.69897548 0.69654726 0.69430212]
 [0.84104138 0.83870114 0.83609092]
 [0.74368659 0.74134288 0.73860892]
 [0.67209266 0.66965816 0.66754401]
 [0.63069537 0.62825434 0.62662393]
 [0.92256642 0.92023313 0.9179251 ]
 [0.99276836 0.99056915 0.98744478]
 [0.87560687 0.87341233 0.87035335]
 [0.77997836 0.77770915 0.7748114 ]
 [0.88437182 0.88210331 0.87896054]
 [0.83685981 0.83459588 0.8316663 ]
 [0.79147387 0.78913413 0.78626394]
 [0.81927655 0.81708838 0.81377503]
 [0.76113879 0.75881025 0.75681901]
 [0.75399358 0.75190548 0.74785423]
 [0.75069262 0.74835981 0.74529425]
 [0.68859431 0.68624777 0.68342076]
 [0.7587289  0.75654648 0.75301798]
 [0.78

In [13]:
# atualizações
velho_obj = novo_objetivo
J_anterior = nova_J
U = U_novo

print("Centroides finais:")
print(centroides)
print()
print("Pertinências dos dados aos centroides:")
print(U)

Centroides finais:
[[0.18607815 0.17560184 0.13678891 0.12107101]
 [0.18086222 0.17889211 0.12098006 0.10238987]
 [0.18605886 0.1693289  0.14327202 0.12764855]]

Pertinências dos dados aos centroides:
[[0.20868108 0.31163614 0.47968279]
 [0.19080418 0.30358386 0.50561196]
 [0.19720962 0.30995195 0.49283843]
 [0.18828385 0.3005177  0.51119845]
 [0.21070983 0.31271992 0.47657025]
 [0.21063026 0.29364831 0.49572143]
 [0.19576202 0.29693855 0.50729943]
 [0.20302642 0.30718873 0.48978485]
 [0.17992599 0.29937915 0.52069487]
 [0.19855261 0.3157838  0.48566358]
 [0.21519105 0.31201474 0.4727942 ]
 [0.19935171 0.30366677 0.49698152]
 [0.19625715 0.31838858 0.48535427]
 [0.19807583 0.33218838 0.46973579]
 [0.22931964 0.32142392 0.44925644]
 [0.22767937 0.30601375 0.46630687]
 [0.21606477 0.30192309 0.48201213]
 [0.20405512 0.30092649 0.4950184 ]
 [0.21389719 0.30125286 0.48484995]
 [0.21148617 0.30349037 0.48502346]
 [0.20423635 0.30308629 0.49267736]
 [0.20474849 0.29247347 0.50277804]
 [0.215