# Installation and Imports

In [1]:
# pyFUME & other packages installation
!pip install pyFUME



In [2]:
# libraries imports

# Read and Pre-process the dataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Fuzzy set and model
from pyfume.Clustering import Clusterer
from pyfume.EstimateAntecendentSet import AntecedentEstimator
from pyfume.EstimateConsequentParameters import ConsequentEstimator
from pyfume.SimpfulModelBuilder import SugenoFISBuilder
from pyfume.Tester import SugenoFISTester

# Model Performance
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score, mean_squared_error, mean_absolute_percentage_error, explained_variance_score
from numpy import clip, column_stack, argmax


In [3]:
# Function to calculate Variance Accounted For (VAF)
def calculate_vaf(actual, predicted):
    error = actual - predicted
    var_error = np.var(error)
    var_actual = np.var(actual)
    vaf = (1 - var_error / var_actual) * 100
    return vaf

# Pre-Process dataset


In [4]:
# Loading the dataset
data = pd.read_csv('/content/wbco.csv',sep=',',header = None, names=['x1','x2','x3','x4','x5','x6','x7','x8','x9','y'])
print(data.head(20))

# Variables' names
features = data.columns[:-1]
print(features)

# Display data information (check for missing values, data types)
print(data.info())

# Check the distribution of the class labels
print(data['y'].value_counts())

# Replace '?' with NaN
data.replace('?', np.nan, inplace=True)

# Check for missing values
print(data.isnull().sum())

#  Drop rows with missing values
data_cleaned = data.dropna(how = 'any')

# Convert a specific column to int64
data_cleaned['x6'] = pd.to_numeric(data_cleaned['x6'], errors='coerce')

# Display data information (check for missing values, data types)
print(data_cleaned.info())

# Check the distribution of the class labels
print(data_cleaned['y'].value_counts())


    x1  x2  x3  x4  x5  x6  x7  x8  x9  y
0    5   1   1   1   2   1   3   1   1  0
1    5   4   4   5   7  10   3   2   1  0
2    3   1   1   1   2   2   3   1   1  0
3    6   8   8   1   3   4   3   7   1  0
4    4   1   1   3   2   1   3   1   1  0
5    8  10  10   8   7  10   9   7   1  1
6    1   1   1   1   2  10   3   1   1  0
7    2   1   2   1   2   1   3   1   1  0
8    2   1   1   1   2   1   1   1   5  0
9    4   2   1   1   2   1   2   1   1  0
10   1   1   1   1   1   1   3   1   1  0
11   2   1   1   1   2   1   2   1   1  0
12   5   3   3   3   2   3   4   4   1  1
13   1   1   1   1   2   3   3   1   1  0
14   8   7   5  10   7   9   5   5   4  1
15   7   4   6   4   6   1   4   3   1  1
16   4   1   1   1   2   1   2   1   1  0
17   4   1   1   1   2   1   3   1   1  0
18  10   7   7   6   4  10   4   1   2  1
19   6   1   1   1   2   1   3   1   1  0
Index(['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9'], dtype='object')
<class 'pandas.core.frame.DataFrame'>
Ra

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_cleaned['x6'] = pd.to_numeric(data_cleaned['x6'], errors='coerce')


In [5]:
# Scalling each feature to a range between 0 and 1
data_cleaned = data_cleaned.to_numpy()
scaler = MinMaxScaler()
df = scaler.fit_transform(data_cleaned)

print(df)

[[0.44444444 0.         0.         ... 0.         0.         0.        ]
 [0.44444444 0.33333333 0.33333333 ... 0.11111111 0.         0.        ]
 [0.22222222 0.         0.         ... 0.         0.         0.        ]
 ...
 [0.44444444 1.         1.         ... 1.         0.11111111 1.        ]
 [0.33333333 0.77777778 0.55555556 ... 0.55555556 0.         1.        ]
 [0.33333333 0.77777778 0.77777778 ... 0.33333333 0.         1.        ]]


In [6]:
X = df[:,:-1]
y = df[:,-1]

# Spliting the dataset in a train set and test set

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model


In [7]:
# Parameter to search: - number of cluster: n_cl;
#                    : - method = fcm (Fuzzy c-means),
#                                 fst-pso(Fuzzy self tunning particle swarm optimization),
#                                 gk (Gustafson-Kessel)

# Cluster training data
n_cl = 3
cl = Clusterer(n_cl,X_train,y_train)
cluster_centers , partition_matrix, _ = cl.cluster(method = 'fcm')


In [8]:
# Estimate the membership functions of the system. default: mf_shape = gaussian
ae = AntecedentEstimator(X_train, partition_matrix)
antecedent_params = ae.determineMF()

#Parameter to search: - Global optimization vs Local optimization

# Estimate the parameters of the consequence. default: global fitting = True
ce = ConsequentEstimator(X_train,y_train, partition_matrix)
consequent_params = ce.suglms(global_fit=True)

In [11]:
# %% Build first-order Takagi-Sugeno model
modbuilder = SugenoFISBuilder(antecedent_params, consequent_params, features, save_simpful_code=False)
model = modbuilder.get_model()

# %% Get model predictions
modtester = SugenoFISTester(model, X_test, features)
y_pred = modtester.predict()[0]

# %% Compute regression metrics
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: {:.3f}".format(mse))
vaf = calculate_vaf(y_test, y_pred)
print("Variance Accounted for: {:.1f}".format(vaf))
exp_var = explained_variance_score(y_test, y_pred)
print("Explained Variance Score: {:.3f}".format(exp_var))

 * Detected 3 rules / clusters
 * Detected Sugeno model type
Mean Squared Error: 0.390
Variance Accounted for: -9.0
Explained Variance Score: -0.090


In [12]:
ncl = [2, 3, 5, 8]
mtd = ['fcm','gk']
glb_ft = [True , False]

globaly_mse = []
globaly_acc = []
globaly_exp_var = []

localy_mse = []
localy_acc = []
localy_exp_var = []



for ft in glb_ft:
  for mth in mtd:
    for n in ncl:
      if ft == True:
        opt_ft = 'global'
      else:
        opt_ft = 'local'

      # Cluster training data
      cl = Clusterer(n,X_train,y_train)
      cluster_centers , partition_matrix, _ = cl.cluster(method = mth)

      # Estimate the membership functions of the system. default: mf_shape = gaussian
      ae = AntecedentEstimator(X_train, partition_matrix)
      antecedent_params = ae.determineMF()

      # Estimate the parameters of the consequence.
      ce = ConsequentEstimator(X_train,y_train, partition_matrix)
      consequent_params = ce.suglms(global_fit=ft)

      # %% Build first-order Takagi-Sugeno model
      modbuilder = SugenoFISBuilder(antecedent_params, consequent_params, features, save_simpful_code=False)
      model = modbuilder.get_model()

      # %% Get model predictions
      modtester = SugenoFISTester(model, X_test, features)
      y_pred = modtester.predict()[0]

      print("The following values were obtained for {} clusters, by method {} doing a {} optimization.".format(n,mth,opt_ft))

      # NOTA: talvez seja melhor user a Accuracy e VAF do que mape e exp_var
      # %% Compute regression metrics
      mse = mean_squared_error(y_test, y_pred)
      print("Mean Squared Error: {:.3f}".format(mse))
      vaf = calculate_vaf(y_test, y_pred)
      print("Variance Accounted for: {:.1f}".format(vaf))
      exp_var = explained_variance_score(y_test, y_pred)
      print("Explained Variance Score: {:.3f}".format(exp_var))



      if ft==True:
        globaly_mse.append(mse)
        globaly_acc.append(vaf)
        globaly_exp_var.append(exp_var)

      else:
        localy_mse.append(mse)
        localy_acc.append(vaf)
        localy_exp_var.append(exp_var)





 * Detected 2 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 2 clusters, by method fcm doing a global optimization.
Mean Squared Error: 0.414
Variance Accounted for: -34.4
Explained Variance Score: -0.344
 * Detected 3 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 3 clusters, by method fcm doing a global optimization.
Mean Squared Error: 0.394
Variance Accounted for: -9.4
Explained Variance Score: -0.094
 * Detected 5 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 5 clusters, by method fcm doing a global optimization.
Mean Squared Error: 0.932
Variance Accounted for: -16.7
Explained Variance Score: -0.167
 * Detected 8 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 8 clusters, by method fcm doing a global optimization.
Mean Squared Error: 10.875
Variance Accounted for: -1902.1
Explained Variance Score: -19.021
 * Detected 2 rules /

  output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  x = um.multiply(x, x, out=x)
  (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0


 * Detected 2 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 2 clusters, by method fcm doing a local optimization.
Mean Squared Error: 0.054
Variance Accounted for: 79.4
Explained Variance Score: 0.794
 * Detected 3 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 3 clusters, by method fcm doing a local optimization.
Mean Squared Error: 0.050
Variance Accounted for: 80.2
Explained Variance Score: 0.802
 * Detected 5 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 5 clusters, by method fcm doing a local optimization.
Mean Squared Error: 0.054
Variance Accounted for: 80.2
Explained Variance Score: 0.802
 * Detected 8 rules / clusters
 * Detected Sugeno model type
The following values were obtained for 8 clusters, by method fcm doing a local optimization.
Mean Squared Error: 0.057
Variance Accounted for: 80.0
Explained Variance Score: 0.800
 * Detected 2 rules / clusters
 * De

In [None]:
# Plots
# Fazer

# Lixo
