# Fuzzy logic classifier for the *Iris* dataset
*Perużyński Antoni* \
*Tatarczyk Karolina*

## Imports

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Data reading and preprocessing

In [None]:
# Load the Iris dataset from a CSV file located in the working directory.
df = pd.read_csv("iris.csv")
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [None]:
# Standardize every column except the last one (the `variety` label) in place.
scaler = StandardScaler()
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later, so test rows influence the scaling
# statistics — confirm this is acceptable for the reported scores.
df.iloc[:,:-1] = scaler.fit_transform(df.iloc[:,:-1])
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,-0.900681,1.019004,-1.340227,-1.315444,Setosa
1,-1.143017,-0.131979,-1.340227,-1.315444,Setosa
2,-1.385353,0.328414,-1.397064,-1.315444,Setosa
3,-1.506521,0.098217,-1.283389,-1.315444,Setosa
4,-1.021849,1.249201,-1.340227,-1.315444,Setosa


In [None]:
# df.columns
# df['variety'].unique()

## Step-by-step calculation

### Splitting the data into training and test sets

In [None]:
# Hold out 30% of the rows for testing. A fixed seed keeps the split, and every
# number derived from it, reproducible under Restart Kernel -> Run All.
X_train, X_test = train_test_split(df, test_size=0.30, random_state=42)

### Splitting the training data by Iris class [Setosa, Versicolor, Virginica]

In [None]:
# One training subset per Iris variety, selected by the `variety` label.
setosaTrain, versicolorTrain, virginicaTrain = (
  X_train[X_train['variety'] == varietyName]
  for varietyName in ('Setosa', 'Versicolor', 'Virginica')
)

### Functions to calculate the coefficients of the trapezoidal function

In [None]:
def calculateCoeff(df, i: int):
  """Estimate trapezoid coefficients [a, b, c, d] from column `i` of `df`.

  a, d -- column minimum / maximum widened by 0.001, so the extreme samples
          themselves get a non-zero membership.
  b, c -- mean of the values at or below / strictly above the column mean.

  When one side of the mean contains no values (for example a constant
  column), the mean of that empty side would be NaN and would propagate into
  every membership computed from the result. In that case the bound falls
  back to the column minimum (for b) or maximum (for c).

  Returns the list [a, b, c, d].
  """
  column = df.iloc[:, i]
  lowest = column.min()
  highest = column.max()
  a = lowest - 0.001
  d = highest + 0.001

  mean = column.mean()
  lowerPart = column[column <= mean]
  upperPart = column[column > mean]

  # An empty side has no mean; use the matching extreme instead of NaN.
  b = lowerPart.mean() if not lowerPart.empty else lowest
  c = upperPart.mean() if not upperPart.empty else highest
  return [a,b,c,d]

In [None]:
def calculateCoeffQuantile(df, i: int):
  """Return trapezoid coefficients [a, b, c, d] for column `i` of `df`.

  a and d are the column minimum and maximum widened by 0.001; b and c are
  the 0.3 and 0.7 quantiles of the column.
  """
  column = df.iloc[:, i]
  lowerEdge = column.min() - 0.001
  upperEdge = column.max() + 0.001
  plateauStart, plateauEnd = (column.quantile(q) for q in (0.3, 0.7))
  return [lowerEdge, plateauStart, plateauEnd, upperEdge]

In [None]:
def calculateCoeffMedian(df, i: int):
  """Estimate trapezoid coefficients [a, b, c, d] from column `i` of `df`.

  a, d -- column minimum / maximum widened by 0.001.
  b, c -- median of the values at or below / strictly above the column median.

  When no value lies strictly above the median (more than half of the samples
  equal the maximum), the median of that empty side would be NaN and would
  propagate into every membership computed from the result. In that case c
  falls back to the column maximum; b falls back to the column minimum in the
  symmetric situation.

  Returns the list [a, b, c, d].
  """
  column = df.iloc[:, i]
  lowest = column.min()
  highest = column.max()
  a = lowest - 0.001
  d = highest + 0.001

  median = column.median()
  lowerPart = column[column <= median]
  upperPart = column[column > median]

  # An empty side has no median; use the matching extreme instead of NaN.
  b = lowerPart.median() if not lowerPart.empty else lowest
  c = upperPart.median() if not upperPart.empty else highest
  return [a,b,c,d]

### Membership functions

In [None]:
def trapezeFunction(x: float, coeff):
  """Trapezoidal membership of `x` for coefficients coeff = [a, b, c, d].

  Returns 0 for x <= a, rises linearly on (a, b], equals 1 on (b, c],
  falls linearly on (c, d] and is 0 above d.
  """
  left, plateauStart, plateauEnd, right = coeff[0], coeff[1], coeff[2], coeff[3]

  # The comparisons are checked in ascending order; the first match decides.
  if x <= left:
    return 0
  if x <= plateauStart:
    return (x - left) / (plateauStart - left)
  if x <= plateauEnd:
    return 1
  if x <= right:
    return (right - x) / (right - plateauEnd)
  return 0

### Calculate the coefficients for every feature of every class in the dataset

In [None]:
# Trapezoid coefficients [a, b, c, d] for Setosa, one list per feature
# column 0..3 (assigned to SL, SW, PL, PW in column order).
setosaSL, setosaSW, setosaPL, setosaPW = (
  calculateCoeffMedian(setosaTrain, featureIdx) for featureIdx in range(4)
)

In [None]:
# Trapezoid coefficients [a, b, c, d] for Versicolor, one list per feature
# column 0..3 (assigned to SL, SW, PL, PW in column order).
versicolorSL, versicolorSW, versicolorPL, versicolorPW = (
  calculateCoeffMedian(versicolorTrain, featureIdx) for featureIdx in range(4)
)

In [None]:
# Trapezoid coefficients [a, b, c, d] for Virginica, one list per feature
# column 0..3 (assigned to SL, SW, PL, PW in column order).
virginicaSL, virginicaSW, virginicaPL, virginicaPW = (
  calculateCoeffMedian(virginicaTrain, featureIdx) for featureIdx in range(4)
)

In [None]:
# Re-index the test set 0..n-1 so that label-based `.loc[i, ...]` access matches
# positional `.iloc[i, ...]` access. reset_index returns a new frame, so the
# column added next is written to an independent object rather than to the
# result of the split.
X_test = X_test.reset_index(drop=True)
# The placeholder is a string because class names are stored in this column;
# starting from the integer 0 would force an int -> str dtype change on the
# first write.
X_test['predict'] = ""

### Calculate the accuracy on the test dataset

In [None]:
# Class labels, in the same order as the rows of `classCoeffs`.
classNames = ["Setosa", "Versicolor", "Virginica"]
# Per class: trapezoid coefficients for feature columns 0..3 (SL, SW, PL, PW).
classCoeffs = [
  [setosaSL, setosaSW, setosaPL, setosaPW],
  [versicolorSL, versicolorSW, versicolorPL, versicolorPW],
  [virginicaSL, virginicaSW, virginicaPL, virginicaPW],
]

predictions = []
for i in range(len(X_test)):
  features = [X_test.iloc[i, j] for j in range(4)]
  # Aggregate the four per-feature memberships into one score per class.
  # Alternatives to np.mean that were tried here: product, sum, max, np.median.
  scores = [
    np.mean([trapezeFunction(x, coeff) for x, coeff in zip(features, coeffs)])
    for coeffs in classCoeffs
  ]
  # The class with the highest aggregated membership wins (first one on ties).
  predictions.append(classNames[np.argmax(scores)])

# Assign the whole column at once: this replaces the placeholder column instead
# of writing strings row by row into a column of another dtype.
X_test['predict'] = predictions

# Accuracy = share of test rows whose predicted class equals the true class.
# (A dedicated name is used so the builtin `sum` is not shadowed.)
correct = int((X_test['variety'] == X_test['predict']).sum())
correct / len(X_test)

0.9333333333333333

## Calculate the accuracy of the algorithm using cross-validation

In [None]:
# Repeated random hold-out evaluation: 100 independent 70/30 splits. For each
# split the accuracy and the class-averaged sensitivity and precision are
# collected, then averaged over all splits.
accuracy = []
presicion = []
sensitive =[]

# Class labels, in the same order as the rows of `classCoeffs` below.
classNames = ["Setosa", "Versicolor", "Virginica"]


def _safeRatio(numerator, denominator):
  """Return numerator / denominator, or 0.0 when the denominator is zero.

  A class that is never predicted (precision) or absent from the test split
  (sensitivity) would otherwise raise ZeroDivisionError and abort the run.
  """
  return numerator / denominator if denominator else 0.0


for k in range(0,100):
  # The repetition index seeds the split: every repetition gets a different
  # split, yet the whole experiment is reproducible.
  X_train, X_test = train_test_split(df, test_size=0.30, random_state=k)
  # Fresh 0..n-1 index on an independent copy of the test rows.
  X_test = X_test.reset_index(drop=True)

  setosaTrain, versicolorTrain, virginicaTrain = (
    X_train[X_train['variety'] == name] for name in classNames
  )

  # Coefficient estimator under test; the alternatives are
  # calculateCoeffQuantile and calculateCoeffMedian.
  coeffFunction = calculateCoeff
  setosaSL, setosaSW, setosaPL, setosaPW = (
    coeffFunction(setosaTrain, j) for j in range(4)
  )
  versicolorSL, versicolorSW, versicolorPL, versicolorPW = (
    coeffFunction(versicolorTrain, j) for j in range(4)
  )
  virginicaSL, virginicaSW, virginicaPL, virginicaPW = (
    coeffFunction(virginicaTrain, j) for j in range(4)
  )
  classCoeffs = [
    [setosaSL, setosaSW, setosaPL, setosaPW],
    [versicolorSL, versicolorSW, versicolorPL, versicolorPW],
    [virginicaSL, virginicaSW, virginicaPL, virginicaPW],
  ]

  predictions = []
  for i in range(len(X_test)):
    features = [X_test.iloc[i, j] for j in range(4)]
    # Aggregate the four per-feature memberships into one score per class.
    # Alternatives to np.median that were tried: product, sum, max, np.mean.
    scores = [
      np.median([trapezeFunction(x, coeff) for x, coeff in zip(features, coeffs)])
      for coeffs in classCoeffs
    ]
    predictions.append(classNames[np.argmax(scores)])
  # One column assignment instead of row-by-row writes into a placeholder.
  X_test['predict'] = predictions

  actual = X_test['variety']
  predicted = X_test['predict']

  # Accuracy: share of correctly classified test rows.
  accuracy.append(int((actual == predicted).sum()) / len(X_test))

  # Per-class sensitivity (recall) and precision, macro-averaged over classes.
  classSensitivity = []
  classPrecision = []
  for name in classNames:
    truePositives = int(((predicted == name) & (actual == name)).sum())
    classSensitivity.append(_safeRatio(truePositives, int((actual == name).sum())))
    classPrecision.append(_safeRatio(truePositives, int((predicted == name).sum())))

  sensitive.append(np.mean(classSensitivity))
  presicion.append(np.mean(classPrecision))



print(f" Dokładność wynosi: {np.mean(accuracy)}")
print(f" Czułość wynosi: {np.mean(sensitive)}")
print(f" Precyzja wynosi: {np.mean(presicion)}")

 Dokładność wynosi: 0.94
 Czułość wynosi: 0.9396800480987284
 Precyzja wynosi: 0.9419849952965279


Sensitivity (recall)

In [None]:
# Sensitivity for Setosa on the current X_test:
# correctly predicted Setosa rows / all rows that truly are Setosa.
actualSetosa = X_test["variety"] == 'Setosa'
int((actualSetosa & (X_test['predict'] == "Setosa")).sum()) / int(actualSetosa.sum())

1.0

In [None]:
# Sensitivity for Versicolor on the current X_test:
# correctly predicted Versicolor rows / all rows that truly are Versicolor.
actualVersicolor = X_test["variety"] == 'Versicolor'
int((actualVersicolor & (X_test['predict'] == "Versicolor")).sum()) / int(actualVersicolor.sum())

0.8666666666666667

In [None]:
# Sensitivity for Virginica on the current X_test:
# correctly predicted Virginica rows / all rows that truly are Virginica.
actualVirginica = X_test["variety"] == 'Virginica'
int((actualVirginica & (X_test['predict'] == "Virginica")).sum()) / int(actualVirginica.sum())

1.0

Precision

In [None]:
# Precision for Setosa on the current X_test:
# correctly predicted Setosa rows / all rows predicted as Setosa.
predictedSetosa = X_test["predict"] == 'Setosa'
prec = int((predictedSetosa & (X_test["variety"] == "Setosa")).sum()) / int(predictedSetosa.sum())

In [None]:
# Precision for Versicolor on the current X_test:
# correctly predicted Versicolor rows / all rows predicted as Versicolor.
predictedVersicolor = X_test["predict"] == 'Versicolor'
b = int((predictedVersicolor & (X_test["variety"] == "Versicolor")).sum()) / int(predictedVersicolor.sum())

In [None]:
# Precision for Virginica on the current X_test:
# correctly predicted Virginica rows / all rows predicted as Virginica.
predictedVirginica = X_test["predict"] == 'Virginica'
c = int((predictedVirginica & (X_test["variety"] == "Virginica")).sum()) / int(predictedVirginica.sum())

In [None]:
# Mean precision over the three classes. The Setosa precision is stored in
# `prec`; no cell defines `a`, so referencing it fails on a fresh kernel.
(prec+b+c)/3

0.9523809523809524

## Results
Results of 100 repeated random train/test splits  
for each method of calculating the trapezoidal-function coefficients [Original, Quantile, Median]  
and for each way of aggregating the memberships before `argmax` [*, +, max, mean, median]


1.   Original
*   "*" = 0.88466
*   "+" = 0.92222
*   max = 0.66244
*   mean = 0.92288
*   median = 0.94022 (n=10000: *0.9371133333333334* time = 7:54)

2.   Quantile
*   "*" = 0.89511
*   "+" = 0.90911
*   max = 0.67355
*   mean = 0.91533
*   median = 0.94466 (n=10000: *0.9397222222222222* time = 8:17)

3.   Median
*   "*" = 0.88933
*   "+" = 0.91733
*   max = 0.62177
*   mean = 0.92177
*   median = 0.94222 (n=10000: *0.9370755555555556*  time = 8:31)




