In [1]:
import pandas as pd
import numpy as np
import math
import random

In [2]:
class ProcessingData:
    @staticmethod
    def shuffleDF(data:pd.DataFrame)->pd.DataFrame:
        for i in range(len(data)-1,0,-1):
            x=random.randint(0,i)
            data.iloc[i], data.iloc[x] = data.iloc[x], data.iloc[i]
        return data
    
    @staticmethod
    def normalizeDF(data:pd.DataFrame)->pd.DataFrame:
        col=data.columns.tolist()
        col=col[:-1]
        for j in col:
            minn=data[j].min()
            maxx=data[j].max()
            for i in range(0,len(data),1):
                data.at[i,j]=(float(data.at[i,j])- minn)/(maxx - minn)
        return data
    
    @staticmethod
    def splitDF(data:pd.DataFrame)->pd.DataFrame:
        trainDF=pd.DataFrame(columns=data.columns)
        testDF=pd.DataFrame(columns=data.columns)
        for i in range(len(data)):
            if i <= len(data)*0.7:
                trainDF=trainDF.append(data.iloc[i])
            else:
                testDF=testDF.append(data.iloc[i])
        return trainDF, testDF

In [3]:
# soft set gdzie dzielimy klasy i dla każdego atrybutu liczymy min, max oraz średnią
class SoftSet:
    @staticmethod
    def countMinMaxMean(df: pd.DataFrame):
        varieties = pd.unique(df["variety"])
        attributes = df.columns.tolist()
        attributes = attributes[:-1]
        minn = {}
        mean = {}
        maxx = {}
        one_zero = {}
        for var in varieties:
            minn[var] = {}
            mean[var] = {}
            maxx[var] = {}
            one_zero[var] = {}
            for atr in attributes:
                minn[var][atr] = df[atr][df['variety'] == var].min()
                mean[var][atr] = df[atr][df['variety'] == var].mean()
                maxx[var][atr] = df[atr][df['variety'] == var].max() 
            
                if len(df[df[atr]<mean[var][atr]]) > len(df[df[atr]>mean[var][atr]]):
                    one_zero[var][atr] = 0
                else:
                    one_zero[var][atr] = 1
                   
        return one_zero, minn, mean, maxx 
            
        
    @staticmethod
    def classify(df: pd.DataFrame, sample: list):
        one_zero, minn, mean, maxx = SoftSet().countMinMaxMean(df)
        
        varieties = pd.unique(df["variety"])
        attributes = df.columns.tolist()
        attributes = attributes[:-1]
        result = {k: 0 for k in varieties}
        
        for var in varieties:
            for index, atr in enumerate(attributes):
                if one_zero[var][atr] == 0:
                    if minn[var][atr] <= sample[index] <= mean[var][atr]:
                        result[var] += 1
                else:
                    if mean[var][atr] < sample[index] <= maxx[var][atr]:
                        result[var] += 1
         
        return max(result, key = result.get)
    
    @staticmethod
    def precision(testData: pd.DataFrame, trainData: pd.DataFrame):
        good = 0
        bad = 0
        for sample in testData.values:
            if SoftSet.classify(trainData, sample) == sample[-1]:
                good += 1
            else:
                bad += 1
                print("Sample " + sample[-1] + " classifiesd as: " + SoftSet.classify(trainData, sample))
        return good/(bad+good)*100

In [4]:
iris=pd.read_csv('/Users/glitterlungs/Studia/SEMESTR IV/Systemy sztucznej inteligencji/Zadanie nr 1/iris.csv')

iris=ProcessingData.shuffleDF(iris)
iris=ProcessingData.normalizeDF(iris)
train,test=ProcessingData.splitDF(iris)

result = SoftSet()
#print(result.countMinMaxMean(iris))
#print(result.classify(train, [0.3,0.2,0.02,0.3]))
print(result.precision(test, train))

Sample Setosa classifiesd as: Virginica
Sample Setosa classifiesd as: Virginica
Sample Setosa classifiesd as: Virginica
Sample Versicolor classifiesd as: Setosa
Sample Versicolor classifiesd as: Virginica
Sample Versicolor classifiesd as: Virginica
Sample Virginica classifiesd as: Versicolor
Sample Versicolor classifiesd as: Setosa
Sample Versicolor classifiesd as: Virginica
Sample Versicolor classifiesd as: Setosa
Sample Versicolor classifiesd as: Virginica
Sample Setosa classifiesd as: Virginica
Sample Versicolor classifiesd as: Setosa
Sample Versicolor classifiesd as: Setosa
Sample Setosa classifiesd as: Virginica
Sample Setosa classifiesd as: Virginica
63.63636363636363
