In [484]:
#全データに対応できる汎用的なモデルを作成する
#簡略型ファジィ推論モデルについて示す
import numpy as np
import pandas as pd
import random
import math
import sys
import time

In [485]:
# Read the raw data set from CSV into a pandas DataFrame.
# The path is relative to the notebook's working directory.
original_data = pd.read_csv(filepath_or_buffer='winequality-red-test.csv')

In [486]:
# Min-max normalize the columns of original_data to the range [0, 1], in place.
# Columns named in exception_columns are skipped (for example, columns that
# were already normalized before loading).

# Names of the columns to leave untouched.
# Review and adjust this list for each data set.
exception_columns = []
for clm in original_data.columns:
    if clm in exception_columns:
        continue
    column_min = original_data[clm].min()
    column_range = original_data[clm].max() - column_min
    if column_range == 0:
        # A constant column has no spread: dividing by (max - min) == 0 would
        # turn the whole column into NaN and poison every later computation,
        # so map it to 0 instead.
        original_data[clm] = 0.0
    else:
        original_data[clm] = (original_data[clm] - column_min) / column_range

In [487]:
# Turn the normalized DataFrame into a plain NumPy array (an independent copy)
# so that the following cells can slice it by row and column position.
np_original_data = original_data.to_numpy(copy=True)

In [488]:
# Decide how many rows go to the training ("teacher") set and how many to the
# test set. With the values below the split is teacher:test = 5:5.

# ---------------------------------------------------------------------------
# Split ratio. Review and adjust for each data set.
# ---------------------------------------------------------------------------
TEACHER_SIZE_RATIO = 5
TEST_SIZE_RATIO = 5
# ---------------------------------------------------------------------------

# Number of training rows implied by the ratio: the truncated share plus one.
_requested_teacher_size = int(
    len(np_original_data) * (TEACHER_SIZE_RATIO / (TEACHER_SIZE_RATIO + TEST_SIZE_RATIO))
) + 1
# The "+ 1" can push the request past the number of available rows (for
# example when TEST_SIZE_RATIO is 0, or when the data set is empty). Clamp it
# so TEACHER_SIZE never exceeds the data and TEST_SIZE never becomes negative;
# otherwise the training loops would index rows that do not exist.
TEACHER_SIZE = min(_requested_teacher_size, len(np_original_data))
TEST_SIZE = len(np_original_data) - TEACHER_SIZE

In [489]:
# Cut the array row-wise: the first TEACHER_SIZE rows form the training set,
# every remaining row forms the test set. All columns are kept in both parts.
teacher_data, test_data = np_original_data[:TEACHER_SIZE], np_original_data[TEACHER_SIZE:]

In [490]:
# Total number of attributes per record (input attributes plus the output
# attribute), taken from the length of the first training row.
NUMBER_OF_DATA_VARIATION = teacher_data[0].shape[0]

In [491]:
# Separate explanatory variables (x) from the explained variable (y).
# The explained variable is the last column only; everything before it is input.
_target_col = NUMBER_OF_DATA_VARIATION - 1
x_teacher_data, y_teacher_data = teacher_data[:, :_target_col], teacher_data[:, _target_col]
x_test_data, y_test_data = test_data[:, :_target_col], test_data[:, _target_col]

In [492]:
# Derive the total number of fuzzy rules from the number of fuzzy partitions
# per input and the number of input attributes (one rule per combination of
# partitions, i.e. partitions ** inputs).

# ---------------------------------------------------------------------------
# Review and adjust for each data set.
# ---------------------------------------------------------------------------
# Every column except the last (output) column is an input.
NUMBER_OF_INPUT = NUMBER_OF_DATA_VARIATION - 1
NUMBER_OF_FUZZY_PARTITION = 3
NUMBER_OF_FUZZY_RULE = pow(NUMBER_OF_FUZZY_PARTITION, NUMBER_OF_INPUT)
# ---------------------------------------------------------------------------

In [493]:
# Set the initial antecedent (membership function) and consequent parameters.
# The antecedent center and width arrays have one row per rule and one column
# per input variable; the consequent holds one value per rule.
antecedent_center = np.empty((NUMBER_OF_FUZZY_RULE, NUMBER_OF_INPUT))
antecedent_broad = np.empty((NUMBER_OF_FUZZY_RULE, NUMBER_OF_INPUT))

# ---------------------------------------------------------------------------
# Review and adjust for each data set.
# ---------------------------------------------------------------------------
# Every consequent starts at 0.5.
consequent = np.full(NUMBER_OF_FUZZY_RULE, 0.5)

# (center, width) of the membership function for each partition label.
# NOTE(review): only labels 0 and 1 have their own entry; every other label
# falls back to (1, 1), so this layout looks designed for exactly three
# partitions - confirm before changing NUMBER_OF_FUZZY_PARTITION.
_label_to_shape = {0: (0, 1), 1: (0.5, 0.5)}
_fallback_shape = (1, 1)

# The rule index is read as a number in base NUMBER_OF_FUZZY_PARTITION with one
# digit per input (first input = most significant digit); the digit selects the
# partition label used by that rule for that input.
for rule in range(NUMBER_OF_FUZZY_RULE):
    for column in range(NUMBER_OF_INPUT):
        place_value = NUMBER_OF_FUZZY_PARTITION ** (NUMBER_OF_INPUT - (column + 1))
        label = int(rule / place_value) % NUMBER_OF_FUZZY_PARTITION
        antecedent_center[rule, column], antecedent_broad[rule, column] = _label_to_shape.get(label, _fallback_shape)
# ---------------------------------------------------------------------------

In [494]:
%%time
#次に学習を行う
#前件部の学習部分は一旦フェードアウトしている

#平均二条誤差を格納する変数を用意する
mean_squared_error = 0

#学習回数を設定する
TRAIN_TIME = 1000
#学習係数を設定する
LEARNING_CONSEQUENT = 0.001
#まず、各ルールに基づいて入力変数をファジィ化（メンバシップ関数に代入）する
membership_function = np.empty((TEACHER_SIZE, NUMBER_OF_FUZZY_RULE, NUMBER_OF_INPUT))
adaptability = np.ones((TEACHER_SIZE, NUMBER_OF_FUZZY_RULE))

for i in range(TEACHER_SIZE):
    for j in range(NUMBER_OF_FUZZY_RULE):
        for k in range(NUMBER_OF_INPUT):
            if(x_teacher_data[i,k] >= antecedent_center[j,k] - antecedent_broad[j,k]) and (x_teacher_data[i,k] <= antecedent_center[j,k]):
                membership_function[i,j,k] = (x_teacher_data[i,k] - (antecedent_center[j,k] - antecedent_broad[j,k])) / antecedent_broad[j,k]
            elif(x_teacher_data[i,k] > antecedent_center[j,k]) and (x_teacher_data[i,k] <= antecedent_center[j,k] + antecedent_broad[j,k]):
                membership_function[i,j,k] = -(x_teacher_data[i,k] - (antecedent_center[j,k] + antecedent_broad[j,k])) / antecedent_broad[j,k]
        #各ルールにおける適合度を求める
        for k in range(NUMBER_OF_INPUT):
            adaptability[i,j] = adaptability[i,j] * membership_function[i,j,k]
for time in range(TRAIN_TIME):
    for i in range(TEACHER_SIZE):
        output = 0
        output = np.sum(np.dot(adaptability[i], consequent)) / np.sum(adaptability[i])
        if (time == TRAIN_TIME - 1):
            print(output, y_teacher_data[i], i)
            mean_squared_error = mean_squared_error + (output - y_teacher_data[i]) ** 2
            if(i == TEACHER_SIZE - 1):
                mean_squared_error = mean_squared_error / TEACHER_SIZE
                print('平均二条誤差')
                print(mean_squared_error)
        #後件部の更新を行う
        for j in range(NUMBER_OF_FUZZY_RULE):
            consequent[j] = consequent[j] + (LEARNING_CONSEQUENT * adaptability[i,j] / np.sum(adaptability[i])) * (y_teacher_data[i] - output)

# for time in range(TRAIN_TIME):
#     for i in range(TEACHER_SIZE):
#         #適合度を初期化する
#         adaptability = np.ones(NUMBER_OF_FUZZY_RULE)
#         for j in range(NUMBER_OF_FUZZY_RULE):
#             for k in range(NUMBER_OF_INPUT):
#                 if(x_teacher_data[i,k] >= antecedent_center[j,k] - antecedent_broad[j,k]) and x_teacher_data[i,k] <= antecedent_center[j,k]:
#                     membership_function[j,k] = (x_teacher_data[i,k] - (antecedent_center[j,k] - antecedent_broad[j,k])) / antecedent_broad[j,k]
#                 elif(x_teacher_data[i,k] > antecedent_center[j,k]) and (x_teacher_data[i,k] <= antecedent_center[j,k] + antecedent_broad[j,k]):
#                     membership_function[j,k] = -(x_teacher_data[i,k] - (antecedent_center[j,k] + antecedent_broad[j,k])) / antecedent_broad[j,k]
#             #各ルールにおける適合度を求める
#             for k in range(NUMBER_OF_INPUT):
#                 adaptability[j] = adaptability[j] * membership_function[j,k]
#         #各データに対して予測結果を求める
#         output = 0
#         output = np.sum(np.dot(adaptability, consequent)) / np.sum(adaptability)
#         if(time == TRAIN_TIME-1):
#             print(output, y_teacher_data[i],i)
#             mean_squared_error = mean_squared_error + (output - y_teacher_data[i])**2
#             if(i == TEACHER_SIZE-1):
#                 mean_squared_error = mean_squared_error / TEACHER_SIZE
#                 print("平均二条誤差:")
#                 print(mean_squared_error)
#         #出力値を求めた次に後件部と前件部の更新を行う
#         for j in range(NUMBER_OF_FUZZY_RULE):
#             consequent[j] = consequent[j] + (LEARNING_CONSEQUENT * adaptability[j] / np.sum(adaptability)) * (y_teacher_data[i] - output)
#             #for k in range(NUMBER_OF_INPUT):
#                 #antecedent_center[j,k] = antecedent_center[j,k] + (LEARNING_ANTECEDENT_CENTER * adaptability[j] / np.sum(adaptability)) * (y_teacher_data[i] - output) * (consequent[j] - output) * delta_center[j,k]
#                 #antecedent_broad[j,k] = antecedent_broad[j,k] + (LEARNING_ANTECEDENT_BROAD * adaptability[j] / np.sum(adaptability)) * (y_teacher_data[i] - output) * (consequent[j] - output) * delta_broad[j,k]
max_consequent = consequent[0]
max_consequent_number = 0
for i in range(NUMBER_OF_FUZZY_RULE):
    if max_consequent < consequent[i]:
        max_consequent_number = i
        max_consequent = consequent[i]
print(max_consequent)
print(max_consequent_number)

0.4771897680838631 0.4 0
0.49010876001095766 0.4 1
0.4889508230992308 0.4 2
0.5212121168802024 0.6 3
0.4771884158385711 0.4 4
0.48004695791295154 0.4 5
0.48420163648301523 0.4 6
0.47031081255261403 0.7999999999999999 7
0.483148861900796 0.7999999999999999 8
0.5033559467695046 0.4 9
0.4892093117516931 0.4 10
0.5033558194163903 0.4 11
0.47906173008842723 0.4 12
0.5149849388871431 0.4 13
0.498885262184402 0.4 14
0.4991039641491395 0.4 15
0.5069999923246039 0.7999999999999999 16
0.5049023098012501 0.4 17
0.4889608760196459 0.20000000000000007 18
0.5098601587727591 0.6 19
0.5152933260994935 0.6 20
0.5058377271748138 0.4 21
0.5142281066186192 0.4 22
0.4951939121781396 0.4 23
0.5026696476548633 0.6 24
0.501879667833962 0.4 25
0.5158706899236511 0.4 26
0.5142269395498236 0.4 27
0.47825756296666055 0.4 28
0.4784372988027571 0.6 29
0.48843338790653285 0.4 30
0.4867057997170509 0.6 31
0.49468270499900124 0.4 32
0.4976977341365537 0.6 33
0.5022596502619424 0.4 34
0.4839371676995633 0.6 35
0.495773

In [495]:
# Evaluate the trained model on the test data: print each prediction next to
# its target, then the mean squared error and the accuracy.

# Number of correctly classified test samples.
number_of_accuracy = 0

# Membership grades of the current test sample, one row per rule and one column
# per input. Zero-initialised and fully rewritten for every sample so that
# inputs outside a triangle's support get grade 0 rather than a stale value
# left over from the previous sample.
test_membership_function = np.zeros((NUMBER_OF_FUZZY_RULE, NUMBER_OF_INPUT))

test_mean_squared_error = 0

# Left and right feet of every triangle; they do not depend on the sample.
test_triangle_left = antecedent_center - antecedent_broad
test_triangle_right = antecedent_center + antecedent_broad

for i in range(TEST_SIZE):
    sample = x_test_data[i]
    on_rising_edge = (sample >= test_triangle_left) & (sample <= antecedent_center)
    on_falling_edge = (sample > antecedent_center) & (sample <= test_triangle_right)
    rising_grade = (sample - test_triangle_left) / antecedent_broad
    falling_grade = -(sample - test_triangle_right) / antecedent_broad
    test_membership_function[...] = np.where(
        on_rising_edge, rising_grade, np.where(on_falling_edge, falling_grade, 0.0)
    )

    # Firing strength of each rule: product of its grades over all inputs.
    test_adaptability = np.ones(NUMBER_OF_FUZZY_RULE)
    for k in range(NUMBER_OF_INPUT):
        test_adaptability *= test_membership_function[:, k]

    # Prediction: firing-strength-weighted average of the consequents.
    test_output = np.dot(test_adaptability, consequent) / np.sum(test_adaptability)
    print(test_output, y_test_data[i])

    # Accumulate the squared error.
    test_mean_squared_error = test_mean_squared_error + (test_output - y_test_data[i]) ** 2

    # Accuracy: bucket the output into three classes and compare with the label.
    # The buckets are disjoint and cover the whole axis, so an output below
    # 0.33 is never counted as a hit for label 0.5, and an output of exactly
    # 0.66 belongs to the top bucket.
    # NOTE(review): only the normalized labels 0, 0.5 and 1 can ever match, so
    # this presumably targets a three-class data set - confirm for data whose
    # labels take other values.
    is_correct = (
        (test_output < 0.33 and y_test_data[i] == 0)
        or (0.33 <= test_output < 0.66 and y_test_data[i] == 0.5)
        or (test_output >= 0.66 and y_test_data[i] == 1)
    )
    if is_correct:
        number_of_accuracy = number_of_accuracy + 1

if TEST_SIZE > 0:
    test_mean_squared_error = test_mean_squared_error / TEST_SIZE
    print("平均二条誤差")
    print(test_mean_squared_error)

# Print the accuracy; it is undefined (NaN) when there are no test samples.
print(number_of_accuracy / TEST_SIZE if TEST_SIZE > 0 else float('nan'))

0.4931113352233529 0.4
0.4890207415439141 0.4
0.4944999808730151 0.7999999999999999
0.49624392227394765 0.6
0.5067228314413991 0.6
0.5449621929337101 0.7999999999999999
0.5531755065950407 0.7999999999999999
0.5449621929337101 0.7999999999999999
0.5060660236560519 0.4
0.5128732343624324 0.6
0.4881368933832337 0.4
0.5264155947084433 0.6
0.5123730922251668 0.4
0.5134003525015186 0.20000000000000007
0.5169906187302535 0.6
0.5123730922251668 0.4
0.506014491723704 0.6
0.5279153971003748 0.6
0.47912866731958653 0.4
0.4950387652238306 0.4
0.4778276664022129 0.4
0.49323588741714747 0.7999999999999999
0.497300509107724 0.4
0.497300509107724 0.4
0.5078590762247187 0.4
0.4967103844475045 0.4
0.5399651774925383 0.7999999999999999
0.4967103844475045 0.4
0.4998583415684558 1.0
0.4957596117490456 0.6
0.4796728813849887 0.20000000000000007
0.4957596117490456 0.6
0.509663285624481 0.0
0.5085306994659667 0.20000000000000007
0.4979940445445089 0.4
0.490463731591646 0.4
0.49941782929327455 0.79999999999999