In [17]:
import itertools
import random

In [18]:
'''
Transformation rules:
string_tuple        num_set       binary     index
('A',)              (1,)          001        1
('B',)              (2,)          010        2
('C',)              (3,)          100        4
('A','B')           (1,2)         011        3
('A','C')           (1,3)         101        5
('B','C')           (2,3)         110        6
('A','B','C')       (1,2,3)       111        7
'''

random.seed(10)



class dataset:
    def __init__(self,N,K,input_for_characteristic_function):
        self.N = N
        self.K = K
        
        self.vfunction_for_N = [-1] * 2**N #v() for set N
        self.vfunction_for_K = [-1] * 2**N #v() for set K
        self.string_into_num = [-1] * (N+1) #string_into_num[0] = -1 forever it record the relationship between string and num
        for i in range(1,N+1):
            self.string_into_num[i] = input_for_characteristic_function[i-1]


    #transform the index of array into num set
    #input: 7   output:(1,2,3)
    def index_into_num_set(self,index):
        set = ()
        for i in range(0,self.N):
            if index & (1 << i) > 0:
                set = set + (i+1,)
        return set

    #transform num set into the index of array
    #input: (1,2,3)   output:7
    def num_set_into_index(self,set):
        index = 0
        for i in set:
            index = index + 2**(i-1)
        return index

    #transform string_tuple into num_set
    #input:('A','B','C')   output:(1,2,3)
    def string_tuple_into_num_set(self,string_tuple):
        num_set = ()
        for i in string_tuple:

            num_set = num_set + (self.string_into_num.index(i),)
        return num_set

    #transform num_set into string_tuple
    #input:(1,2,3)   output:('A','B','C')
    def num_set_into_string_tuple(self,num_set):
        string_tuple = ()
        for i in num_set:
            string_tuple = string_tuple + (self.string_into_num[i],)
        return string_tuple

    #input:(1,2)     output:[1,1,0]
    def num_set_into_learning_need(self,num_set):
        array = []
        for i in range(1,self.N+1):
            if i in num_set:
                array.append(1)
            else:
                array.append(0)
        return array

    def generate_dataset(self,characteristic_function,random_size):

        data = []
        target = []
        for i in characteristic_function.keys():
            
            self.vfunction_for_N[self.num_set_into_index(self.string_tuple_into_num_set(i))] = characteristic_function[i]
            if(len(i) <= self.K):
                self.vfunction_for_K[self.num_set_into_index(self.string_tuple_into_num_set(i))] = characteristic_function[i]
            
            data.append(self.num_set_into_learning_need(self.string_tuple_into_num_set(i)))
            target.append(characteristic_function[i])

        random_index = random.sample(range(0,len(data)),int(random_size * len(data)))


        partial_data = []
        partial_target = []
        
        for i in random_index:
            partial_data.append(data[i])
            partial_target.append(target[i])

        
        return partial_data,partial_target

In [19]:
class CGA:
    def __init__(self, N, k):
        self.N = N
        self.order = k
        self.gen_w_vec()
        self.gen_v_func()

    def gen_w_vec(self):
        self.w_vec = {}
        self.coalitions = []
        for i in range(1, self.order+1):
            for j in list(itertools.combinations(self.N, i)):
                self.w_vec[j] = 0

    def gen_v_func(self):
        self.v_func = {}
        for i in range(1, len(self.N)+1):
            for j in itertools.combinations(self.N, i):
                value = 0
                for m in range(1, self.order+1):
                    if m > len(j):
                        break
                    for n in itertools.combinations(j, m):
                        value = value+self.w_vec[n]
                self.v_func[j] = value

    def update_w_vec(self, w_v_new):
        for i in w_v_new:
            if i in self.w_vec:
                self.w_vec[i] = w_v_new[i]
        self.gen_v_func()

    def show_w_vec(self):
        print(self.w_vec)

    def show_v_func(self):
        print(self.v_func)

    def show_coalitions(self):
        print(self.coalitions)


def harsanyiDividends(N, v_func):
    dividends = {}
    for size in range(1, len(N)+1):
        for coalition in set(itertools.combinations(N, r=size)):
            base, shift = v_func[coalition], 0
            for smallsize in range(1, size):
                for subs in set(itertools.combinations(coalition, r=smallsize)):
                    shift += dividends[subs]
            dividends[coalition] = base - shift
    return dividends


In [20]:
N=["A","B","C"]
v_func={('A',): 1, ('B',): 1, ('C',): 1, ('A', 'B'): 2, ('A', 'C'): 2, ('B', 'C'): 2, ('A', 'B', 'C'): 4}

cga=CGA(N,2)
cga.show_v_func()
cga.show_w_vec()

print(harsanyiDividends(N,v_func))
cga.update_w_vec(harsanyiDividends(N,v_func))
cga.show_v_func()


{('A',): 0, ('B',): 0, ('C',): 0, ('A', 'B'): 0, ('A', 'C'): 0, ('B', 'C'): 0, ('A', 'B', 'C'): 0}
{('A',): 0, ('B',): 0, ('C',): 0, ('A', 'B'): 0, ('A', 'C'): 0, ('B', 'C'): 0}
{('C',): 1, ('A',): 1, ('B',): 1, ('A', 'C'): 0, ('A', 'B'): 0, ('B', 'C'): 0, ('A', 'B', 'C'): 1}
{('A',): 1, ('B',): 1, ('C',): 1, ('A', 'B'): 2, ('A', 'C'): 2, ('B', 'C'): 2, ('A', 'B', 'C'): 3}


In [21]:
characteristic_function = {('A',):200,('B',):100,('C',):300,('A','B'):400,('B','C'):500,('A','C'):500,('A','B','C'):800}
input_for_characteristic_function = ('A','B','C')
N = 3 # # of element in set N
K = 2 # the maximum number of element in set K
random_size = 1 # how much data should we randomize
data1 = dataset(N,K,input_for_characteristic_function)
data,target = data1.generate_dataset(characteristic_function,random_size)
print("data:",data)
print("target:",target)

data: [[0, 1, 1], [1, 0, 0], [1, 1, 0], [1, 1, 1], [0, 0, 1], [1, 0, 1], [0, 1, 0]]
target: [500, 200, 400, 800, 300, 500, 100]


In [22]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDRegressor

'''
data = [
    [1,0,0],
    [0,1,0],
    [0,0,1],
    [1,1,0],
    [1,0,1],
    [0,1,1],
    [1,1,1]
]
target = [200,100,300,400,500,500,800]
'''
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33)

sgd = SGDRegressor(penalty = 'l2',max_iter=1e9)
sgd.fit(X_train, y_train)
y_pred_sklearn = sgd.predict(X_test)
print(X_test)
print(y_pred_sklearn)

[[1, 1, 1], [1, 1, 0], [0, 0, 1]]
[549.55205448 200.94844798 449.21942786]


In [23]:
print(X_train)
print(y_train)
X_train_new = [
    [1,0,1,0,1,0],
    [0,0,1,0,0,0],
    [1,1,0,1,0,0],
    [1,1,1,1,1,1]
]
X_train_new = np.array(X_train_new).T
print(X_train_new)

[[1, 0, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1]]
[200, 500, 100, 500]
[[1 0 1 1]
 [0 0 1 1]
 [1 1 0 1]
 [0 0 1 1]
 [1 0 0 1]
 [0 0 0 1]]


In [24]:
from scipy.special import comb, perm

def compute_w_vec_size(N,k):
    C = 0
    for i in range(k+1):
        C = C + comb(N,i)
    return C

N=3
k=2
print(compute_w_vec_size(3,2))

7.0


In [25]:
n_iter=100  # number of iterations
r=0.1      # learning rate

N=len(X_train)
cga = CGA([1,2,3],2)
w_BGD = np.array(list(cga.w_vec.values())).reshape(-1,1)
length = len(cga.w_vec.values())
gradient = np.zeros((1,length))

for i in range(6):
    gradient[0][i] = np.random.rand()
             
for j in range(n_iter):
    for i in range(N):
        temp = X_train_new.T[i].reshape(-1,1)
        gradient = gradient + 2 *(np.dot(np.array(w_BGD).T,temp)-y_train[i])* temp.T
    gradient = gradient/N
    w_BGD = (1-r/N)*w_BGD - r*gradient.T
    
# print(w_BGD)
    
new_w_vec={}
for i in range(len(cga.w_vec.keys())):
    new_w_vec[list(cga.w_vec.keys())[i]]=w_BGD[i]
    
cga.update_w_vec(new_w_vec)
y_pred_BGD = cga.v_func
print(y_pred_BGD)
print(target)

{(1,): array([-50.64034413]), (2,): array([75.10005665]), (3,): array([349.16490466]), (1, 2): array([99.5587301]), (1, 3): array([247.37743373]), (2, 3): array([498.85718701]), (1, 2, 3): array([472.16873366])}
[500, 200, 400, 800, 300, 500, 100]
