In [37]:
import os,sys,inspect
# Put the parent of the current directory at the front of sys.path so that the
# project packages imported in the next cell resolve.
_frame_file = inspect.getfile(inspect.currentframe())
currentdir = os.path.dirname(os.path.abspath(_frame_file))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

In [38]:
import pandas as pd
from bayesian.train_bn import structure_learning, parameter_learning
from preprocess.discretization import get_nodes_type, discretization, code_categories

from bayesian.save_bn import save_structure, save_params, read_structure, read_params
from external.libpgm.hybayesiannetwork import HyBayesianNetwork
from bayesian.calculate_accuracy import calculate_acc, LOO_validation

from copy import copy
from scipy import stats

import random
from sklearn.model_selection import train_test_split

In [39]:
# Seed Python's built-in `random` module.
# NOTE(review): this does not seed NumPy's global RNG, so NumPy-based randomness
# (e.g. inside scikit-learn utilities) is not controlled by this call.
random.seed(2000)

In [40]:
# Columns that take part in the model; every later cell iterates over this list.
columns = ['sex', 'relation', 'is_parent', 'has_pets', 'age', 'mean_tr', 'median_tr', 'tr_per_month']

# Load the data, keep only the modelled columns, drop incomplete rows and renumber
# the index from 0. The steps are chained instead of using inplace=True: mutating a
# frame derived by column selection in place can raise SettingWithCopyWarning, and a
# non-mutating chain gives the same result every time the cell is re-run.
geo = (
    pd.read_csv('../data/socio.csv')[columns]
    .dropna()
    .reset_index(drop=True)
)
# Uncomment to restrict the run to the first 100 rows:
#geo = geo[:100]

In [41]:
# Normalise element types: 'age' becomes float, the four categorical columns become str.
geo['age'] = geo['age'].map(float)
for cat_col in ('sex', 'relation', 'is_parent', 'has_pets'):
    geo[cat_col] = geo[cat_col].map(str)

In [42]:
# Result table: run descriptors followed by one column per modelled variable.
# The column list is built with `+`; list.extend() mutates in place and returns None,
# which previously produced a frame with no columns at all.
df_result = pd.DataFrame(columns=['Method', 'isDisc', 'isSimple', 'isLogit'] + columns)
# Name handed to to_csv/read_csv for the result table and to save_structure/save_params
# in the experiment cells.
path = 'socio_results_logit.csv'
# Score identifiers passed to structure_learning, one experiment run per entry.
method_list = ['K2', 'MI', 'LL', 'AIC', 'BIC']
# Uncomment to run a single method only:
#method_list = ['AIC']

In [43]:
# Hold out 10% of the rows for evaluation. random_state is passed explicitly because
# train_test_split shuffles with NumPy's RNG, which random.seed() does not control;
# without it the split (and every reported score) changes on each run.
geo_train, geo_test = train_test_split(geo, test_size=0.1, random_state=2000)
# Renumber both parts from 0. The result is assigned rather than mutated in place,
# because the split outputs are derived from `geo`.
geo_train = geo_train.reset_index(drop=True)
geo_test = geo_test.reset_index(drop=True)

In [44]:
# Determine the node type of every column of `geo`. The displayed result maps each
# column name to 'disc' or 'cont'.
geo_types = get_nodes_type(geo)
geo_types

{'sex': 'disc',
 'relation': 'disc',
 'is_parent': 'disc',
 'has_pets': 'disc',
 'age': 'cont',
 'mean_tr': 'cont',
 'median_tr': 'cont',
 'tr_per_month': 'cont'}

In [45]:
# Partition the modelled columns by node type: 'disc' columns are the ones to be
# category-coded, every other column is one to be discretised.
# (`colums_for_code` keeps its original spelling because a later cell references it.)
colums_for_code = [name for name in columns if geo_types[name] == 'disc']
columns_for_disc = [name for name in columns if geo_types[name] != 'disc']

In [46]:
# Training frame with the 'disc' columns passed through code_categories using the
# 'label' option (presumably label encoding — confirm against preprocess.discretization).
geo_coded, label_coder = code_categories(geo_train, 'label', colums_for_code)
# Same frame with the 'cont' columns additionally passed through discretization
# using the 'kmeans' option.
geo_discrete, coder = discretization(geo_coded, 'kmeans', columns_for_disc)
# Discretization applied to the un-coded training frame.
# NOTE(review): geo_only_discrete and discrete_coder are not referenced by any cell
# visible here — confirm whether this call is still needed.
geo_only_discrete, discrete_coder = discretization(geo_train, 'kmeans', columns_for_disc)

# Logit simple

In [47]:
# Experiment "Logit simple": for every scoring method, learn a structure with
# cont_disc=True, fit parameters in 'simple' mode, score the network on the test set
# and checkpoint the result table to `path` after every run.
# Rows are added with pd.concat because DataFrame.append was removed in pandas 2.0.
# NOTE(review): this cell is near-identical to the other three experiment cells;
# a shared function parameterised by cont_disc and mode would remove the duplication.
for method in method_list:
    # Variant 'D': structure learned on the frame with discretised continuous columns.
    bn_geo = structure_learning(geo_discrete, 'HC', geo_types, method, cont_disc = True)
    params = parameter_learning(geo_train, geo_types, bn_geo, 'simple')
    # Structure and parameters are written out and read back before the network is built.
    save_structure(bn_geo, path)
    skel = read_structure(path)
    save_params(params, f'{path}_param')
    params = read_params(f'{path}_param')
    geo_bn = HyBayesianNetwork(skel, params)

    acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'simple', normed = True)
    result_row = {'Method': method, 'isDisc': 'D', 'isSimple': 'S', 'isLogit': 'L', **acc, **rmse}
    df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
    df_result.to_csv(path, index=False)
    df_result = pd.read_csv(path)

    # Variant 'M': structure learned on the coded (non-discretised) frame; skipped for K2.
    if method != 'K2':
        bn_geo = structure_learning(geo_coded, 'HC', geo_types, method, cont_disc = True)
        params = parameter_learning(geo_train, geo_types, bn_geo, 'simple')
        save_structure(bn_geo, path)
        skel = read_structure(path)
        save_params(params, f'{path}_param')
        params = read_params(f'{path}_param')
        geo_bn = HyBayesianNetwork(skel, params)

        acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'simple', normed = True)
        result_row = {'Method': method, 'isDisc': 'M', 'isSimple': 'S', 'isLogit': 'L', **acc, **rmse}
        df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
        df_result.to_csv(path, index=False)

  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (m

# Logit mix

In [48]:
# Experiment "Logit mix": for every scoring method, learn a structure with
# cont_disc=True, fit parameters in 'mix' mode, score the network on the test set
# and checkpoint the result table to `path` after every run.
# Rows are added with pd.concat because DataFrame.append was removed in pandas 2.0.
# NOTE(review): this cell is near-identical to the other three experiment cells;
# a shared function parameterised by cont_disc and mode would remove the duplication.
for method in method_list:
    # Variant 'D': structure learned on the frame with discretised continuous columns.
    bn_geo = structure_learning(geo_discrete, 'HC', geo_types, method, cont_disc = True)
    params = parameter_learning(geo_train, geo_types, bn_geo, 'mix')
    # Structure and parameters are written out and read back before the network is built.
    save_structure(bn_geo, path)
    skel = read_structure(path)
    save_params(params, f'{path}_param')
    params = read_params(f'{path}_param')
    geo_bn = HyBayesianNetwork(skel, params)

    acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'mix', normed = True)
    # Continue from the rows already stored on disk by earlier runs.
    df_result = pd.read_csv(path)
    result_row = {'Method': method, 'isDisc': 'D', 'isSimple': 'M', 'isLogit': 'L', **acc, **rmse}
    df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)

    df_result.to_csv(path, index=False)
    df_result = pd.read_csv(path)
    # Variant 'M': structure learned on the coded (non-discretised) frame; skipped for K2.
    if method != 'K2':
        bn_geo = structure_learning(geo_coded, 'HC', geo_types, method, cont_disc = True)
        params = parameter_learning(geo_train, geo_types, bn_geo, 'mix')
        save_structure(bn_geo, path)
        skel = read_structure(path)
        save_params(params, f'{path}_param')
        params = read_params(f'{path}_param')
        geo_bn = HyBayesianNetwork(skel, params)

        acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'mix', normed = True)
        result_row = {'Method': method, 'isDisc': 'M', 'isSimple': 'M', 'isLogit': 'L', **acc, **rmse}
        df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
        df_result.to_csv(path, index=False)

  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (m

# Unlogit simple

In [49]:
# Experiment "Unlogit simple": for every scoring method, learn a structure with
# cont_disc=False, fit parameters in 'simple' mode, score the network on the test set
# and checkpoint the result table to `path` after every run.
# Rows are added with pd.concat because DataFrame.append was removed in pandas 2.0.
# NOTE(review): this cell is near-identical to the other three experiment cells;
# a shared function parameterised by cont_disc and mode would remove the duplication.
for method in method_list:
    # Variant 'D': structure learned on the frame with discretised continuous columns.
    bn_geo = structure_learning(geo_discrete, 'HC', geo_types, method, cont_disc = False)
    params = parameter_learning(geo_train, geo_types, bn_geo, 'simple')
    # Structure and parameters are written out and read back before the network is built.
    save_structure(bn_geo, path)
    skel = read_structure(path)
    save_params(params, f'{path}_param')
    params = read_params(f'{path}_param')
    geo_bn = HyBayesianNetwork(skel, params)

    acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'simple', normed = True)
    # Continue from the rows already stored on disk by earlier runs.
    df_result = pd.read_csv(path)
    result_row = {'Method': method, 'isDisc': 'D', 'isSimple': 'S', 'isLogit': 'Unl', **acc, **rmse}
    df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
    df_result.to_csv(path, index=False)
    df_result = pd.read_csv(path)

    # Variant 'M': structure learned on the coded (non-discretised) frame; skipped for K2.
    if method != 'K2':
        bn_geo = structure_learning(geo_coded, 'HC', geo_types, method, cont_disc = False)
        params = parameter_learning(geo_train, geo_types, bn_geo, 'simple')
        save_structure(bn_geo, path)
        skel = read_structure(path)
        save_params(params, f'{path}_param')
        params = read_params(f'{path}_param')
        geo_bn = HyBayesianNetwork(skel, params)

        acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'simple', normed = True)
        result_row = {'Method': method, 'isDisc': 'M', 'isSimple': 'S', 'isLogit': 'Unl', **acc, **rmse}
        df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
        df_result.to_csv(path, index=False)

  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - 

# Unlogit mix

In [50]:
# Experiment "Unlogit mix": for every scoring method, learn a structure with
# cont_disc=False, fit parameters in 'mix' mode, score the network on the test set
# and checkpoint the result table to `path` after every run.
# Rows are added with pd.concat because DataFrame.append was removed in pandas 2.0.
# NOTE(review): this cell is near-identical to the other three experiment cells;
# a shared function parameterised by cont_disc and mode would remove the duplication.
for method in method_list:
    # Variant 'D': structure learned on the frame with discretised continuous columns.
    bn_geo = structure_learning(geo_discrete, 'HC', geo_types, method, cont_disc = False)
    params = parameter_learning(geo_train, geo_types, bn_geo, 'mix')
    # Structure and parameters are written out and read back before the network is built.
    save_structure(bn_geo, path)
    skel = read_structure(path)
    save_params(params, f'{path}_param')
    params = read_params(f'{path}_param')
    geo_bn = HyBayesianNetwork(skel, params)

    acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'mix', normed = True)
    # Continue from the rows already stored on disk by earlier runs.
    df_result = pd.read_csv(path)
    result_row = {'Method': method, 'isDisc': 'D', 'isSimple': 'M', 'isLogit': 'Unl', **acc, **rmse}
    df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
    df_result.to_csv(path, index=False)
    df_result = pd.read_csv(path)
    # Variant 'M': structure learned on the coded (non-discretised) frame; skipped for K2.
    if method != 'K2':
        bn_geo = structure_learning(geo_coded, 'HC', geo_types, method, cont_disc = False)
        params = parameter_learning(geo_train, geo_types, bn_geo, 'mix')
        save_structure(bn_geo, path)
        skel = read_structure(path)
        save_params(params, f'{path}_param')
        params = read_params(f'{path}_param')
        geo_bn = HyBayesianNetwork(skel, params)

        acc, rmse, real_param, pred_param, indexes = calculate_acc(geo_bn, geo_test, columns, 'mix', normed = True)
        result_row = {'Method': method, 'isDisc': 'M', 'isSimple': 'M', 'isLogit': 'Unl', **acc, **rmse}
        df_result = pd.concat([df_result, pd.DataFrame([result_row])], ignore_index=True)
        df_result.to_csv(path, index=False)

  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta_score = delta1 + delta2
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - mi_new)
  delta1 = -1* nrow * (mi_old - mi_new)
  delta2 = nrow * (mi_old - 