In [202]:
import pandas as pd
import numpy as np

# Directory holding the Titanic CSV files (a relative path).
path = "dataset/titanic/"
# Read only the six listed columns and drop every row that has a missing value in any of them.
dt_train = pd.read_csv(path+"train.csv", usecols=['Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Embarked']).dropna()

# Preview the first rows of the loaded frame.
dt_train.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked
0,0,3,male,22.0,7.25,S
1,1,1,female,38.0,71.2833,C
2,1,3,female,26.0,7.925,S
3,1,1,female,35.0,53.1,S
4,0,3,male,35.0,8.05,S


In [203]:
# Convert the text columns to numeric codes.
#
# The mapping is scoped per column and the result is assigned back to
# `dt_train` instead of using `inplace=True`. Calling an in-place `replace`
# on `dt_train['Embarked']` is a chained assignment: pandas warns about it and,
# with Copy-on-Write enabled, it no longer updates the parent frame.
# `replace` leaves values that are not in the mapping untouched, so re-running
# this cell on already-encoded data is harmless.
dt_train = dt_train.replace({
    'Sex': {'male': 0, 'female': 1},
    'Embarked': {'S': 1, 'C': 2, 'Q': 3},
})

In [204]:
# Preview the first rows again to check the text-to-number conversion.
dt_train.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked
0,0,3,0,22.0,7.25,1
1,1,1,1,38.0,71.2833,2
2,1,3,1,26.0,7.925,1
3,1,1,1,35.0,53.1,1
4,0,3,0,35.0,8.05,1


In [205]:
def finalEntropy(entropies, total):
    """Combine per-group entropies into a single size-weighted average.

    Args:
        entropies: Entropy of each group. NaN entries are treated as 0.
        total: Number of samples in each group, in the same order as
            ``entropies``.

    Returns:
        The sum over groups of ``total_i / sum(total) * entropy_i``.
        Returns 0 when there are no groups or when all groups are empty.
    """
    # The grand total does not change inside the loop, so compute it once.
    grand_total = sum(total)
    if grand_total == 0:
        # Nothing to weight by; avoid dividing by zero.
        return 0

    entropy = 0
    for t, e in zip(total, entropies):
        if np.isnan(e):
            # A NaN entropy contributes nothing to the average.
            e = 0
        entropy = entropy + t/grand_total*e
    return entropy

def innerFunc(denominator, survived, notSurvived):
    """Return the two-class entropy, in bits, of one group.

    Args:
        denominator: Number of samples in the group.
        survived: Number of samples in the group with class 1.
        notSurvived: Number of samples in the group with class 0.

    Returns:
        ``-p0*log2(p0) - p1*log2(p1)`` where ``p = count / denominator``.
        A class with a zero count contributes 0, and an empty group
        (``denominator <= 0``) has entropy 0.0. This avoids the NaN and the
        RuntimeWarning that evaluating ``0 * log2(0)`` produces.
    """
    if denominator <= 0:
        return 0.0

    entropy = 0.0
    # Same term order as the formula above: class 0 first, then class 1.
    for count in (notSurvived, survived):
        if count > 0:
            proportion = count / denominator
            entropy -= proportion * np.log2(proportion)
    return entropy

def entropyCalculator(dt_caracteristic, dt_survived, intervalList=None):
    """Return the size-weighted entropy of the target after grouping by a feature.

    Args:
        dt_caracteristic: Series with the feature values.
        dt_survived: Series with the target (0 or 1), sharing the index of
            ``dt_caracteristic``.
        intervalList: Optional increasing upper edges of the bins. The bins are
            ``(0, e1], (e1, e2], ...``. When omitted or empty, one group is
            formed per distinct feature value.

    Returns:
        The value of ``finalEntropy`` over the per-group entropies and sizes.

    Note:
        Counts are taken directly from the two Series passed in. The function
        does not read the module-level ``dt_train``, and it avoids the
        deprecated positional lookup ``.count()[0]``.
        NOTE(review): with ``intervalList``, values that are <= 0 or greater
        than the last edge fall in no bin and are left out - confirm that this
        is intended.
    """
    is_not_survived = dt_survived == 0
    is_survived = dt_survived == 1

    # Build one boolean mask per group.
    if intervalList is not None and len(intervalList) > 0:
        groups = []
        lower = 0
        for upper in intervalList:
            groups.append((dt_caracteristic > lower) & (dt_caracteristic <= upper))
            lower = upper
    else:
        groups = [dt_caracteristic == value for value in dt_caracteristic.unique()]

    s_total = []
    entropies = []
    for in_group in groups:
        # Summing a boolean mask counts the rows it selects.
        notSurvived = (in_group & is_not_survived).sum()
        survived = (in_group & is_survived).sum()
        total = notSurvived+survived
        s_total.append(total)
        entropies.append(innerFunc(total, survived, notSurvived))
    return finalEntropy(entropies, s_total)

def targetClassEntropy(dt_class):
    """Return the entropy, in bits, of the class distribution in ``dt_class``.

    Args:
        dt_class: Series of class labels (for example the 0/1 ``Survived``
            column). Missing values are ignored.

    Returns:
        ``-sum(p * log2(p))`` over the distinct labels, as a float; 0.0 for an
        empty Series.

    Note:
        The previous version appended to ``s_total`` and ``entropies`` without
        defining them, so it raised ``NameError`` on a fresh kernel or kept
        accumulating into leftover globals. This version uses only its
        argument.
    """
    counts = dt_class.value_counts()
    total = counts.sum()
    if total == 0:
        return 0.0

    # value_counts only reports labels that occur, so no proportion is zero.
    proportions = counts / total
    # Adding 0.0 turns the -0.0 produced for a single-class input into 0.0.
    return float(-(proportions * np.log2(proportions)).sum()) + 0.0

In [206]:
# Collects [feature name, gain] pairs; the cells below append one entry each.
gainList = []

# Entropy of the target column before any split; every gain below is measured against it.
SurvivedEntropy = targetClassEntropy(dt_train['Survived'])
SurvivedEntropy

0.9735190023846807

In [207]:
# Weighted entropy of Survived after grouping by each distinct Pclass value.
PClassEntropy = entropyCalculator(dt_train['Pclass'], dt_train['Survived'])
# Gain = target entropy minus the entropy remaining after the split.
gainList.append(['PClass', SurvivedEntropy - PClassEntropy])

In [208]:
# Weighted entropy of Survived after grouping by each distinct Sex value.
SexEntropy = entropyCalculator(dt_train['Sex'], dt_train['Survived'])
# Gain = target entropy minus the entropy remaining after the split.
gainList.append(['Sex', SurvivedEntropy - SexEntropy])

In [209]:
# Upper edges of the age bins; entropyCalculator turns them into (0, 30], (30, 50], (50, 80].
age_interval = [30, 50, 80]
AgeEntropy = entropyCalculator(dt_train['Age'], dt_train['Survived'], age_interval)
# Gain = target entropy minus the entropy remaining after the split.
gainList.append(['Age', SurvivedEntropy - AgeEntropy])

In [210]:
# Upper edges of the fare bins; entropyCalculator turns them into (0, 128], (128, 256], (256, 384].
# NOTE(review): rows whose Fare is 0 or above 384 fall in no bin and are left
# out of FareEntropy, while SurvivedEntropy covers every row - confirm that this is intended.
fare_interval = [128, 256, 384]
FareEntropy = entropyCalculator(dt_train['Fare'], dt_train['Survived'], fare_interval)
# Gain = target entropy minus the entropy remaining after the split.
gainList.append(['Fare', SurvivedEntropy - FareEntropy])

In [211]:
# Weighted entropy of Survived after grouping by each distinct Embarked value.
EmbarkedEntropy = entropyCalculator(dt_train['Embarked'], dt_train['Survived'])
# Gain = target entropy minus the entropy remaining after the split.
gainList.append(['Embarked', SurvivedEntropy - EmbarkedEntropy])

In [212]:
# Show the collected [feature name, gain] pairs.
gainList

[['PClass', 0.09400998456880594],
 ['Sex', 0.21410831283572285],
 ['Age', 0.0016444881735017125],
 ['Fare', 0.015799848969737185],
 ['Embarked', 0.027858206925991946]]

In [213]:
# Implementação do Grafo:

from collections import defaultdict

class Node:
    """Minimal container that keeps one payload value in ``data``."""

    def __init__(self, data):
        # Store the payload exactly as given; nothing is copied or validated.
        self.data = data


class Grafo(object):
    """Graph stored as a mapping from each node to the set of its neighbours.

    Nodes are used as dictionary keys and set members, so they must be
    hashable. In a directed graph, a node that only appears as the target of
    an edge is not a key of ``adj`` and is therefore not reported by
    ``get_vertices`` or counted by ``len``.
    """

    def __init__(self, direcionado=False):
        """Create an empty graph; ``direcionado=True`` makes edges one-way."""
        self.adj = defaultdict(set)
        self.direcionado = direcionado

    def get_vertices(self):
        """Return the nodes that currently have an adjacency entry, as a list."""
        return list(self.adj.keys())

    def get_arestas(self):
        """Return every stored ``(node, neighbour)`` pair as a list of tuples.

        An undirected graph stores each edge in both directions, so each edge
        appears twice in the result.
        """
        return [(k, v) for k in self.adj.keys() for v in self.adj[k]]

    def adiciona_arestas(self, node1, node2):
        """Add an edge from ``node1`` to ``node2``."""
        # Fixed: the helper is named ``__adiciona_arco__``; the previous call
        # to ``self.adiciona_arco`` raised AttributeError.
        self.__adiciona_arco__(node1, node2)

    def __adiciona_arco__(self, node1, node2):
        """Record ``node2`` as a neighbour of ``node1``.

        For an undirected graph the reverse direction is recorded as well.
        """
        self.adj[node1].add(node2)
        if not self.direcionado:
            self.adj[node2].add(node1)

    def existe_aresta(self, node1, node2):
        """Return True when ``node2`` is stored as a neighbour of ``node1``."""
        # Checking membership first avoids creating an empty defaultdict entry.
        return node1 in self.adj and node2 in self.adj[node1]

    def __len__(self):
        """Return the number of nodes that have an adjacency entry."""
        return len(self.adj)

    def __str__(self):
        """Return ``ClassName({node: neighbours, ...})``."""
        return '{}({})'.format(self.__class__.__name__, dict(self.adj))

    def __getitem__(self, node2):
        """Return the neighbour set of ``node2``.

        ``adj`` is a defaultdict, so indexing a node that is not yet present
        creates an empty entry for it.
        """
        return self.adj[node2]