In [4]:
import pandas as pd
import numpy as np

In [91]:
# Datos de entrenamiento
data = np.array([
            ['','Pink Floyd','The Beatles','R.E.M.','Nirvana','Queen','Oasis','Class'],
            ['P1', 1, 0, 0, 1, 1, 1, 'J'],
            ['P2', 1, 1, 0, 1, 1, 0, 'J'],
            ['P3', 1, 1, 1, 0, 0, 1, 'J'],
            ['P4', 1, 0, 1, 0, 0, 1, 'J'],
            ['P5', 1, 0, 0, 0, 1, 0, 'J'],
            ['P6', 1, 1, 1, 0, 0, 0, 'J'],
            ['P7', 1, 1, 0, 0, 1, 1, 'A'],
            ['P8', 1, 1, 1, 0, 0, 1, 'A'],
            ['P9', 1, 1, 1, 1, 1, 0, 'A'],
            ['P10',1, 1, 1, 0, 1, 0, 'A'],
            ['P11',1, 1, 1, 0, 1, 1, 'A'],
            ['P12',1, 1, 0, 1, 1, 0, 'A'],
            ['P13',1, 1, 0, 1, 0, 0, 'A']])
    
dF=pd.DataFrame(data=data[1:,1:],
                  index=data[1:,0],
                  columns=data[0,1:])
dF


Unnamed: 0,Pink Floyd,The Beatles,R.E.M.,Nirvana,Queen,Oasis,Class
P1,1,0,0,1,1,1,J
P2,1,1,0,1,1,0,J
P3,1,1,1,0,0,1,J
P4,1,0,1,0,0,1,J
P5,1,0,0,0,1,0,J
P6,1,1,1,0,0,0,J
P7,1,1,0,0,1,1,A
P8,1,1,1,0,0,1,A
P9,1,1,1,1,1,0,A
P10,1,1,1,0,1,0,A


In [19]:
# Cálculo de P(band|clase) 
def prob_of_band_given_class(band,cl):
    Nc=0
    nc_band=0
    for k in dF.index:
        if dF.loc[k]['Class'] == cl:
            Nc += 1
            if dF.loc[k][band] == '1':
                nc_band += 1
    return nc_band/Nc

# Cálculo de P(clase)
def prob_of_class(cl):
    counter=0
    for k in dF.index:
        if dF.loc[k]['Class'] == cl:
            counter +=1
    return counter/len(dF.index)

print(len(dF.index))
prob_of_class('A')

# Cálculo de P(clase|persona) con MVE, la probabilidad de que una persona dada pertenezca a la clase
def mve_prob_of_being_of_class(cl,person):
    p=0
    p= prob_of_class(cl)
    for k in dF.columns[:-1]:
        if person[k] == '1':
            p *= prob_of_band_given_class(k,cl)
        else:
            p *= (1-prob_of_band_given_class(k,cl))
    return p


13


In [21]:
# Clasificador bayesiano ingenuo MEV. Clasifica un individuo como 'A' (adulto) o 'J' (joven)
def mve_nb_classifier1(person):
    if mve_prob_of_being_of_class('A',person) < mve_prob_of_being_of_class('J',person):
        return 'J'
    if mve_prob_of_being_of_class('J',person) < mve_prob_of_being_of_class('A',person):
        return 'A'
        
for k in dF.index:
    print(k,mve_nb_classifier1(dF.loc[k]),dF.loc[k]['Class'])

P1 J J
P2 A J
P3 A J
P4 J J
P5 J J
P6 A J
P7 A A
P8 A A
P9 A A
P10 A A
P11 A A
P12 A A
P13 A A


In [112]:
# Datos para predicciones
tdata = np.array([    
        ['','Pink Floyd','The Beatles', 'R.E.M.','Nirvana','Queen','Oasis'],
        ['x1',1, 1, 0, 1, 1, 0],
        ['x2',1, 0, 1, 1, 1, 1],
        ['x3',1, 1, 0, 0, 0, 0],
        ['x4',1, 1, 1, 1, 1, 1],
        ['x5',0, 1, 1, 1, 1, 1]])

testdF=pd.DataFrame(data=tdata[1:,1:],
                  index=tdata[1:,0],
                  columns=tdata[0,1:])
testdF

Unnamed: 0,Pink Floyd,The Beatles,R.E.M.,Nirvana,Queen,Oasis
x1,1,1,0,1,1,0
x2,1,0,1,1,1,1
x3,1,1,0,0,0,0
x4,1,1,1,1,1,1
x5,0,1,1,1,1,1


In [68]:
# Predicciones
for k in testdF.index:
    print(mve_nb_classifier1(testdF.loc[k]))

A
J
A
A
None


In [118]:
# Suponemos que P(C) tiene distribución Bernoulli B(q). Buscamos q.

def qmap_class(cl):
    Nc=0
    for k in dF.index:
        if dF.loc[k]['Class'] == cl:
            Nc += 1
    return (Nc+1)/(13+Nc)
print(qmap_class('A'))
print(qmap_class('J'))

def qmap_band_given_class(cl,band):
    nc_a = 0
    Nc = 0
    for k in dF.index:
        if dF.loc[k]['Class'] == cl:
            Nc +=1
            if dF.loc[k][band] == '1':
                nc_a +=1
    
    return (nc_a +1)/(6+Nc)

def qmaps_given_class(cl):
    qmaps={}
    for k in dF.columns[:-1]:
        qmaps[k] = qmap_band_given_class(cl,k)
    return qmaps

print(qmaps_given_class('A'))
print(qmaps_given_class('J'))

# Cálculo de P(clase|persona)
def map_prob_of_being_of_class(cl,person):
    qmaps = qmaps_given_class(cl)
    p = qmap_class(cl)
    for k in dF.columns[:-1]:
        if person[k] == '1':
            p *= qmaps[k]
        else: 
            p *= (1-qmaps[k])
    return p



0.4
0.3684210526315789
{'Pink Floyd': 0.6153846153846154, 'The Beatles': 0.6153846153846154, 'R.E.M.': 0.38461538461538464, 'Nirvana': 0.3076923076923077, 'Queen': 0.46153846153846156, 'Oasis': 0.3076923076923077}
{'Pink Floyd': 0.5833333333333334, 'The Beatles': 0.3333333333333333, 'R.E.M.': 0.3333333333333333, 'Nirvana': 0.25, 'Queen': 0.3333333333333333, 'Oasis': 0.3333333333333333}


In [106]:
def map_nb_classifier1(person):
    if map_prob_of_being_of_class('A',person) < map_prob_of_being_of_class('J',person):
        return 'J'
    if map_prob_of_being_of_class('J',person) < map_prob_of_being_of_class('A',person):
        return 'A'
    
#map_nb_classifier1(dF.loc['P1'])
print(dF.loc['P1'])

    
print('Per','OCl','MVE', 'MAP')
for k in dF.index:
    print(k,' ', dF.loc[k]['Class'],' ', mve_nb_classifier1(dF.loc[k]),' ',map_nb_classifier1(dF.loc[k]))
for k in testdF.index:
    print(k,' ', 'UNK',' ', mve_nb_classifier1(testdF.loc[k]),' ',map_nb_classifier1(testdF.loc[k]))

Pink Floyd     1
The Beatles    0
R.E.M.         0
Nirvana        1
Queen          1
Oasis          1
Class          J
Name: P1, dtype: object
Per OCl MVE MAP
P1   J   J   J
P2   J   A   A
P3   J   A   A
P4   J   J   J
P5   J   J   J
P6   J   A   A
P7   A   A   A
P8   A   A   A
P9   A   A   A
P10   A   A   A
P11   A   A   A
P12   A   A   A
P13   A   A   A
x1   UNK   A   A
x2   UNK   J   A
x3   UNK   A   A
x4   UNK   A   A
x5   UNK   None   A


In [117]:
print(mve_prob_of_being_of_class('A',dF.loc['P1']),mve_prob_of_being_of_class('J',dF.loc['P1']))
print(map_prob_of_being_of_class('A',dF.loc['P1']),map_prob_of_being_of_class('J',dF.loc['P1']))

print(mve_prob_of_being_of_class('A',testdF.loc['x2']),mve_prob_of_being_of_class('J',testdF.loc['x2']))
print(map_prob_of_being_of_class('A',testdF.loc['x2']),map_prob_of_being_of_class('J',testdF.loc['x2']))

0.0 0.009615384615384616
0.002545781281173546 0.002653238033354993
0.0 0.009615384615384616
0.0015911133007334662 0.001326619016677496
