In [None]:
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
from pgmpy.sampling import BayesianModelSampling
from pgmpy.factors.discrete import State

In [None]:
# Load the track table, rename the 'Label' column to 'Chart', and drop the
# columns that the network defined below does not reference.
df = (
    pd.read_csv('complete_project_data.csv', sep=',')
    .rename(columns={'Label': 'Chart'})
    .drop(columns=['Artist', 'Track', 'Month', 'Key', 'Mode'])
)

# Discretise each listed feature into three equal-width bins labelled 0, 1, 2.
binned_features = [
    'Danceability', 'Valence', 'Acousticness', 'Loudness', 'Instrumentalness',
    'Year', 'Energy', 'Tempo', 'Liveness', 'Speechiness',
]
df = df.assign(
    **{name: pd.cut(df[name], 3, labels=range(3)) for name in binned_features}
)

In [None]:
# Hand-specified network structure as directed (parent, child) edges.
# NOTE(review): 'ArtistScore' and 'Chart' are not among the columns binned with
# pd.cut in the data-preparation cell -- presumably already discrete; confirm.
edges = [
    ('Danceability', 'Valence'),
    ('Acousticness', 'Loudness'),
    ('Instrumentalness', 'Loudness'),
    ('Year', 'Loudness'),
    ('Energy', 'Valence'),
    ('Tempo', 'Energy'),
    ('Liveness', 'Energy'),
    ('Liveness', 'Speechiness'),
    ('Loudness', 'Energy'),
    ('Loudness', 'Chart'),
    ('Speechiness', 'Danceability'),
    ('Danceability', 'Chart'),
    ('ArtistScore', 'Chart'),
]
model = BayesianModel(edges)

# Estimate the CPDs from the prepared frame, then run pgmpy's model check;
# as the last expression of the cell its return value is displayed.
model.fit(df)
model.check_model()

In [None]:
# Print every conditional probability table currently attached to the model.
fitted_cpds = model.get_cpds()
for table in fitted_cpds:
    print(table)

In [None]:
# Exact inference engine (variable elimination) over the fitted network.
exact_inference = VariableElimination(model=model)
# Sampler used by the likelihood-weighted and rejection-sampling cells.
approximate_inference = BayesianModelSampling(model=model)

In [None]:
# Exact marginal distribution of Valence via variable elimination.
valence_marginal = exact_inference.query(variables=['Valence'], show_progress=False)
print('P(Valence)')
print(valence_marginal)

In [None]:
# Approximate P(Valence) with likelihood-weighted sampling (no evidence).
size = 10000
query = approximate_inference.likelihood_weighted_sample(size=size, return_type='recarray')

# Read the number of Valence states from the fitted model instead of
# hard-coding it, so the cell keeps working if the binning changes.
valence_cardinality = model.get_cardinality('Valence')

# Hoisted out of the loop: the per-sample weights and their total.
weights = query['_weight']
total_weight = np.sum(weights)

# One single-column row per state: weighted share of samples in that state.
# Assumes sampled values are integer state indices 0..k-1, as the original
# hard-coded loop over range(3) did -- confirm for the installed pgmpy version.
query_probabilities = [
    [np.dot(query['Valence'] == value, weights) / total_weight]
    for value in range(valence_cardinality)
]
cpd = TabularCPD('Valence', valence_cardinality, query_probabilities)
print('P(Valence)')
print(cpd)

In [None]:
# Approximate P(Valence) with rejection sampling (no evidence).
size = 10000
query = approximate_inference.rejection_sample(size=size, return_type='recarray', show_progress=False)

# Read the number of Valence states from the fitted model instead of
# hard-coding it, so the cell keeps working if the binning changes.
valence_cardinality = model.get_cardinality('Valence')

# One single-column row per state: fraction of the requested samples in that state.
# Assumes sampled values are integer state indices 0..k-1, as the original
# hard-coded loop over range(3) did -- confirm for the installed pgmpy version.
query_probabilities = [
    [np.count_nonzero(query['Valence'] == value) / size]
    for value in range(valence_cardinality)
]
cpd = TabularCPD('Valence', valence_cardinality, query_probabilities)
print('P(Valence)')
print(cpd)

In [None]:
# Exact posterior of Valence given the evidence Chart = 1.
valence_given_chart = exact_inference.query(
    variables=['Valence'],
    evidence={'Chart': 1},
    show_progress=False,
)
print('P(Valence|Chart=1)')
print(valence_given_chart)

In [None]:
# Approximate P(Valence | Chart=1) with likelihood-weighted sampling.
size = 10000
query = approximate_inference.likelihood_weighted_sample(evidence=[State('Chart',1)],size=size, return_type='recarray')

# Read the number of Valence states from the fitted model instead of
# hard-coding it, so the cell keeps working if the binning changes.
valence_cardinality = model.get_cardinality('Valence')

# Hoisted out of the loop: the per-sample weights and their total.
weights = query['_weight']
total_weight = np.sum(weights)

# One single-column row per state: weighted share of samples in that state.
# Assumes sampled values are integer state indices 0..k-1, as the original
# hard-coded loop over range(3) did -- confirm for the installed pgmpy version.
query_probabilities = [
    [np.dot(query['Valence'] == value, weights) / total_weight]
    for value in range(valence_cardinality)
]
cpd = TabularCPD('Valence', valence_cardinality, query_probabilities)
print('P(Valence|Chart=1)')
print(cpd)

In [None]:
# Approximate P(Valence | Chart=1) with rejection sampling.
size = 10000
query = approximate_inference.rejection_sample(evidence=[State('Chart',1)], size=size, return_type='recarray',show_progress=False)

# Read the number of Valence states from the fitted model instead of
# hard-coding it, so the cell keeps working if the binning changes.
valence_cardinality = model.get_cardinality('Valence')

# One single-column row per state: fraction of the requested samples in that state.
# Assumes sampled values are integer state indices 0..k-1, as the original
# hard-coded loop over range(3) did -- confirm for the installed pgmpy version.
query_probabilities = [
    [np.count_nonzero(query['Valence'] == value) / size]
    for value in range(valence_cardinality)
]
cpd = TabularCPD('Valence', valence_cardinality, query_probabilities)
print('P(Valence|Chart=1)')
print(cpd)