In [None]:
# Notebook metadata: version string and author name.
__version__ = '1.0.0-beta.1'
__author__ = 'Martino Pulici'

In [None]:
import pandas as pd
from pgmpy.inference import VariableElimination
from pgmpy.models import BayesianModel
from pgmpy.sampling import BayesianModelSampling

import functions

In [None]:
# Number of bins used when discretizing the continuous feature columns
# (passed to pd.cut, which also labels the bins 0 .. BINS-1).
BINS = 4

In [None]:
# Load the dataset, expose the 'Label' column under the name 'Chart', and
# discard the columns that take no part in the network.
df = (
    pd.read_csv("georgieva_suta_burton_data.csv", sep=',')
    .rename(columns={'Label': 'Chart'})
    .drop(columns=['Artist', 'Track', 'Month', 'Key', 'Mode'])
)

# Columns that get discretized with pd.cut into BINS intervals, each interval
# being labelled with an integer 0 .. BINS-1.
binned_columns = (
    'Danceability',
    'Valence',
    'Acousticness',
    'Loudness',
    'Instrumentalness',
    'Year',
    'Energy',
    'Tempo',
    'Liveness',
    'Speechiness',
)
for column in binned_columns:
    df[column] = pd.cut(df[column], BINS, labels=range(BINS))

In [None]:
# Structure of the Bayesian network, given as directed (source, target) edges.
# 'Chart' is the node with incoming edges from 'Loudness', 'Danceability'
# and 'ArtistScore'.
model = BayesianModel([('Danceability', 'Valence'),
                       ('Acousticness', 'Loudness'),
                       ('Instrumentalness', 'Loudness'),
                       ('Year', 'Loudness'),
                       ('Energy', 'Valence'),
                       ('Tempo', 'Energy'),
                       ('Liveness', 'Energy'),
                       ('Liveness', 'Speechiness'),
                       ('Loudness', 'Energy'),
                       ('Loudness', 'Chart'),
                       ('Speechiness', 'Danceability'),
                       ('Danceability', 'Chart'),
                       ('ArtistScore', 'Chart'),
                       ])

# Learn the parameters of the network from the prepared dataframe.
model.fit(df)

# pgmpy's check_model() reports an inconsistent model by raising ValueError
# (it returns True on success), so a bare `if not model.check_model()` never
# reaches its print.  Emit the message when the check fails and re-raise so
# the failure stays visible; the falsy-return test is kept as a fallback.
try:
    model_is_valid = model.check_model()
except ValueError:
    print("Incorrect model!")
    raise
if not model_is_valid:
    print("Incorrect model!")

In [None]:
# Retrieve the CPD the model holds for the 'Danceability' node and print it.
cpd = model.get_cpds('Danceability')
print(cpd)

In [None]:
# Ask the model whether there is an active trail between 'Danceability' and
# 'Energy': first with nothing observed, then with 'Speechiness' observed.
print(model.is_active_trail('Danceability', 'Energy'))
print(model.is_active_trail('Danceability', 'Energy', observed='Speechiness'))

In [None]:
# Print the Markov blanket of 'Danceability' as reported by the model.
print(model.get_markov_blanket('Danceability'))

In [None]:
# Active trail between 'Danceability' and 'Liveness' with nothing observed.
print(model.is_active_trail('Danceability', 'Liveness'))

# Same query again, this time observing every node in the Markov blanket of
# 'Danceability'.
danceability_blanket = model.get_markov_blanket('Danceability')
print(
    model.is_active_trail(
        'Danceability',
        'Liveness',
        observed=danceability_blanket,
    )
)

In [None]:
# Inference engines built on the fitted model: VariableElimination for the
# exact queries and BayesianModelSampling for the sampling-based ones.
exact_inference = VariableElimination(model)
approximate_inference = BayesianModelSampling(model)

In [None]:
# Variable to query, and the two alternative evidence assignments on 'Chart'
# that the queries below condition on.
VARIABLE = "Valence"
EVIDENCE_1 = dict(Chart=1)
EVIDENCE_2 = dict(Chart=0)

In [None]:
# Query VARIABLE given EVIDENCE_1 using the exact-inference engine.
# `functions.exact` is a project helper not shown here; presumably it runs the
# query and reports the result -- TODO confirm.
functions.exact(exact_inference, VARIABLE, EVIDENCE_1)

In [None]:
# Same query through the sampling engine via the project helper
# `functions.rejection` (presumably rejection sampling -- TODO confirm).
# NOTE(review): no random seed is set in the visible cells, so this result may
# differ between runs -- confirm whether the helper seeds the sampler.
functions.rejection(approximate_inference, VARIABLE, EVIDENCE_1)

In [None]:
# Same query through the sampling engine via the project helper
# `functions.weighted` (presumably likelihood-weighted sampling -- TODO confirm).
# NOTE(review): no random seed is set in the visible cells, so this result may
# differ between runs -- confirm whether the helper seeds the sampler.
functions.weighted(approximate_inference, VARIABLE, EVIDENCE_1)

In [None]:
# Exact query for VARIABLE again, this time conditioning on EVIDENCE_2.
# `functions.exact` is a project helper not shown here -- see its definition
# for what is returned or displayed.
functions.exact(exact_inference, VARIABLE, EVIDENCE_2)

In [None]:
# Collect the four series consumed by the plotting calls further down.
# `functions.graph_points` is a project helper not shown here; judging by the
# names it returns sample sizes plus the exact, rejection and weighted results
# for each size -- TODO confirm against the helper.
sizes, exact_results, rejection_results, weighted_results = functions.graph_points(
    exact_inference, approximate_inference, VARIABLE, EVIDENCE_1)

In [None]:
# Plot the collected series for VARIABLE given EVIDENCE_1 via the project
# helper `functions.graph` (presumably the three methods' results against
# sample size -- TODO confirm).
functions.graph(
    sizes,
    exact_results,
    rejection_results,
    weighted_results,
    VARIABLE,
    EVIDENCE_1)

In [None]:
# Second plot of the same series via the project helper `functions.diff_graph`
# (presumably the sampling results' difference from the exact ones -- TODO
# confirm).
functions.diff_graph(
    sizes,
    exact_results,
    rejection_results,
    weighted_results,
    VARIABLE,
    EVIDENCE_1)