# Book Review Model

# Data Processing and Model Fitting

In [None]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
import numpy as np

# Compatibility shim: `np.product` was removed in NumPy 2.0 in favour of `np.prod`.
# Presumably the installed pgmpy release still calls the old alias -- TODO confirm
# and drop this once pgmpy no longer needs it.
if not hasattr(np, 'product'):
    np.product = np.prod

# Tunables for this cell.
SAMPLE_SIZE = 1000      # rows drawn from the merged frame to keep fitting fast
RANDOM_STATE = 42       # fixed seed so the sample (and every CPT below) is reproducible
POPULARITY_LEVELS = ['Low', 'High']
RATING_LEVELS = ['1', '2', '3', '4', '5']

# Load both CSVs and join every review to its book's metadata by title.
# NOTE(review): pandas matches missing (NaN) join keys with each other, so reviews
# with a missing Title would pair with any book row whose Title is also missing --
# confirm whether such rows exist and should be dropped before merging.
ratings_df = pd.read_csv('Books_rating.csv')
data_df = pd.read_csv('books_data.csv')
merged_df = pd.merge(ratings_df, data_df, on='Title')

# Work on a reproducible sample; cap at the frame size so a small merge result
# does not make `sample` raise.
sample_df = merged_df.sample(n=min(SAMPLE_SIZE, len(merged_df)), random_state=RANDOM_STATE)

# Popularity: split at the median ratings count (missing counts treated as 0).
# This is the same partition a two-bin quantile cut produces (Low = count <= median,
# High = count > median), but it cannot fail when the median coincides with the
# minimum or maximum, where a labelled qcut with duplicates='drop' raises because
# only one bin remains for two labels.
ratings_count = sample_df['ratingsCount'].fillna(0)
popularity = np.where(ratings_count > ratings_count.median(), 'High', 'Low')

# Rating: truncate the review score to an integer and use it as a string label.
# Missing or non-numeric scores become NaN here and are dropped below instead of
# crashing the integer cast.
review_scores = pd.to_numeric(sample_df['review/score'], errors='coerce')
rating_labels = (
    review_scores.dropna().astype(int).astype(str).reindex(sample_df.index)
)

# Final modelling frame: two categorical columns with explicit state lists.
# Any value outside the declared categories turns into NaN and is removed.
df = pd.DataFrame(
    {
        'Book_Popularity': pd.Categorical(popularity, categories=POPULARITY_LEVELS),
        'Rating_Prediction': pd.Categorical(rating_labels, categories=RATING_LEVELS),
    },
    index=sample_df.index,
).dropna(subset=['Book_Popularity', 'Rating_Prediction'])

assert not df.empty, "no usable rows left after cleaning; cannot fit the network"

# Two-node network Book_Popularity -> Rating_Prediction, fitted by maximum likelihood.
model = BayesianNetwork([('Book_Popularity', 'Rating_Prediction')])
model.fit(df, estimator=MaximumLikelihoodEstimator)

# Inference engine reused by the query cells below.
inference = VariableElimination(model)

  data.groupby([variable] + parents).size().unstack(parents)


In [None]:
# Marginal distribution of the rating: query the engine with no evidence,
# so the result is not conditioned on any observed value.
rating_query = ['Rating_Prediction']
marginal_rating = inference.query(variables=rating_query)
print(marginal_rating)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 715.75it/s]
Eliminating: Book_Popularity: 100%|██████████| 1/1 [00:00<00:00, 839.03it/s]

+----------------------+--------------------------+
| Rating_Prediction    |   phi(Rating_Prediction) |
| Rating_Prediction(1) |                   0.0700 |
+----------------------+--------------------------+
| Rating_Prediction(2) |                   0.0550 |
+----------------------+--------------------------+
| Rating_Prediction(3) |                   0.0770 |
+----------------------+--------------------------+
| Rating_Prediction(4) |                   0.2120 |
+----------------------+--------------------------+
| Rating_Prediction(5) |                   0.5860 |
+----------------------+--------------------------+





In [17]:
# Show the network structure first, then every conditional probability table.
structure_summary = (
    ("Model nodes:", model.nodes()),
    ("Model edges:", model.edges()),
)
for caption, members in structure_summary:
    print(caption, members)

print("\nCPTs:")
for table in model.get_cpds():
    print(table)


Model nodes: ['Book_Popularity', 'Rating_Prediction']
Model edges: [('Book_Popularity', 'Rating_Prediction')]

CPTs:
+-----------------------+-------+
| Book_Popularity(High) | 0.473 |
+-----------------------+-------+
| Book_Popularity(Low)  | 0.527 |
+-----------------------+-------+
+----------------------+-----------------------+----------------------+
| Book_Popularity      | Book_Popularity(High) | Book_Popularity(Low) |
+----------------------+-----------------------+----------------------+
| Rating_Prediction(1) | 0.06765327695560254   | 0.07210626185958255  |
+----------------------+-----------------------+----------------------+
| Rating_Prediction(2) | 0.05919661733615222   | 0.051233396584440226 |
+----------------------+-----------------------+----------------------+
| Rating_Prediction(3) | 0.07610993657505286   | 0.0777988614800759   |
+----------------------+-----------------------+----------------------+
| Rating_Prediction(4) | 0.20084566596194503   | 0.22201138519924

In [None]:
def _print_conditional(target, given, value, engine=None):
    """Query the distribution of `target` given `given = value` and print it.

    Prints a caption line, the query result, and a blank line. Returns None.
    """
    # Resolve the notebook-level engine at call time so the functions can be
    # defined before (or re-used after) the model cell is re-run.
    if engine is None:
        engine = inference
    query_result = engine.query(variables=[target], evidence={given: value})
    print(f"Given {given} = {value}:")
    print(query_result)
    print()


def predict_rating_given_popularity(popularity_value, engine=None):
    """Print the distribution of Rating_Prediction given a Book_Popularity state.

    Args:
        popularity_value: Observed Book_Popularity state; the notebook calls this
            with 'High' and 'Low'.
        engine: Optional object exposing `query(variables=..., evidence=...)`.
            Defaults to the notebook-level `inference` engine.

    Returns:
        None. The result is printed, not returned.
    """
    _print_conditional('Rating_Prediction', 'Book_Popularity', popularity_value, engine)


def predict_popularity_given_rating(rating_value, engine=None):
    """Print the distribution of Book_Popularity given a Rating_Prediction state.

    Args:
        rating_value: Observed Rating_Prediction state; the notebook calls this
            with string labels such as '5' and '1'.
        engine: Optional object exposing `query(variables=..., evidence=...)`.
            Defaults to the notebook-level `inference` engine.

    Returns:
        None. The result is printed, not returned.
    """
    _print_conditional('Book_Popularity', 'Rating_Prediction', rating_value, engine)

# Demo queries: rating distribution for each popularity level, then the
# popularity distribution for the top and bottom rating.
for popularity_level in ('High', 'Low'):
    predict_rating_given_popularity(popularity_level)

for rating_level in ('5', '1'):
    predict_popularity_given_rating(rating_level)

Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]


Given Book_Popularity = High:
+----------------------+--------------------------+
| Rating_Prediction    |   phi(Rating_Prediction) |
| Rating_Prediction(1) |                   0.0677 |
+----------------------+--------------------------+
| Rating_Prediction(2) |                   0.0592 |
+----------------------+--------------------------+
| Rating_Prediction(3) |                   0.0761 |
+----------------------+--------------------------+
| Rating_Prediction(4) |                   0.2008 |
+----------------------+--------------------------+
| Rating_Prediction(5) |                   0.5962 |
+----------------------+--------------------------+



Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]


Given Book_Popularity = Low:
+----------------------+--------------------------+
| Rating_Prediction    |   phi(Rating_Prediction) |
| Rating_Prediction(1) |                   0.0721 |
+----------------------+--------------------------+
| Rating_Prediction(2) |                   0.0512 |
+----------------------+--------------------------+
| Rating_Prediction(3) |                   0.0778 |
+----------------------+--------------------------+
| Rating_Prediction(4) |                   0.2220 |
+----------------------+--------------------------+
| Rating_Prediction(5) |                   0.5769 |
+----------------------+--------------------------+



Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]


Given Rating_Prediction = 5:
+-----------------------+------------------------+
| Book_Popularity       |   phi(Book_Popularity) |
| Book_Popularity(High) |                 0.4812 |
+-----------------------+------------------------+
| Book_Popularity(Low)  |                 0.5188 |
+-----------------------+------------------------+



Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]

Given Rating_Prediction = 1:
+-----------------------+------------------------+
| Book_Popularity       |   phi(Book_Popularity) |
| Book_Popularity(High) |                 0.4571 |
+-----------------------+------------------------+
| Book_Popularity(Low)  |                 0.5429 |
+-----------------------+------------------------+




