In [None]:
import pandas as pd

# load the pre-processed reviews table; per the original note this file was
# already restricted to the 10,000 most active users and carries the price score
reviews = pd.read_csv(filepath_or_buffer='processed_data/reviews_price_score.csv')

In [None]:
# distribution of star ratings: per-star non-null counts of every column, with
# the review_id count rescaled to a fraction of all reviews (values in 0-1)
stars_data = reviews.groupby('stars').count().assign(
    review_id=lambda counts: counts['review_id'] / len(reviews)
)

# inspect the resulting fractions
stars_data

In [None]:
import random
from tqdm.notebook import tqdm

# fix the seed of the stdlib RNG so the simulated ratings are reproducible
random.seed(0)

# naive simulator: each review gets a rating drawn uniformly from a fixed pool
# of 100 entries (35x 5, 35x 4, 17x 3, 8x 2, 5x 1)
sample_list = [5] * 35 + [4] * 35 + [3] * 17 + [2] * 8 + [1] * 5
naive_ratings = [random.choice(sample_list) for _ in tqdm(range(len(reviews)))]

# attach the simulated ratings to the review df as a new column
reviews['naive_rating'] = naive_ratings

# display the frame to confirm the new column is present
reviews

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set_style('white')

# distinct user ids found in the reviews, in the (sorted) order of the groupby keys
ids = reviews.groupby('user_id').size().index.tolist()

def plot_expectation_distribution(reviews_df, user_ids, ratings_column, saving_path='none'):
    """Plot a histogram of per-user expectation scores.

    The scores come from ``create_expectation_dataframe``; its ``expectation``
    column is drawn with ``sns.histplot`` and the figure is then shown.

    Parameters
    ----------
    reviews_df : pandas.DataFrame
        Reviews table, forwarded unchanged to ``create_expectation_dataframe``.
    user_ids : iterable
        User ids to include, forwarded unchanged.
    ratings_column : str
        Name of the rating column to correlate with the price score.
    saving_path : str or None, optional
        Where to write the figure (dpi=200). The legacy sentinel ``'none'``
        (the default) and ``None`` both mean "do not save".

    Returns
    -------
    None
    """
    user_expectations = create_expectation_dataframe(reviews_df, user_ids, ratings_column)
    sns.histplot(data=user_expectations, x='expectation', color=[0.6, 0.6, 1.0], fill=True)

    # Accept None as well as the historical 'none' string; previously None fell
    # through to plt.savefig(None).
    wants_file = saving_path is not None and saving_path != 'none'
    if wants_file:
        # Saving stays ahead of plt.show(), as in the original ordering.
        plt.savefig(saving_path, dpi=200)
        print('saved')
    plt.show()

def create_expectation_dataframe(reviews_df, user_ids, ratings_column):
    """Compute a per-user expectation score from the price/rating correlation.

    For each requested user, the correlation between ``price_score`` and
    ``ratings_column`` over that user's reviews is computed with
    ``DataFrame.corr`` and mapped to ``expectation = 0.5 * (1 - correlation)``,
    so a correlation of +1 gives 0 and a correlation of -1 gives 1.

    Parameters
    ----------
    reviews_df : pandas.DataFrame
        Must contain the columns ``user_id``, ``price_score`` and
        ``ratings_column``.
    user_ids : iterable
        Hashable user ids to evaluate. Duplicates are collapsed to their first
        occurrence. May be empty.
    ratings_column : str
        Name of the rating column to correlate with ``price_score``.

    Returns
    -------
    pandas.DataFrame
        Columns ``user_id``, ``correlation`` and ``expectation``, one row per
        distinct requested id in order of first appearance. Ids with no
        reviews, or whose correlation is undefined, get NaN.
    """
    # De-duplicate while keeping first-seen order.
    requested = list(dict.fromkeys(user_ids))
    pair = ['price_score', ratings_column]

    # Group the relevant rows once instead of re-scanning the whole frame with
    # a boolean mask for every single user.
    relevant = reviews_df[reviews_df['user_id'].isin(requested)]
    correlation_by_user = {}
    for user, user_reviews in tqdm(relevant.groupby('user_id', sort=False)):
        correlation_by_user[user] = user_reviews[pair].corr().iloc[0, 1]

    # Build the frame with explicit columns so an empty ``user_ids`` yields an
    # empty, correctly-shaped result rather than a KeyError on 'correlation'.
    user_expectations = pd.DataFrame({
        'user_id': requested,
        'correlation': [correlation_by_user.get(user, float('nan')) for user in requested],
    })
    user_expectations['correlation'] = user_expectations['correlation'].astype('float64')
    user_expectations['expectation'] = 0.5 * (1 - user_expectations['correlation'])

    return user_expectations

In [None]:
# histogram of user expectations under the naive (simulated) ratings, also written to disk
plot_expectation_distribution(
    reviews_df=reviews,
    user_ids=ids,
    ratings_column='naive_rating',
    saving_path='figures/naive_user_expectations.png',
)

In [None]:
# per-user expectation frames: first for the real star ratings, then for the
# simulated naive ratings
traditional_expectations, naive_expectations = (
    create_expectation_dataframe(reviews, ids, rating_column)
    for rating_column in ('stars', 'naive_rating')
)