## Reading the necessary files and libraries

In [None]:
import platform
import types
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pickle
import random
from datetime import datetime, timedelta
from tqdm import tqdm
import time
import math
from typing import List
from scipy import stats
import types

# Print the version of the Python interpreter running this notebook.
print(platform.python_version())

def imports():
    """Yield a description of every module object bound in the global namespace.

    Yields:
        ``(name, version)`` for modules exposing ``__version__``; otherwise
        just the module ``name`` as a bare string.
    """
    for candidate in globals().values():
        if not isinstance(candidate, types.ModuleType):
            continue
        if hasattr(candidate, "__version__"):
            yield candidate.__name__, candidate.__version__
        else:
            # Modules without a version attribute are reported by name only.
            yield candidate.__name__


Mounted at /content/drive
/content/drive/My Drive/Dissertation


  exec(code_obj, self.user_global_ns, self.user_ns)


In [None]:
# Folder that holds the raw data files, relative to the working directory.
data_location = '../data/raw_data/'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a games_list.pkl that comes from a trusted source.
with open(f'{data_location}games_list.pkl', 'rb') as fp1: 
    games_id_list = pickle.load(fp1) 
# Convert to a list so the IDs can later be accessed by position.
games_id_list = list(games_id_list)


### Selecting 5 random games to simulate our hypothesized version of Steam's review-sorting algorithm and compare it with the real-time results

In [None]:
def random_no_generator():
    """Return a random valid position into the module-level ``games_id_list``."""
    upper_bound = len(games_id_list)
    # randrange(n) draws from 0..n-1, the same range as randint(0, n - 1).
    return random.randrange(upper_bound)

def generate_five_gameIDs(games_id_list):
    """Pick five distinct game IDs at random from ``games_id_list``.

    The index is drawn from the list that was passed in (not from the
    module-level list), so the function is correct for any caller-supplied
    list.

    Args:
        games_id_list: Sequence of hashable game IDs to sample from.

    Returns:
        A list of five distinct IDs in the order they were drawn.

    Raises:
        ValueError: If fewer than five distinct IDs are available; without
            this guard the sampling loop could never terminate.
    """
    if len(set(games_id_list)) < 5:
        raise ValueError("need at least 5 distinct game IDs to sample from")

    random_id_list = []
    while len(random_id_list) < 5:
        position = random.randrange(len(games_id_list))
        game_id = games_id_list[position]
        # Re-draw when the same ID comes up twice.
        if game_id not in random_id_list:
            random_id_list.append(game_id)
    return random_id_list

# Draw the five game IDs used for the rest of the notebook.
random_id_list = generate_five_gameIDs(games_id_list)


In [None]:
print(random_id_list)
# Example output from one run (note the mix of int and str IDs):
# [252870, '1547380', '298110', '39530', 240]

# Resimulation stage

### Gathering reviews in real-time from the 5 randomly selected video-games


In [None]:
# extracting the reviews of the 5 games in real time. 

def fetch_review(appid, params=None):
    """Request one page of reviews for a game from the Steam store.

    Args:
        appid: Steam application ID; converted with ``str`` before being
            appended to the endpoint URL, so ints and strings both work.
        params: Query-string parameters for the request. Defaults to
            ``{'json': 1}`` when omitted.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within the timeout.
    """
    if params is None:
        # Build the default per call instead of sharing one mutable dict.
        params = {'json': 1}
    url = 'https://store.steampowered.com/appreviews/'
    response = requests.get(
        url=url + str(appid),
        params=params,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=30,  # without a timeout a stalled connection hangs forever
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()

def fetch_all_reviews(appid):
    """Download the English reviews of one game, paging 100 at a time.

    The total number of reviews is read from an initial summary request and
    used as the paging budget. Paging stops when the budget is spent or when
    a page comes back with fewer than 100 reviews.

    Args:
        appid: Steam application ID (int or str).

    Returns:
        A list with the review dicts of every fetched page, in fetch order.
    """
    # Normalise once so both requests below build the URL the same way.
    appid = str(appid)
    summary = fetch_review(appid)["query_summary"]
    review_count = int(summary["total_reviews"])

    reviews = []
    cursor = '*'  # start marker for cursor-based paging
    params = {
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'day_range': 9223372036854775807,
        'review_type': 'all',
        'purchase_type': 'all'
    }
    while review_count > 0:
        params['cursor'] = cursor.encode()
        params['num_per_page'] = min(100, review_count)
        review_count -= 100
        response = fetch_review(appid, params)
        cursor = response['cursor']
        page = response['reviews']
        reviews += page
        # A short page means there is nothing left to fetch.
        if len(page) < 100:
            break
    return reviews

def collect_reviews(FPS_appids):
    """Fetch all reviews for each app ID, showing a progress bar.

    Args:
        FPS_appids: Iterable of Steam application IDs.

    Returns:
        A list with one list of reviews per app ID, in input order.
    """
    return [fetch_all_reviews(str(appid)) for appid in tqdm(FPS_appids)]


In [None]:
# Download the reviews of the five sampled games (one list of reviews per game).
validating_list = collect_reviews(random_id_list)

100%|██████████| 5/5 [03:23<00:00, 40.63s/it]


In [None]:
def create_dataframe_from_reviews(game_ids, reviews_list):
    """Flatten per-game review lists into one DataFrame, one row per review.

    Each value is stored under its column name, so the row layout cannot
    drift from the column list (previously the values were appended in a
    different order from ``columns``, which put the steam ID in ``score``,
    the review text in ``steam_id``, and so on).

    Args:
        game_ids: Game IDs, parallel to ``reviews_list``.
        reviews_list: For each game, a list of review dicts as returned by
            the Steam reviews endpoint.

    Returns:
        A DataFrame with columns ``game_id``, ``score`` (the review's
        ``weighted_vote_score``), ``steam_id``, ``review``,
        ``timestamp_created``, ``timestamp_updated``, ``votes_up`` and
        ``recommendation_id``.
    """
    columns = ['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
    data = []

    for game_id, game_reviews in zip(game_ids, reviews_list):
        for review in game_reviews:
            data.append({
                'game_id': game_id,
                'score': review["weighted_vote_score"],
                'steam_id': review["author"]["steamid"],
                'review': review["review"],
                'timestamp_created': review["timestamp_created"],
                'timestamp_updated': review["timestamp_updated"],
                'votes_up': review["votes_up"],
                'recommendation_id': review["recommendationid"],
            })

    return pd.DataFrame(data, columns=columns)


# NOTE(review): `single_FPS_reviews` is not defined in any cell visible in this
# notebook, so this call raises NameError unless it is defined elsewhere; the
# next cell rebuilds `df` from the sampled games anyway.
df = create_dataframe_from_reviews(games_id_list, single_FPS_reviews)


In [None]:
# Build the review table for the five sampled games from the downloaded reviews.
df = create_dataframe_from_reviews(random_id_list, validating_list)

100%|██████████| 5/5 [00:00<00:00, 112.85it/s]


## Testing our hypothesized sorting algorithm for the main bar reviews

Please refer to our manuscript to understand how the hypothesized sorting algorithm of the main bar review works. 

In [None]:
def sort_and_rearrange(temp_df):
    """Return a copy of ``temp_df`` ordered by descending numeric ``score``.

    The input frame is left untouched: callers pass filtered slices of a
    larger frame, and writing into those slices is what triggers pandas'
    SettingWithCopyWarning.

    Args:
        temp_df: DataFrame with a ``score`` column convertible to numbers.

    Returns:
        A new DataFrame with ``score`` converted to numeric, rows sorted from
        highest to lowest score, and a fresh 0..n-1 index named ``index``.
    """
    ranked = temp_df.assign(score=pd.to_numeric(temp_df["score"]))
    ranked = ranked.sort_values("score", ascending=False)
    # Renumber the rows so position 0 is always the top-scored review.
    ranked.index = pd.RangeIndex(len(ranked), name="index")
    return ranked

def get_x_days_ago(end_int, days):
    """Return the Unix timestamp ``days`` days before ``end_int``.

    A day is taken as exactly 86400 seconds, matching the other window
    computations in this notebook. The previous implementation round-tripped
    through local wall-clock time, so its result could shift by an hour
    depending on the machine's timezone and daylight-saving rules.

    Args:
        end_int: Reference Unix timestamp in seconds.
        days: Number of days to go back.

    Returns:
        The earlier timestamp as an int.
    """
    seconds_per_day = 86400
    return int(end_int - days * seconds_per_day)

def get_x_days_ago_reviews(days, df, end_int):
    """Select the reviews created in the ``days`` days before ``end_int``.

    The window is inclusive at its start (``days`` days before ``end_int``)
    and ends one second before ``end_int``. The input frame is not modified;
    the numeric conversion is applied to a copy so that filtered slices
    passed in by callers do not trigger SettingWithCopyWarning.

    Args:
        days: Length of the window in days.
        df: DataFrame with a ``timestamp_created`` column.
        end_int: Unix timestamp marking the (exclusive) end of the window.

    Returns:
        A DataFrame with the matching rows and a numeric
        ``timestamp_created`` column.
    """
    start = get_x_days_ago(int(end_int), days)
    numeric = df.assign(timestamp_created=pd.to_numeric(df['timestamp_created']))
    return numeric[numeric['timestamp_created'].between(start, end_int - 1)]

def condition_against(temp1, temp2, difference):
    """Top up the selected reviews ``temp1`` with reviews from ``temp2``.

    Rules, in order:
      * ``temp1`` already has exactly 10 rows: return it unchanged.
      * ``difference == 10`` (callers pass ``10 - len(temp1)``, so nothing
        was selected yet): return all of ``temp2``.
      * ``temp2`` is empty or no slots are left: return ``temp1``. The
        previous version returned the empty ``temp2`` here, which discarded
        the reviews already selected.
      * otherwise: among the first ``difference`` rows of ``temp2``, append
        those whose score is strictly greater than the score in the first
        row of ``temp1``.

    Args:
        temp1: Reviews already selected; must have a ``score`` column.
        temp2: Candidate reviews from the next window, with a ``score``
            column.
        difference: Number of leading ``temp2`` rows to consider.

    Returns:
        A DataFrame with the resulting selection.
    """
    if len(temp1) == 10:
        return temp1
    if difference == 10:
        return temp2[:len(temp2)]
    if len(temp2) == 0 or difference <= 0:
        return temp1

    # The first row of temp1 is the bar candidates have to beat.
    threshold = float(temp1["score"].iloc[0])
    head = temp2.iloc[:difference]
    promoted = head[head["score"].astype(float) > threshold]
    if promoted.empty:
        return temp1
    return pd.concat([temp1, promoted])

def temp_conditional_return_1(time_created, day_range1, day_range_ultimate, df):
    """Combine the most recent window of reviews with the window before it.

    The recent window covers the ``day_range1`` days before ``time_created``;
    the older window covers the following
    ``day_range_ultimate - day_range1`` days before that. Both are ranked by
    score and merged with ``condition_against``.

    Args:
        time_created: Reference Unix timestamp.
        day_range1: Length in days of the most recent window.
        day_range_ultimate: Total number of days looked back.
        df: Reviews to select from.

    Returns:
        The DataFrame produced by ``condition_against``.
    """
    recent = get_x_days_ago_reviews(day_range1, df, time_created)
    recent_start = get_x_days_ago(time_created, day_range1)
    older = get_x_days_ago_reviews(day_range_ultimate - day_range1, df, recent_start)
    # Number of slots still open after the recent window.
    shortfall = 10 - len(recent)
    return condition_against(
        sort_and_rearrange(recent),
        sort_and_rearrange(older),
        shortfall,
    )

def temp_conditional_return_2(time_created, day_range1, day_range2, day_range_ultimate, df):
    """Build the selection from three consecutive look-back windows.

    The windows are, going back from ``time_created``: the first
    ``day_range1`` days, then up to ``day_range2`` days, then up to
    ``day_range_ultimate`` days. Each window is ranked by score; the first
    seeds the selection and each later one is merged in through
    ``condition_against`` with ``10 - len(selection)`` as the slot count.

    Args:
        time_created: Reference Unix timestamp.
        day_range1: End of the first window, in days before ``time_created``.
        day_range2: End of the second window, in days.
        day_range_ultimate: End of the third window, in days.
        df: Reviews to select from.

    Returns:
        The DataFrame left after merging all three windows.
    """
    # (window length in days, timestamp at which the window ends)
    windows = [
        (day_range1, time_created),
        (day_range2 - day_range1, get_x_days_ago(time_created, day_range1)),
        (day_range_ultimate - day_range2, get_x_days_ago(time_created, day_range2)),
    ]

    selection = None
    for span, window_end in windows:
        ranked = sort_and_rearrange(get_x_days_ago_reviews(span, df, window_end))
        if selection is None:
            selection = ranked
        else:
            selection = condition_against(selection, ranked, 10 - len(selection))
    return selection

def temp_conditional_return_3(time_created, day_range1, day_range2, day_range_3, start_date, df):
    """Build the selection from four consecutive look-back windows.

    The first three windows end ``day_range1``, ``day_range2`` and
    ``day_range_3`` days before ``time_created``. The fourth reaches back to
    ``start_date``, expressed as whole days:
    ``int((time_created - start_date) / 86400)``. Each window is ranked by
    score; the first seeds the selection and each later one is merged in
    through ``condition_against`` with ``10 - len(selection)`` as the slot
    count.

    Args:
        time_created: Reference Unix timestamp.
        day_range1: End of the first window, in days before ``time_created``.
        day_range2: End of the second window, in days.
        day_range_3: End of the third window, in days.
        start_date: Unix timestamp bounding the oldest window.
        df: Reviews to select from.

    Returns:
        The DataFrame left after merging all four windows.
    """
    total_days = int((time_created - start_date) / 86400)
    # (window length in days, timestamp at which the window ends)
    windows = [
        (day_range1, time_created),
        (day_range2 - day_range1, get_x_days_ago(time_created, day_range1)),
        (day_range_3 - day_range2, get_x_days_ago(time_created, day_range2)),
        (total_days - day_range_3, get_x_days_ago(time_created, day_range_3)),
    ]

    selection = None
    for span, window_end in windows:
        ranked = sort_and_rearrange(get_x_days_ago_reviews(span, df, window_end))
        if selection is None:
            selection = ranked
        else:
            selection = condition_against(selection, ranked, 10 - len(selection))
    return selection

def get_list_of_visible_reviews(df):
    """Simulate the main-bar review list for every placeholder review.

    A placeholder is a row whose ``steam_id`` is ``"validating"``. For each
    one, the reviews of the same game with a non-zero ``votes_up`` are
    collected, and the window is widened step by step (30 days, then 90,
    then 180, then back to the game's earliest such review) until at least
    10 reviews are selected. The selection is then ranked by score and cut
    to at most 10.

    Game IDs are compared as strings because the ID list can mix ints and
    strings; comparing ``str(id)`` with a raw int column would never match.
    Rows are addressed by position, so the frame does not need a 0..n-1
    index.

    Args:
        df: DataFrame with columns ``steam_id``, ``game_id``,
            ``timestamp_created``, ``votes_up``, ``score``, ``review`` and
            ``recommendation_id``.

    Returns:
        Four parallel lists with one entry per placeholder row: the review
        texts, their scores, their creation timestamps and their
        recommendation IDs.
    """
    order_of_visibility, order_of_scores, order_of_times, list_of_visible_id = [], [], [], []
    game_ids_as_text = df["game_id"].astype(str)
    has_votes = df["votes_up"] != 0

    for position in range(len(df)):
        if str(df["steam_id"].iloc[position]) != "validating":
            continue
        game_id = game_ids_as_text.iloc[position]
        time_created = int(df["timestamp_created"].iloc[position])
        temp = df[(game_ids_as_text == game_id) & has_votes]
        temp_dates = get_x_days_ago_reviews(30, temp, time_created)
        if len(temp) == 0:
            temp = temp_dates
        elif len(temp_dates) >= 10:
            temp = temp_dates
        else:
            # Not enough reviews in the last 30 days: widen the window.
            temp_90_dates = temp_conditional_return_1(time_created, 30, 90, temp)
            if len(temp_90_dates) >= 10:
                temp = temp_90_dates
            else:
                temp_180_dates = temp_conditional_return_2(time_created, 30, 90, 180, temp)
                if len(temp_180_dates) >= 10:
                    temp = temp_180_dates
                else:
                    # Last resort: look back to the earliest review of the game.
                    start_date = int(temp['timestamp_created'].min())
                    temp = temp_conditional_return_3(time_created, 30, 90, 180, start_date, temp)

        temp = sort_and_rearrange(temp)
        n = min(len(temp), 10)
        order_of_visibility.append(list(temp["review"][:n]))
        order_of_scores.append(list(temp["score"][:n]))
        order_of_times.append(list(temp["timestamp_created"][:n]))
        list_of_visible_id.append(list(temp["recommendation_id"][:n]))

    return order_of_visibility, order_of_scores, order_of_times, list_of_visible_id


In [None]:
# Creating hypothetical data as if we are the reviewers writing the review in real time
# Creating hypothetical data as if we are the reviewers writing the review in real time
# Drop placeholder rows left over from a previous run of this cell.
df = df[df['steam_id'] != "validating"]
# Unix timestamp at which the placeholder reviews are pretended to be posted.
stimulated_time = 1656606800
game_id = random_id_list
# One random score per sampled game.
scores = [random.random() for i in range(len(random_id_list))]
# NOTE(review): the lists below hard-code 5 entries while `scores` uses
# len(random_id_list); zip() truncates to the shortest input.
steam_id_list = ["validating"] * 5
player_review_list = ["validating"] * 5
time_stamp_created_list = [stimulated_time] * 5
recommendation_id_list_2 = ["1", "2", "3", "4", "5"]
# Same list object as the creation timestamps (an alias, not a copy).
time_stamp_updated_list = time_stamp_created_list
votes_up_list = [0] * 5

# One placeholder row per sampled game, using the same columns as `df`.
stimulated = pd.DataFrame(
    list(zip(game_id, scores, steam_id_list, player_review_list, time_stamp_created_list, time_stamp_updated_list, votes_up_list, recommendation_id_list_2)), 
    columns=['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
) 

# Append the placeholders and renumber rows 0..n-1 so rows can be looked up by label.
df = pd.concat([df, stimulated])
df["index"] = list(range(len(df)))
df = df.set_index("index")


In [None]:
# Perform the re-simulation with the hypothesized default-sorted algorithm 
# Run the simulation: one list of visible reviews (with scores, timestamps and
# recommendation IDs) per placeholder row in `df`.
order_of_visible_reviews, order_of_scores, order_of_times, id_list = get_list_of_visible_reviews(df)
# Pair each game ID with its simulated lists; zip() assumes the results come
# back in the same order as `game_id`.
validating = pd.DataFrame(
    list(zip(game_id, order_of_visible_reviews, order_of_scores, order_of_times, id_list)), 
    columns=['game_id', 'reviews', 'scores', 'times_order', 'id']
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
100%|██████████| 31351/31351 [00:00<00:00, 101706.97it/s]


Unnamed: 0,game_id,reviews,scores,times order
0,252870,[After seeing Neebs gaming announce that they ...,"[0.8003709912300109, 0.719564139842987, 0.5564...","[1652381956, 1650579308, 1655533461, 165460486..."
1,1547380,[[table][th]\nReview by [url=https://store.ste...,"[0.7956037521362304, 0.789889633655548, 0.6298...","[1644564603, 1644558942, 1649456280, 165500634..."
2,298110,[Far Cry 4 is currently [h1]FREE[/h1] on Amazo...,"[0.8769996762275695, 0.8227600455284118, 0.667...","[1654100832, 1654153357, 1654373965, 165420953..."
3,39530,[Faster than a bullet\nTerrifying scream\nEnra...,"[0.865518569946289, 0.8042218089103698, 0.7882...","[1533718571, 1493389162, 1496480907, 157765442..."
4,240,[Before I played Counter-Strike: Source i had ...,"[0.9212783575057985, 0.9050492644309996, 0.838...","[1654604660, 1654361577, 1654020027, 165412612..."


In [None]:
def print_nicely(df):
    """Expand per-game review lists into one row per (game, review).

    Up to the first 10 reviews of each game are emitted. Games with fewer
    than 10 reviews contribute only the rows they have (the previous version
    always indexed positions 0-9 and raised IndexError on shorter lists).

    Args:
        df: DataFrame with a ``game_id`` column and a ``reviews`` column
            holding a sequence of review texts per row.

    Returns:
        A DataFrame with columns ``game_id``, ``review`` (as ``str``) and
        ``order`` (1-based position of the review within its game).
    """
    rows = []
    for game, reviews in zip(df["game_id"], df["reviews"]):
        for rank, text in enumerate(list(reviews)[:10], start=1):
            rows.append((game, str(text), rank))
    return pd.DataFrame(rows, columns=['game_id', 'review', "order"])


In [None]:
# One row per (game, visible review) with its 1-based position; displayed below.
validation = print_nicely(validating)
validation


Unnamed: 0,game_id,review,order
0,252870,After seeing Neebs gaming announce that they w...,1
1,252870,The game is definitely fun IF you play with pe...,2
2,252870,Pulsar is a game that can't really be describe...,3
3,252870,My new favourite! Great fun for playing with f...,4
4,252870,This game is janky as all hell with poor feedb...,5
5,252870,"A load of fun, but better with a real crew tha...",6
6,252870,Amazing game to play with friends. Cool mechan...,7
7,252870,funny space game where you get mad at friends,8
8,252870,Game be gud,9
9,252870,i think this game is really fun with friends a...,10


In [None]:
# Comparing our results to the ground-truth results using Kendall's Tau coefficient

# ID=252870
# NOTE(review): presumably the rank each observed review holds in our simulated
# ordering; confirm the encoding against the manuscript.
ground_truth = [1, 2, 10, 3, 4, 5, 6, 7, 8, 9]
# Simulated ranks 1..10; reused by the cells for the other games below.
predicted = [i for i in range(1, 11)]


# kendalltau returns the rank correlation and the p-value of its significance test.
tau, p_value = stats.kendalltau(ground_truth, predicted)
print("tau is", tau, "with a p-value of", p_value)


tau is 0.6888888888888888  with p-value of 0.00468694885361552


In [None]:
## ID=1547380
# Uses `predicted` (ranks 1..10) defined in the cell for game 252870.
ground_truth=[1,3,11,4,5,6,7,8,9,10]
tau, p_value = stats.kendalltau(ground_truth, predicted)
print("tau is", tau, " with p-value of", p_value)

tau is 0.6888888888888888  with p-value of 0.00468694885361552


In [None]:
## ID=298110
# Uses `predicted` (ranks 1..10) defined in the cell for game 252870.
ground_truth=[1,2,3,4,5,6,7,8,11,12]
tau, p_value = stats.kendalltau(ground_truth, predicted)
print("tau is", tau, " with p-value of", p_value)

tau is 0.9999999999999999  with p-value of 5.511463844797178e-07


In [None]:
## ID = 39530
# Uses `predicted` (ranks 1..10) defined in the cell for game 252870.
ground_truth=[1,2,11,6,7,8,12,9,10,13]
tau, p_value = stats.kendalltau(ground_truth, predicted)
print("tau is", tau, " with p-value of", p_value)

tau is 0.6888888888888888  with p-value of 0.00468694885361552


In [None]:
## ID = 240
# Uses `predicted` (ranks 1..10) defined in the cell for game 252870.
ground_truth=[1,2,3,4,5,6,7,8,9,10]
tau, p_value = stats.kendalltau(ground_truth, predicted)
print("tau is", tau, " with p-value of", p_value)

tau is 0.9999999999999999  with p-value of 5.511463844797178e-07


## Testing our hypothesized sorting algorithm for our side bar of reviews

Refer to the manuscript to understand how our hypothesized sorting algorithm works. 

In [None]:
def get_temp_df(df, reviews_list, end_int=1656606800, days=30, limit=10):
    """Simulate the side-bar review list for every game in ``df``.

    For each game, the reviews created in the ``days`` days before
    ``end_int`` are taken newest first, the ones already shown in the main
    bar are removed, and the first ``limit`` remaining recommendation IDs
    are kept.

    Game IDs and recommendation IDs are compared as strings, because the
    sampled ID list can mix ints and strings.

    Args:
        df: One row per game, with columns ``game_id`` and ``id`` (the
            recommendation IDs shown in the main bar for that game).
        reviews_list: DataFrame of reviews with columns ``game_id``,
            ``timestamp_created`` and ``recommendation_id``.
        end_int: Unix timestamp marking the (exclusive) end of the window.
        days: Length of the window in days.
        limit: Maximum number of side-bar reviews per game.

    Returns:
        A list with, per game, a list of recommendation IDs as strings.
    """
    start = int(end_int - (days * 86400))
    # Computed once: neither depends on the game being processed.
    review_game_ids = reviews_list['game_id'].astype(str)
    in_window = reviews_list['timestamp_created'].between(start, end_int - 1)

    invisible_reviews_list_all = []
    for game_id, main_bar_ids in tqdm(zip(df["game_id"], df["id"]), total=len(df)):
        main_bar = {str(item) for item in main_bar_ids}
        temp_dates = reviews_list[(review_game_ids == str(game_id)) & in_window]
        temp_dates = temp_dates.sort_values("timestamp_created", ascending=False)
        sidebar_ids = [
            rec_id
            for rec_id in (str(item) for item in temp_dates["recommendation_id"])
            if rec_id not in main_bar
        ]
        invisible_reviews_list_all.append(sidebar_ids[:limit])
    return invisible_reviews_list_all

# One list of side-bar recommendation IDs per row of `validating`.
sidebar_reviews_list = get_temp_df(validating, df)


In [None]:
def convert_id_to_review(df, list_of_review):
    """Look up the review text for each recommendation ID.

    IDs are compared as strings. The lookup table is built in one pass over
    ``df`` instead of rescanning the frame for every ID, and rows are read
    by position, so the frame does not need a 0..n-1 index.

    Args:
        df: DataFrame with ``recommendation_id`` and ``review`` columns.
        list_of_review: Recommendation IDs to resolve, in the wanted order.

    Returns:
        The review texts in the order of ``list_of_review``. An ID that
        matches several rows contributes all of them in frame order; an ID
        with no match contributes nothing.
    """
    texts_by_id = {}
    for rec_id, text in zip(df["recommendation_id"], df["review"]):
        texts_by_id.setdefault(str(rec_id), []).append(text)

    return [
        text
        for review_id in list_of_review
        for text in texts_by_id.get(str(review_id), [])
    ]

# Print the side-bar review texts, one list per game.
for reviews_ids in sidebar_reviews_list:
    print(convert_id_to_review(df, reviews_ids))


["Pretty relaxing space sim, flying isn't too bad and once you get the hang of what you should upgrade first and work toward, you're set. I've been getting by just fine playing alone with just the bots to help, but playing online is where it shines for extra enjoyment haha :D", 'Nice game to playing with your friends', "Really fun game putting you and several other players in control of a spaceship, this game completely depends on the people you're playing with, but In my experience it's not that difficult to find a good crew of randoms. The challenges this game gives can be somewhat difficult with some puzzles, but the spaceship combat is like no other! \nI especially love the engineer role as balancing power is a really fun challenge that requires constant attnetion during fights as you need to maintain power to shields and weapons without overheating your reactor! With good communication you can achieve this by having cool down periods where you throttle the reactor. As a captain yo