# Setup

In [1]:
import platform
import types
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pickle
import random
from datetime import datetime, timedelta
from tqdm import tqdm
import time
import math
from typing import List
from scipy import stats
import types

print(platform.python_version())

def imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            try:
                yield val.__name__, val.__version__
            except AttributeError:
                yield val.__name__


In [2]:
data_location = '../data/raw_data/'
with open(f'{data_location}games_list.pkl', 'rb') as fp1: 
    games_id_list = pickle.load(fp1) 
games_id_list = list(games_id_list)

print(len(games_id_list))

673

In [3]:
def random_no_generator():
    return random.randint(0, len(games_id_list) - 1)

def generate_five_gameIDs(games_id_list):
    random_id_list = []
    while len(random_id_list) < 5:
        no = random_no_generator()
        game_id = games_id_list[no]
        if game_id not in random_id_list:
            random_id_list.append(game_id)
    return random_id_list

random_id_list = generate_five_gameIDs(games_id_list)


In [4]:
print(random_id_list)

['1097580', '1786790', '450540', '1079260', '414700']


# Resimulation stage

## Gathering reviews in real-time from the 5 randomly selected video-games

In [5]:
# extracting the reviews of the 5 games in real time.

def get_one_review(appid, params={'json':1}):
        url = 'https://store.steampowered.com/appreviews/'
        response = requests.get(url=url+appid, params=params, headers={'User-Agent': 'Mozilla/5.0'})
        return response.json()


def get_all_reviews(appid):
    review_count=int(((get_one_review(str(appid)))["query_summary"])["total_reviews"])
    reviews = []
    cursor = '*'
    params = {
            'json' : 1,
            'filter' : 'all',
            'language' : 'english',
            'day_range' : 9223372036854775807,
            'review_type' : 'all',
            'purchase_type' : 'all'
            }
    while review_count > 0:
        params['cursor'] = cursor.encode()
        params['num_per_page'] = min(100, review_count)
        review_count -= 100
        response = get_one_review(appid, params)
        cursor = response['cursor']
        reviews += response['reviews']
        if len(response['reviews']) < 100:
          break
    return (reviews)

def collect_reviews(FPS_appids):
  review_list=[]
  for i in (range(len(FPS_appids))):
    appid=str(FPS_appids[i])
    reviews=get_all_reviews(appid)
    review_list.append(reviews)
  return(review_list)

In [6]:
validating_list=collect_reviews(random_id_list)

In [7]:
def create_dataframe_from_reviews(game_ids, reviews_list):
    columns = ['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
    data = []

    for game_id, game_reviews in zip(game_ids, reviews_list):
        for review in game_reviews:
            data.append([
                game_id,
                review["author"]["steamid"],
                review["review"],
                review["timestamp_created"],
                review["timestamp_updated"],
                review["weighted_vote_score"],
                review["votes_up"],
                review["recommendationid"]
            ])
            data["game_id"]=data["game_id"].astype(str)
            data["timestamp_created"]=(data["timestamp_created"]).astype(int)

    return pd.DataFrame(data, columns=columns)


In [8]:
df = create_dataframe_from_reviews(random_id_list, validating_list)
df.to_csv(" ../data/interim_data/02_algorithm_validation/Validation set 3/validation_group_3.csv")

## Testing our hypothesized sorting algorithm for the default-sorted 'summary' reviews

In [11]:
def sort_and_rearrange(temp_df):
    temp_df["score"] = pd.to_numeric(temp_df["score"])
    temp_df = temp_df.sort_values("score", ascending=False)
    temp_df["index"] = [i for i in range(len(temp_df))]
    temp_df = temp_df.set_index("index")
    return temp_df

def get_x_days_ago(end_int, days):
    end = datetime.fromtimestamp(end_int)
    start = (end - timedelta(days=days))
    start_int = int(time.mktime(start.timetuple()))
    return start_int

def get_x_days_ago_reviews(days, df, end_int):
    start = get_x_days_ago(int(end_int), days)
    df['timestamp_created'] = pd.to_numeric(df['timestamp_created'])
    temp_dates = df[df['timestamp_created'].between(start, end_int - 1)]
    return temp_dates

def condition_against(temp1, temp2, difference):
    if len(temp1) == 10:
        return temp1
    elif difference == 10 or len(temp2) == 0:
        return temp2[:len(temp2)]
    else:
        for i in range(difference):
            if i < len(list(temp2["score"])) and float(list(temp2["score"])[i]) > float(list(temp1["score"])[0]):
                to_be_added = temp2[i:i + 1]
                temp1 = pd.concat([temp1, to_be_added])
        return temp1

def temp_conditional_return_1(time_created, day_range1, day_range_ultimate, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff = day_range_ultimate - day_range1
    in_between_days_reviews = get_x_days_ago_reviews(days_diff, df, day_range1_date)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(in_between_days_reviews)
    return condition_against(temp1, temp2, difference)

def temp_conditional_return_2(time_created, day_range1, day_range2, day_range_ultimate, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff_1 = day_range2 - day_range1
    day_range2_reviews = get_x_days_ago_reviews(days_diff_1, df, day_range1_date)
    day_range2_date = get_x_days_ago(time_created, day_range2)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(day_range2_reviews)
    temp = condition_against(temp1, temp2, difference)
    difference_2 = 10 - len(temp)
    days_diff_2 = day_range_ultimate - day_range2
    day_range3_reviews = get_x_days_ago_reviews(days_diff_2, df, day_range2_date)
    temp3 = sort_and_rearrange(day_range3_reviews)
    return condition_against(temp, temp3, difference_2)

def temp_conditional_return_3(time_created, day_range1, day_range2, day_range_3, start_date, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff_1 = day_range2 - day_range1
    day_range2_reviews = get_x_days_ago_reviews(days_diff_1, df, day_range1_date)
    day_range2_date = get_x_days_ago(time_created, day_range2)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(day_range2_reviews)
    temp = condition_against(temp1, temp2, difference)
    difference_2 = 10 - len(temp)
    days_diff_2 = day_range_3 - day_range2
    temp3 = sort_and_rearrange(day_range3_reviews)
    temp = condition_against(temp, temp3, difference_2)
    day_range3_date = get_x_days_ago(time_created, day_range_3)
    day_range_4 = int((time_created - start_date) / 86400)
    difference_3 = 10 - len(temp)
    days_diff_3 = day_range_4 - day_range_3
    day_range4_reviews = get_x_days_ago_reviews(days_diff_3, df, day_range3_date)
    temp4 = sort_and_rearrange(day_range4_reviews)
    return condition_against(temp, temp4, difference_3)

def get_list_of_visible_reviews(df):
    order_of_visibility, order_of_scores, order_of_times, list_of_visible_id = [], [], [], []
    for i in range(len(df)):
        player = str(df["steam_id"][i])
        if player != "validating":
            continue
        game_id = str(df["game_id"][i])
        time_created = int(df["timestamp_created"][i])
        temp = df[df['game_id'] == game_id]
        temp = temp[temp['votes_up'] != 0]
        temp_dates = get_x_days_ago_reviews(30, temp, time_created)
        if len(temp) == 0:
            temp = temp_dates
        elif len(temp_dates) >= 10:
            temp = temp_dates
        else:
            temp_90_dates = temp_conditional_return_1(time_created, 30, 90, temp)
            if len(temp_90_dates) >= 10:
                temp = temp_90_dates
            else:
                temp_180_dates = temp_conditional_return_2(time_created, 30, 90, 180, temp)
                if len(temp_180_dates) >= 10:
                    temp = temp_180_dates
                else:
                    start_date = int(temp['timestamp_created'].min())
                    temp = temp_conditional_return_3(time_created, 30, 90, 180, start_date, temp)

        temp = sort_and_rearrange(temp)
        n = min(len(temp), 10)
        list_of_visible = list(temp["review"][:n]) 
        list_of_scores = list(temp["score"][:n])
        list_of_times = list(temp["timestamp_created"][:n])
        list_of_visible_2 = list(temp["recommendation_id"][:n])
        list_of_visible_id.append(list_of_visible_2)
        order_of_visibility.append(list_of_visible)
        order_of_scores.append(list_of_scores)
        order_of_times.append(list_of_times)

    return order_of_visibility, order_of_scores, order_of_times, list_of_visible_id


In [12]:
# Creating hypothetical data as if we are the reviewers writing the review in real time
df = df[df['steam_id'] != "validating"]
stimulated_time = 1699667479
game_id = random_id_list
scores = [random.random() for i in range(len(random_id_list))]
steam_id_list = ["validating"] * 5
player_review_list = steam_id_list
time_stamp_created_list = [stimulated_time] * 5
recommendation_id_list_2 = ["1", "2", "3", "4", "5"]
time_stamp_updated_list = time_stamp_created_list
votes_up_list = [0] * 5

stimulated = pd.DataFrame(
    list(zip(game_id, scores, steam_id_list, player_review_list, time_stamp_created_list, time_stamp_updated_list, votes_up_list, recommendation_id_list_2)),
    columns=['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
)

df = pd.concat([df, stimulated])
df["index"] = list(range(len(df)))
df = df.set_index("index")


In [13]:
# Perform the re-simulation with the hypothesized default-sorted algorithm
order_of_visible_reviews, order_of_scores, order_of_times, id_list = get_list_of_visible_reviews(df)
validating = pd.DataFrame(
    list(zip(game_id, order_of_visible_reviews, order_of_scores, order_of_times, id_list)),
    columns=['game_id', 'reviews', 'scores', 'times_order', 'id']
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice fro

In [14]:
def print_nicely(df):
    game_list, reviews_list, order = [], [], []
    for i in range(len(df)):
        game = df["game_id"][i]
        reviews = list(df["reviews"][i])
        for j in range(10):
            game_list.append(game)
            reviews_list.append(str(reviews[j]))
            order.append(j + 1)
    new_df = pd.DataFrame(
        list(zip(game_list, reviews_list, order)), 
        columns=['game_id', 'review', "order"]
    )
    return new_df


In [15]:
validation=print_nicely(validating)

In [16]:
validation

Unnamed: 0,game_id,review,order
0,1097580,I play porn games on my main account,1
1,1097580,"⠘⡀⠀Yes, I play NSFW games\n⠀⠀⠀⠑⡀ on my main ac...",2
2,1097580,I've never had more fun disappointing my ances...,3
3,1097580,This is a horrible game with tons of micro-tra...,4
4,1097580,I played the game and I got to say the story i...,5
5,1097580,Be prepared for the fact that in order to get ...,6
6,1097580,The SIMS of H games. As of October 2023:\n\n- ...,7
7,1097580,you cannot have sex in the game,8
8,1097580,"I don't care with any maids or any heroines , ...",9
9,1097580,rape,10


## Testing our algorithm for the recently-sorted reviews displayed on the miniature side-bar of the webpage.

In [17]:
def get_temp_df(df, reviews_list):
    invisible_reviews_list_all = []
    for i in tqdm(range(1699667479(df))):
        end_int = 1699662003
        start = int(end_int - (30 * 86400))
        game_id = str(df.game_id[i])
        temp_dates = reviews_list[(reviews_list['game_id'] == game_id)]
        temp_dates = temp_dates[temp_dates['timestamp_created'].between(start, end_int - 1)]
        temp_dates = temp_dates.sort_values("timestamp_created", ascending=False)
        observable_list = [str(item) for item in list(df.id[i])]
        temp_dates_list = [str(item) for item in list(temp_dates.recommendation_id)]
        sidebar_temp_dates_list = [x for x in temp_dates_list if x not in observable_list][:10]
        invisible_reviews_list_all.append(sidebar_temp_dates_list)
    return invisible_reviews_list_all

sidebar_reviews_list = get_temp_df(validating, df)

def convert_id_to_review(df, list_of_review):
    review_list = []
    for review_id in list_of_review:
        for j in range(len(df)):
            if str(df.recommendation_id[j]) == str(review_id):
                review_list.append(df.review[j])
    return review_list

for reviews_ids in sidebar_reviews_list:
    print(convert_id_to_review(df, reviews_ids))

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 624.93it/s]


['My wallet hates me.', '.']
['Greatest shit that ever exists, sad that the other games are nothing compared to this, except for like ambrosia.', "My first RPG H-game. Lots of fun gameplay and different types of scenes for different things you fight. I have a save with about 11.5 hours and now I feel like I have to grind to be able to progress to the next stage, and with the repetitive nature of some of the fights at this stage and things to do, I got kinda bored, but I'll probably play again someday. I love the H-content during the fights and the different outfits you get to wear etc, and if you love corruption then this is a perfect game for you!!", 'hentai game that lets you play around. thats is a yes', 'The constant monster fights gets really boring after a while, and the e-scenes are a little ... off ? \nIt`s hard to describe, they are well made, diverse, caters to a lot of fetishes and there are a lot of them but they still feel bland somehow.\nI`m also really not a fan of havin