# Setup

In [2]:
import platform
import types
import requests
from bs4 import BeautifulSoup
import pandas as pd
import pickle
import random
from datetime import datetime, timedelta
from tqdm import tqdm
import time
import math
from typing import List
from scipy import stats
import types

def imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            try:
                yield val.__name__, val.__version__
            except AttributeError:
                yield val.__name__
list(imports())

In [3]:
data_location = '../data/raw_data/'
with open(f'{data_location}games_list.pkl', 'rb') as fp1: 
    games_id_list = pickle.load(fp1) 
games_id_list = list(games_id_list)

print(len(games_id_list))

673

In [4]:
def random_no_generator():
    return random.randint(0, len(games_id_list) - 1)

def generate_five_gameIDs(games_id_list):
    random_id_list = []
    while len(random_id_list) < 5:
        no = random_no_generator()
        game_id = games_id_list[no]
        if game_id not in random_id_list:
            random_id_list.append(game_id)
    return random_id_list

random_id_list = generate_five_gameIDs(games_id_list)

In [5]:
random_id_list

['1238860', '775940', '1352020', '1460510', '1069640']


# Resimulation stage

## Gathering reviews in real-time from the 5 randomly selected video-games

In [6]:
# extracting the reviews of the 5 games in real time. 

def fetch_review(appid, params={'json': 1}):
    url = 'https://store.steampowered.com/appreviews/'
    response = requests.get(url=url + appid, params=params, headers={'User-Agent': 'Mozilla/5.0'})
    return response.json()

def fetch_all_reviews(appid):
    review_count = int(((fetch_review(str(appid)))["query_summary"])["total_reviews"])
    reviews = []
    cursor = '*'
    params = {
        'json': 1,
        'filter': 'all',
        'language': 'english',
        'day_range': 9223372036854775807,
        'review_type': 'all',
        'purchase_type': 'all'
    }
    while review_count > 0:
        params['cursor'] = cursor.encode()
        params['num_per_page'] = min(100, review_count)
        review_count -= 100
        response = fetch_review(appid, params)
        cursor = response['cursor']
        reviews += response['reviews']
        if len(response['reviews']) < 100: 
            break
    return reviews

def collect_reviews(FPS_appids):
    review_list = []
    for appid in tqdm(FPS_appids):
        reviews = fetch_all_reviews(str(appid))
        review_list.append(reviews)
    return review_list


In [7]:
validating_list = collect_reviews(random_id_list)




In [8]:
def create_dataframe_from_reviews(game_ids, reviews_list):
    columns = ['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
    data = []

    for game_id, game_reviews in zip(game_ids, reviews_list):
        for review in game_reviews:
            data.append([
                game_id,
                review["author"]["steamid"],
                review["review"],
                review["timestamp_created"],
                review["timestamp_updated"],
                review["weighted_vote_score"],
                review["votes_up"],
                review["recommendationid"]
            ])
            data["game_id"]=data["game_id"].astype(str)
            data["timestamp_created"]=(data["timestamp_created"]).astype(int)

    return pd.DataFrame(data, columns=columns)


In [9]:
df = create_dataframe_from_reviews(random_id_list, validating_list)
df.to_csv(" ../data/interim_data/02_algorithm_validation/Validation set 4/validation_group_4.csv")




## Testing our hypothesized sorting algorithm for the default-sorted 'summary' reviews

In [16]:
def sort_and_rearrange(temp_df):
    temp_df["score"] = pd.to_numeric(temp_df["score"])
    temp_df = temp_df.sort_values("score", ascending=False)
    temp_df["index"] = [i for i in range(len(temp_df))]
    temp_df = temp_df.set_index("index")
    return temp_df

def get_x_days_ago(end_int, days):
    end = datetime.fromtimestamp(end_int)
    start = (end - timedelta(days=days))
    start_int = int(time.mktime(start.timetuple()))
    return start_int

def get_x_days_ago_reviews(days, df, end_int):
    start = get_x_days_ago(int(end_int), days)
    df['timestamp_created'] = pd.to_numeric(df['timestamp_created'])
    temp_dates = df[df['timestamp_created'].between(start, end_int - 1)]
    return temp_dates

def condition_against(temp1, temp2, difference):
    if len(temp1) == 10:
        return temp1
    elif difference == 10 or len(temp2) == 0:
        return temp2[:len(temp2)]
    else:
        for i in range(difference):
            if i < len(list(temp2["score"])) and float(list(temp2["score"])[i]) > float(list(temp1["score"])[0]):
                to_be_added = temp2[i:i + 1]
                temp1 = pd.concat([temp1, to_be_added])
        return temp1

def temp_conditional_return_1(time_created, day_range1, day_range_ultimate, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff = day_range_ultimate - day_range1
    in_between_days_reviews = get_x_days_ago_reviews(days_diff, df, day_range1_date)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(in_between_days_reviews)
    return condition_against(temp1, temp2, difference)

def temp_conditional_return_2(time_created, day_range1, day_range2, day_range_ultimate, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff_1 = day_range2 - day_range1
    day_range2_reviews = get_x_days_ago_reviews(days_diff_1, df, day_range1_date)
    day_range2_date = get_x_days_ago(time_created, day_range2)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(day_range2_reviews)
    temp = condition_against(temp1, temp2, difference)
    difference_2 = 10 - len(temp)
    days_diff_2 = day_range_ultimate - day_range2
    day_range3_reviews = get_x_days_ago_reviews(days_diff_2, df, day_range2_date)
    temp3 = sort_and_rearrange(day_range3_reviews)
    return condition_against(temp, temp3, difference_2)

def temp_conditional_return_3(time_created, day_range1, day_range2, day_range_3, start_date, df):
    day_range1_reviews = get_x_days_ago_reviews(day_range1, df, time_created)
    day_range1_date = get_x_days_ago(time_created, day_range1)
    days_diff_1 = day_range2 - day_range1
    day_range2_reviews = get_x_days_ago_reviews(days_diff_1, df, day_range1_date)
    day_range2_date = get_x_days_ago(time_created, day_range2)
    difference = 10 - len(day_range1_reviews)
    temp1 = sort_and_rearrange(day_range1_reviews)
    temp2 = sort_and_rearrange(day_range2_reviews)
    temp = condition_against(temp1, temp2, difference)
    difference_2 = 10 - len(temp)
    days_diff_2 = day_range_3 - day_range2
    temp3 = sort_and_rearrange(day_range3_reviews)
    temp = condition_against(temp, temp3, difference_2)
    day_range3_date = get_x_days_ago(time_created, day_range_3)
    day_range_4 = int((time_created - start_date) / 86400)
    difference_3 = 10 - len(temp)
    days_diff_3 = day_range_4 - day_range_3
    day_range4_reviews = get_x_days_ago_reviews(days_diff_3, df, day_range3_date)
    temp4 = sort_and_rearrange(day_range4_reviews)
    return condition_against(temp, temp4, difference_3)

def get_list_of_visible_reviews(df):
    order_of_visibility, order_of_scores, order_of_times, list_of_visible_id = [], [], [], []
    for i in range(len(df)):
        player = str(df["steam_id"][i])
        if player != "validating":
            continue
        game_id = str(df["game_id"][i])
        time_created = int(df["timestamp_created"][i])
        temp = df[df['game_id'] == game_id]
        temp = temp[temp['votes_up'] != 0]
        temp_dates = get_x_days_ago_reviews(30, temp, time_created)
        if len(temp) == 0:
            temp = temp_dates
        elif len(temp_dates) >= 10:
            temp = temp_dates
        else:
            temp_90_dates = temp_conditional_return_1(time_created, 30, 90, temp)
            if len(temp_90_dates) >= 10:
                temp = temp_90_dates
            else:
                temp_180_dates = temp_conditional_return_2(time_created, 30, 90, 180, temp)
                if len(temp_180_dates) >= 10:
                    temp = temp_180_dates
                else:
                    start_date = int(temp['timestamp_created'].min())
                    temp = temp_conditional_return_3(time_created, 30, 90, 180, start_date, temp)

        temp = sort_and_rearrange(temp)
        n = min(len(temp), 10)
        list_of_visible = list(temp["review"][:n]) 
        list_of_scores = list(temp["score"][:n])
        list_of_times = list(temp["timestamp_created"][:n])
        list_of_visible_2 = list(temp["recommendation_id"][:n])
        list_of_visible_id.append(list_of_visible_2)
        order_of_visibility.append(list_of_visible)
        order_of_scores.append(list_of_scores)
        order_of_times.append(list_of_times)

    return order_of_visibility, order_of_scores, order_of_times, list_of_visible_id


In [17]:
# Creating hypothetical data as if we are the reviewers writing the review in real time
df = df[df['steam_id'] != "validating"]
stimulated_time = 1699337110
game_id = random_id_list
scores = [random.random() for i in range(len(random_id_list))]
steam_id_list = ["validating"] * 5
player_review_list = steam_id_list
time_stamp_created_list = [stimulated_time] * 5
recommendation_id_list_2 = ["1", "2", "3", "4", "5"]
time_stamp_updated_list = time_stamp_created_list
votes_up_list = [0] * 5

stimulated = pd.DataFrame(
    list(zip(game_id, scores, steam_id_list, player_review_list, time_stamp_created_list, time_stamp_updated_list, votes_up_list, recommendation_id_list_2)),
    columns=['game_id', 'score', 'steam_id', 'review', 'timestamp_created', 'timestamp_updated', 'votes_up', 'recommendation_id']
)

df = pd.concat([df, stimulated])
df["index"] = list(range(len(df)))
df = df.set_index("index")


In [18]:
# Perform the re-simulation with the hypothesized default-sorted algorithm
order_of_visible_reviews, order_of_scores, order_of_times, id_list = get_list_of_visible_reviews(df)
validating = pd.DataFrame(
    list(zip(game_id, order_of_visible_reviews, order_of_scores, order_of_times, id_list)),
    columns=['game_id', 'reviews', 'scores', 'times_order', 'id']
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp_df["score"] = pd.to_numeric(temp_df["score"])
A value is trying to be set on a copy of a slice fro

In [19]:
def print_nicely(df):
    game_list, reviews_list, order = [], [], []
    for i in range(len(df)):
        game = df["game_id"][i]
        reviews = list(df["reviews"][i])
        for j in range(10):
            game_list.append(game)
            reviews_list.append(str(reviews[j]))
            order.append(j + 1)
    new_df = pd.DataFrame(
        list(zip(game_list, reviews_list, order)), 
        columns=['game_id', 'review', "order"]
    )
    return new_df


In [20]:
validation=print_nicely(validating)

In [21]:
validation

Unnamed: 0,game_id,review,order
0,1238860,Beautiful game with 100s of hours on multiple ...,1
1,1238860,⣿⣿⣿⣿⣿⣿⣿⣿⡿⠿⠛⠛⠛⠋⠉⠈⠉⠉⠉⠉⠛⠻⢿⣿⣿⣿⣿⣿⣿⣿\n⣿⣿⣿⣿⣿⡿⠋⠁⠀⠀⠀⠀⠀⠀...,2
2,1238860,"only BF[b]2[/b], BF[b]3[/b], BF[b]4[/b]",3
3,1238860,"Take Battlefield 4 and Remaster it , Dont touc...",4
4,1238860,ea launcher makes me want to eat my doorknob,5
5,1238860,This game gave me:\n\n· Acne\n· AIDS\n· Allerg...,6
6,1238860,Better than sex,7
7,1238860,"The vehicles are fun, the gunplay is crisp, an...",8
8,1238860,WHen u own the game in ur steam library but u ...,9
9,1238860,Average 500% Tickets conquest server host\n\n⣿...,10


## Testing our algorithm for the recently-sorted reviews displayed on the miniature side-bar of the webpage.

In [24]:
def get_temp_df(df, reviews_list):
    invisible_reviews_list_all = []
    for i in tqdm(range(len(df))):
        end_int = 1699337110
        start = int(end_int - (30 * 86400))
        game_id = str(df.game_id[i])
        temp_dates = reviews_list[(reviews_list['game_id'] == game_id)]
        temp_dates = temp_dates[temp_dates['timestamp_created'].between(start, end_int - 1)]
        temp_dates = temp_dates.sort_values("timestamp_created", ascending=False)
        observable_list = [str(item) for item in list(df.id[i])]
        temp_dates_list = [str(item) for item in list(temp_dates.recommendation_id)]
        sidebar_temp_dates_list = [x for x in temp_dates_list if x not in observable_list][:10]
        invisible_reviews_list_all.append(sidebar_temp_dates_list)
    return invisible_reviews_list_all

sidebar_reviews_list = get_temp_df(validating, df)

def convert_id_to_review(df, list_of_review):
    review_list = []
    for review_id in list_of_review:
        for j in range(len(df)):
            if str(df.recommendation_id[j]) == str(review_id):
                review_list.append(df.review[j])
    return review_list

for reviews_ids in sidebar_reviews_list:
    print(convert_id_to_review(df, reviews_ids))

100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 579.68it/s]


['"Noooo you can\'t just use the tank as a long-range explosive sniper! You\'re heavily armored you\'re supposed to push points!"\n\n"No I don\'t think I will. Everyone runs engineer w/anti-tank mines/RPGs or recon/sup C4" - Some chad in chat who proceeds to hold three points down by himself on top of a fucking hill.\n\n\nVery good game, highly recommend nabbing it when it\'s on a sale.', 'gun', 'Requires EA', 'i love this game', 'best battlefield game', 'Dead game, no active servers.', 'b', '"I don\'t know if the game was good for the year it was published, but I can say that the AI (artificial intelligence) of the bots was terrible and it ruined the gaming experience', 'this game sucks, anyone who says its good is delusional and needs to be checked into a mental facility this game fucking hurts to play man why is little billy cross mapping me WITH A SHOTGUN WITH BUCKSHOT', 'If there was an "in-between" button for this game, I would certainly give it that rating.\n\nThat being said, I