In [1]:
import pandas as pd
import numpy as np
import requests

In [2]:
def get_reviews(appid, params={'json':1}):
        url = 'https://store.steampowered.com/appreviews/'
        response = requests.get(url=url+appid, 
                                params=params, 
                                headers=None)
        return response.json()

    
def get_n_reviews(appid, n=100):
    reviews = []
    #title = None
    params = {
            'json' : 1,
            'filter' : 'recent',
            'language' : 'english', 
            'day_range' : None,
            'review_type' : 'all',
            'purchase_type' : 'all' 
            }

    cursor = '*'
    while n > 0:
        params['cursor'] = cursor.encode()
       
        params['num_per_page'] = min(100, n)
        n -= 100
        response = get_reviews(appid, params)
        cursor = response['cursor']      
        reviews += response['reviews']
        
        if len(response['reviews']) < 100: break
   
    return reviews
def getReviews(appid, reviewNum=1000):
    reviews = get_n_reviews(appid, n=reviewNum)
    df = pd.DataFrame(columns = ["review_text","timestamp_created","voted_up", "votes_up", \
                             "votes_funny", "weighted_vote_score","comment_count"])
    for review in reviews:
        playtime_at_review = review["author"]["playtime_at_review"]
        timestamp_created = review["timestamp_created"]
        review_text = review["review"]
        voted_up = review["voted_up"]
        votes_up = review["votes_up"]
        votes_funny = review["votes_funny"]
        weighted_vote_score = review["weighted_vote_score"]
        comment_count = review["comment_count"]

        df = df.append({
                        "review_text":review_text,
                        "timestamp_created":timestamp_created,
                        "voted_up":voted_up, 
                        "votes_up":votes_up, 
                        "votes_funny":votes_funny, 
                        "weighted_vote_score": weighted_vote_score,
                        "comment_count":comment_count,
                        },
                       ignore_index=True)   
    return df


In [3]:
import networkx as nx
import re
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
from nltk.tokenize import sent_tokenize

# Read the text and tokenize into sentences
def read_article(text):
    
    sentences =[]
    
    sentences = sent_tokenize(text)
    for sentence in sentences:
        sentence.replace("[^a-zA-Z0-9]"," ")

    return sentences
    

# Create vectors and calculate cosine similarity b/w two sentences
def sentence_similarity(sent1,sent2,stopwords=None):
    if stopwords is None:
        stopwords = []
    
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    
    all_words = list(set(sent1 + sent2))
    
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    
    #build the vector for the first sentence
    for w in sent1:
        if not w in stopwords:
            vector1[all_words.index(w)]+=1
    
    #build the vector for the second sentence
    for w in sent2:
        if not w in stopwords:
            vector2[all_words.index(w)]+=1
            
    return 1-cosine_distance(vector1,vector2)

# Create similarity matrix among all sentences
def build_similarity_matrix(sentences,stop_words):
    #create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences),len(sentences)))
    
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1!=idx2:
                similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1],sentences[idx2],stop_words)
                
    return similarity_matrix


# Generate and return text summary
def generate_summary(text,top_n):
    #nltk.download('stopwords')
    #nltk.download('punkt')
    stop_words = stopwords.words('english')
    summarize_text = []
    
    # Step1: read text and tokenize
    sentences = read_article(text)
    
    # Steo2: generate similarity matrix across sentences
    sentence_similarity_matrix = build_similarity_matrix(sentences,stop_words)
    print()
    # Step3: Rank sentences in similarirty matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)
    
    #Step4: sort the rank and place top sentences
    ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)),reverse=True)
    
    # Step 5: get the top n number of sentences based on rank    
    for i in range(top_n):
        summarize_text.append(ranked_sentences[i][1])
    
    # Step 6 : outpur the summarized version
    return " ".join(summarize_text)

In [4]:
appid = "1145360"
df = getReviews(appid, reviewNum=150)
df = df[['review_text','voted_up']]
text = df['review_text'].dropna().tolist()
text_merge = ' '.join(text)


In [5]:
%%time
generate_summary(text_merge,3)



Wall time: 19.3 s


"And I enjoyed it immensely - so much so that I bought it on Steam after I bought it on the Switch to enjoy better graphics :)\nThis game completely ruined me for all other roguelikes - I have been searching for similar games, but there are none. dad bad\ndoggo goodie yup, played a good game before you can pet 3 headed dogo enemies go brr God game You can't hade this game i like this game but the green bow lady makes my fairly oddparents underwear wet This game is excellent in more than one way: Art, storytelling, comedy, combat, repeatability and difficulty. The amount of dialogue and story they fit into a ROGUE-LIKE game is incredible."