In [103]:
import pickle

import numpy as np 
import pandas as pd

import re 
import string 

import nltk
#from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import CountVectorizer 

from langdetect import detect

from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import NMF
# from sklearn.metrics.pairwise import cosine_similarity

import spacy
from gensim import corpora, models, similarities, matutils

In [2]:
with open('pokemon.pickle','rb') as read_file:
    df = pickle.load(read_file)

### Data Cleaning


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2903 entries, 0 to 887
Data columns (total 5 columns):
name      2903 non-null object
date      2903 non-null object
rating    2903 non-null object
review    2903 non-null object
game      2903 non-null object
dtypes: object(5)
memory usage: 136.1+ KB


In [4]:
df['rating'] = df['rating'].astype(int)

In [5]:
### let's group the reviews by their ratings 
### following metacritics, 0 to 4 = negative, 5 to 7 mixed, 8 and above = positive 

def sentiment(x):
    if x > 7:
        return 'positive'
    if x < 5:
        return 'negative'
    else: return 'mixed'

df['sentiment'] = df['rating'].apply(lambda x:sentiment(x))


In [6]:
df['sentiment'].value_counts(normalize = True)

negative    0.610059
positive    0.306235
mixed       0.083707
Name: sentiment, dtype: float64

In [7]:
### Some users posted on both pokemon sword and shield. 
### Did some checks, these reviews were the same  

df[df.duplicated('name')]

Unnamed: 0,name,date,rating,review,game,sentiment
0,Metagrass,"Nov 15, 2019",2,"I have also done a review for Pokemon Sword, b...",shield,negative
1,NintendoGuy64,"Nov 15, 2019",0,"As a lifelong fan of Pokemon games, I was ecst...",shield,negative
4,Otonaburu,"Nov 15, 2019",4,What should have been a giant leap to signific...,shield,negative
7,Gamermangamer,"Nov 15, 2019",1,"Was promised a game for ""long time fans of the...",shield,negative
8,Fumetic,"Nov 15, 2019",3,"As these are largely the same games, I have pa...",shield,negative
...,...,...,...,...,...,...
871,HollyS,"Nov 20, 2019",2,"Very short, bland and low quality Pokemon game...",shield,negative
875,Lawrence7,"Nov 20, 2019",10,For anyone debating whether they will like the...,shield,positive
877,sojasonk,"Nov 20, 2019",1,"Lazy writing, bad graphics, an absolute medioc...",shield,negative
879,KrakenOfPepsi,"Nov 20, 2019",4,Metacritic has a pretty small character limit ...,shield,negative


In [8]:
df.review[df['name'] == 'Mack_thge_Sack']

1313    I'm going to state my points and not my emotio...
887     I'm going to state my points and not my emotio...
Name: review, dtype: object

In [9]:
df.reset_index(inplace = True, drop = True)

In [10]:
#dropping duplicates 
df.drop_duplicates(subset='name', keep = 'first', inplace = True)

In [11]:
### cleaning the text 

def make_lower(text):
    return text.lower()

def remove_digits(text):
    return re.sub('\d', '', text)

def clean_text(text):
    text = make_lower(text)
    #text = remove_punctuation(text)
    text = remove_digits(text)
    return text

In [12]:
df['review'] = df['review'].apply(lambda x:clean_text(x))

In [13]:
#df['review'] = df['review'].apply(lambda x:remove_stopwords(str.split(x)))

In [14]:
### check for english words 

In [15]:
def language_detection(x): 
    result = detect(x)
    if result == 'en':
        return x 
    else: return np.NaN 
    
df['review'] = df['review'].apply(lambda x:language_detection(x))

In [16]:
df.dropna(inplace = True)

In [17]:
# lemmatizer=WordNetLemmatizer()

# def lemmatize_words(x):
#     lemmed_string = ''
#     for word in x.split():
#         lemmed_string = lemmed_string+' '+lemmatizer.lemmatize(word)  
#     return lemmed_string.lstrip()

sp = spacy.load('en_core_web_sm')

def lemmatize_words(x):
    text = sp(x)
    lemmed_string =''
    for word in text:
        if word.lemma_ == '-PRON-':
            word.lemma_ = word.orth_ # change the string representation
            word.lemma = word.orth #
        else: lemmed_string = lemmed_string+' '+word.lemma_
    return lemmed_string.lstrip()

In [18]:
df['review'] = df['review'].apply(lambda x:lemmatize_words(x))

In [19]:
def remove_punctuation(text):
    return re.sub('[%s]' % re.escape(string.punctuation), '', text) 

df['review'] = df['review'].apply(lambda x:remove_punctuation(x))

In [28]:
def remove_r(x):
    return x.replace('\r','')

df['review'] = df['review'].apply(lambda x:remove_r(x))  

In [30]:
def strip_whitespace(x):
    return x.strip()

df['review'] = df['review'].apply(lambda x:strip_whitespace(x))  

In [86]:
def remove_rare(x):
    for word in freq:
        if word in x:
            return x.replace(word,'')   

df['review'] = df['review'].apply(lambda x:remove_rare(x))  

In [82]:
### create list of rare words by filtering on word counts
freq = pd.DataFrame(df.review.str.split(expand=True).stack().value_counts())
freq = freq[freq<11]
freq.dropna(inplace = True)
freq.reset_index(inplace = True)
freq = freq['index'].to_list()

In [90]:
df.to_pickle('dfclean.pickle')

# with open('dfclean.pickle','rb') as read_file:
#     df = pickle.load(read_file)

In [109]:
#freq = pd.DataFrame(df.review.str.split(expand=True).stack().value_counts())

In [96]:
### remove stopwords text, using this method just incase I want to add more stopwords 
nltk_stop_words = nltk.corpus.stopwords.words('english')

stop_words =  list(spacy.lang.en.stop_words.STOP_WORDS)
stop_words.extend(['game','pokemon','pokémon'])

for word in stop_words:
    if word in stop_words: 
        continue
    else: stop_words.append(word)

for word in stop_words:
    no_punct = remove_punctuation(word)
    if no_punct not in stop_words: 
        stop_words.append(no_punct)

## Splitting df by sentiment 

In [97]:
negative = df[df['sentiment']=='negative']
mixed = df[df['sentiment']=='mixed']
positive = df[df['sentiment']=='postive']

In [98]:
negative

Unnamed: 0,name,date,rating,review,game,sentiment
0,ChocolateCrane,"Nov 18, 2019",4,let address the elephant in the room first th...,sword,negative
3,firstlovezombie,"Nov 15, 2019",2,i really love pokemon which be why be so crit...,sword,negative
4,FilthyActs88,"Nov 15, 2019",1,pokémon sword and shield be a game of pure wh...,sword,negative
5,Ninjasuite,"Nov 15, 2019",3,this be what happen if a company realize that ...,sword,negative
6,JustrzBustrz,"Nov 15, 2019",2,’ an amazing feat how soulless this game feel ...,sword,negative
...,...,...,...,...,...,...
2889,Gonz04,"Nov 20, 2019",0,this game be so disappointing i literally end...,shield,negative
2891,InTheMood,"Nov 20, 2019",1,this be by far the bad pokémon game i have eve...,shield,negative
2893,Dreadwolf85,"Nov 20, 2019",3,lazy game design pokemon still do not hit eac...,shield,negative
2897,Cotroneo,"Nov 20, 2019",2,if consider this game a successor to the spin ...,shield,negative


In [113]:
vectorizer = CountVectorizer(stop_words=stop_words, ngram_range=(1,2),
                                   strip_accents = 'ascii', 
                                   token_pattern="\\b[a-z][a-z][a-z]+\\b")

In [115]:
doc_word = vectorizer.fit_transform(negative)
doc_word.shape

(6, 4)

In [101]:
pd.DataFrame(doc_word.toarray(), index=df.review, columns=vectorizer.get_feature_names()).head(10)

Unnamed: 0_level_0,aa,aa good,aaa,aaa console,aaa crash,aaa deserve,aaa developer,aaa element,aaa fall,aaa ile,...,zoo,zoo money,zoom,zoom max,zooopia,zooopia characer,zubat,zubat battle,zubat play,zubat tentacool
review,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
let address the elephant in the room first the absence of the national dex yes be terrible lazy blah blah blah i be sure have already hear this in hundred of other review so instead of waste time by complain about something be already well aware of i will foc on the rest of swsh s shortcoming for reference i have play every mainline pokemon game to completion and have currently sink hour into shield would imagine criticism also apply to sword in the interest of brevity i will be utilize acronyms swsh sword shield s m sun moon etc the most succinct way to put be swsh essentially feel like a ds game port to the switch this apply to every facet of the game from gameplay feature to visual presentation to story character i believe whether or not enjoy swsh depend entirely upon how view the th gen games contrary to what the dev promise last year swsh be most assuredly not mean for the core pokemon fan as a veteran of the series i can say with certainty the abysmally low level of difficulty in conjunction with the simplification and outright removal of many feature be the exact opposite of what would want swsh be nearly as simplified as lgpe which would not be an issue if the dev address as such but somehow seem to think this be what qualify as a satisfying mainline game in yes pokemon have always be a franchise aim primarily at child the game have never be truly difficult when compare to many of jrpg contemporary yet for some reason game freak feel the need to crank that difficulty set from easy down to braindead s m may have be easy but be league more challenging than shield accessibility be one thing but this be take to such an extreme nearly remove all the fun a hard mode option selectable from the outset would have work wonder and of course as be common for the past several entry in the pokemon series the post game content be practically non existent so swsh basically fail when view purely as a game but what of the other aspect i mention after all jrpg with subpar gameplay can still be surprisingly enjoyable if the narrative hold enough merit i be sad to report that swsh also completely fail in this regard the writing be oftentime so atrocio have find wish to skip these scene entirely this be par for the course as none of the mainline game bar the attempt make in b w have really deliver anything substantive in the way of competent storytelling however with the majority of other pokemon game at least have good gameplay to compensate for the lacklter story a factor which be completely absent from swsh due to inherent simplified nature and toddler pander difficulty well there be only one thing leave to discs since delve into too much detail on the above point cae to exceed the character limit the final point of contention be the visual fidelity and graphical offering swsh bring to the table this be after all a game make for the switch a console handheld hybrid with roughly x the computational power of pokemon s former home the ds let get the pesky number out of the way first swsh run at a dynamic resolution on both dock and portable mode in dock mode the resolution fluctuate from p to p likewise during portable play the high res be p which descend to p on frequent occasion this be somewhat disappointing consider lgpe be capable of run at p dock p portable at all time the framerate target be fps which exhibit frame pace problem and severe drop in a variety of area the most evident manifestation of these issue can be see in the wild area wherein both pacing and framerate be ph to absolute worst this be a massive problem consider the majority of time especially in post game will likely be spend in the wild area aside from the quantifiable metric highlight above there be plenty of other visual shortcoming to boot terrible terrain detail awful texture constant pop in stilted animation low polygon count for both the human character model as well as the pokemon and countless other issue such as these result in a game that resemble something release over a decade ago many detractor claim swsh look like a ps gc game which be definitely an exaggeration but what get be by no mean acceptable for a non indie game release in and yes many of the animation and all the return pokemon model have be copy and paste with negligible improvement for swsh be one thing to sacrifice content for a more polished high quality game but that be very clearly not what happen here i genuinely hope these issue can be sort out in future release i be not ready to give up hope on pokemon jt yet,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
copy of sword and shield come in the mail a bit early yesterday and while sword and shield be pretty cute game i have to admit there be some glaring problem that make this game feel like be need to bake in the oven a tad bit longer for starter i just want to say i never care for the controversy surround these game about not be able to transfer all of pokemon so this review be in no way biased towards that there be however other issue that do drag down the experience for i often run into performance issue frame drop tree or object randomly pop up in view and while at time the game do seem beautiful and comfy at other point the game almost seem sloppily put together the game be far too easy to the point of boredom even for a pokemon game i feel be overlevele very quickly i be often take down opponent only in one singular attack despite not go out of way to fight many monster the new exp share which spread experience across whole party seem to amplify this problem seem as if the game be not balance with this feature in mind as entire team be several level ahead of opponent for most of if not the entire game the adventure last roughly about hour for to complete which be awfully short for an rpg and there be not much can do after the journey be over may i add this game only have route previous entry have about ish i will not get into story spoiler in this review but the game be a bit more linear than i be expect main criticism stem from hop happy go lucky friend and rival who always seem to interrupt the flow there be hardly a moment where can walk around without be interrupt by dialogue or a cutscene by or some other character i audibly groan whenever hop appear on screen i do like a good story but i would also like to be able to play through the game without rival breathe down neck trainer customisation in this game be something i appreciate customize character be the wide have ever be in this series complete with photogenic trainer card that capture the essence of character i find this feature very wholesome and cute with many expression and pose to choose from i be look foward to see the wild area be a reimagine safari zone full of life but unfortunately be large empty and barren model walk around a bit stiffly and the texture be a bit dated and lackluster for a console game in animation from this game can either be very well do or extremely lacking there be one instance where one of the major pokemon in the story mode do not even have a turn animate for be walk cycle the pokemon s entire model be just rotate degree which be very noticeable and bizarre however speak of which i do like the very set of pokemon this generation there be quite a lot that be able to catch in the region old and new and be all pretty unique and sometimes goofy in design the new feature pokemon camp be very charming i love the animation in this mode be definitely at peak here in term of quality be quite entertaining to see scorbunny raboot play with the feather wand toy i have pokemon in camp interact with each other and feel very lively make curry be cute however i do have problem with for starter the ai for the pokemon can seem a bit buggy where may get stick walk in one place i can not say be common but do break the immersion a bit there be one instance where i have no background in the camp at all which seem like a bug but then i encounter a battle with a background very similar to this and i can only assume be intentional at time i almost feel there be more time and effort put into this side activity than the actual battle system at be core be definitely a pokemon game the basic formula of pokemon work in these game however i be still disappointed and i can without a doubt say that this be the least amount of enjoyment have get out of a mainline pokemon game so far pokemon sword and shield fail to truly innovate despite be the first mainline entry on any type of console and i can not say that be a sixty dollar pokemon game at all feel rush poorly optimize and half baked can i see a child enjoy this game yes can i see a newcomer to the series enjoy this game possibly can i see a longtime pokemon fan enjoy this game not likely unfortunately,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
the game be meh not even count the pokemon what be take out do not equal to what be gain if have nothing well to do and find a e copy in a few week that will be the only time be say to get,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
i really love pokemon which be why be so critical in this review be not review bombing i jt feel so strongly opposed to this game that i feel like i need to share opinion after take a long time and do the most i want to in the main story which only come to hour for although be fun feel very shallow while the town be fairly nice the route follow sun and moon s linear formula which make feel pointless while the town do look nice and some of the route be fun most environment look straight up bad specifically the wild area i personally would have much rather have more route in place of the wild area becae although the idea be solid the execution be awful the amount of pokemon that will pop up at the same time be silly have about fully evolve pokemon in one patch of grass at the same time there be a weird abundance of old pokemon compare to the new specifically in the wild area and most can not catch due to catch high level pokemon be lock behind badge while on the topic of gym although the gym be passable the pacing between all of be awful specifically after bea allister s gym the game become strangely devoid of content to the point where can get the last gym badge in only a few hour the entire main story seem to be the short to date and really make the content for dollar feel lack lastly the story of the game be the bad in the franchise most of happen after collect all badge which feel extremely weird and the villain base this time be literally an elevator have a few double battle in with hop there be absolutely no dungeon or any of anywhere for that matter the villain motive be stupid and the rehash sun and moon plot twist of the initial evil team not actually be the big bad be easily see come and team yell be not nearly as likable as team skull be i really want to enjoy this game but i could not the bare minimum i can say be that be somewhat fun but that be only becae pokemon game always be i hope future game in the series get more time to be work on becae despite the new pokemon character design and mic nothing else feel like put any love or care into pokemon sword and shield be soulless and other than a fine game for kid i have no clue why would spend money on this entry of the series,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
pokémon sword and shield be a game of pure when discount all of the technical issue do not do anything exceptional or grind break and be another entry that coast off the brand name the main story be just ok fill with unskipable cutscene that often have slowdown due to poor optimization the story suffer from the same issue as sun and moon where truly do not feel like character story which defeat the point of be an rpg the draw distance be genuinely abhorrent and further show that gamefreak have show to be incompetent game developer the game be ridiculously easy and streamline which in this case mean dumbe down the game be short only be around hour long which be a ripoff most industry standard rpg range between hour long while sword and shield be only a fraction the graphic be bad there be no other way of put be low poly and the texture be hilariously low resolution the color be wash out and this look like someone first attempt in unity try to make a game have to compare this game to other game in the industry gamefreak need feat hold to the fire and should no longer be allow to coast off the pokémon name i do not recommend purchase these game be cash grab make by developer who can not even program a game right and that be just pathetic how a company can not even do job right because be that incompetent,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
this be what happen if a company realize that can do anything and people keep buy product pokemon sword and shield be the bad mainline pokemon game to date gamfreaks cut more than of pokemon remove move mega evolution and endgame content without add any improvement promise most of the pokemon model be prove identical to ds pokemon model which mean lie becae in the interview say the reason can not bring all the pokemon be becae have to make all the model from scratch which be prove be not the case,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
’ an amazing feat how soulless this game feel compare to previous entry in the pokemon franchise this game feel like a downgrade to any of the recent pokemon game feel like there ’ less content overall with not much to add back to give a grand experience with and there ’ much more to the story than just dexit bad graphic and animation large content cut the game hold hand and be more tedious with constant cutscene and bland story and limited location along with very little post game if want to get this game good for but in opinion just feel like one step forward two step back even the good addition be not enough to save,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
i really wish the game be good i do not leave this review out of spite but becae i love the series for the positive i like some of the new design on top of that most pokemon get additional move give more variety in combat besides that be a pokemon game with serio add flaw though be a trend set in early game the new game be more linear and less secret fill than ever dungeon be more akin to hallway instead labyrinth with branching path and abundant secret there be very few route in the game and the one that exist be mostly short and lack complexity the wild area feel half baked and unfinished after the short main campaign all that be leave for be a short side story and a battle tower besides that be entirely reliant on play online for content as a gimmick dynamax against other player be frtrating and poorly balanced one wrong move and be down two or even three mon this be especially devastating in v match feel even more poorly balance than the previo battle gimmick mega evolution and z move be painful to see the series hit this low i want this to be good but wade through the major flaw to get to the small nugget of good simply be not worth the game be not irredeemable but be really hard to recommend when drop the ball so hard,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
the game be full of graphical glitch be short and lacking in thing to do not only after complete the meek rh for the champion title but afterwards as well the game design be sloppy and quite obvioly rh this be not a finished product and should not be bill as such there be a few positive thing about these game but the bad far far outweigh the good a thorough disappointment for long time fan of the series,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
pokemon sword and shield boast expansive landscape and a sense of ambition at first the more the game go on become aware of how hastily put together and hollow really be the main selling point the wild area be small than would expect and plain ugly to boot there be not a lot to do there the same game for the game s traditional route which be some of the small and most linear yet this combine with the game interrupt every second for leon to brag about charizard or whatever make the game a chore to sit through despite short length that be not to say the game do not have s positive though galar can be quite pretty when come to town and color and there be some much welcome quality of life improvement like easily accessible move re learner and nature mint a lot of the series past tedium be now go and get into competitive have never be easy the new monster design be also cool and imaginative the dynamax gym battle be a sight to behold with giant battle and roar crowd i also much prefer the new y comm to generation s festival plaza unfortunately that be about all nice i have to say about the game the difficulty be nonexistent the story be dull and predictable and the overall content and variety pale compare to past entry despite sacrifice pokemon and cost more these negative as well as the technical issue and sign of a rh game an important battle have no mic last gym be a single room etc mean i can only give these a there be potential but need to spend more time on product for something truly good well luck next game,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## LSA 

In [104]:
lsa = TruncatedSVD(2)
doc_topic = lsa.fit_transform(doc_word)
lsa.explained_variance_ratio_

array([0.0785955 , 0.01366075])

In [105]:
topic_word = pd.DataFrame(lsa.components_.round(3),
             index = ["component_1","component_2"],
             columns = vectorizer.get_feature_names())
topic_word

Unnamed: 0,aa,aa good,aaa,aaa console,aaa crash,aaa deserve,aaa developer,aaa element,aaa fall,aaa ile,...,zoo,zoo money,zoom,zoom max,zooopia,zooopia characer,zubat,zubat battle,zubat play,zubat tentacool
component_1,0.0,0.0,0.003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
component_2,-0.0,-0.0,-0.004,0.0,0.0,-0.0,-0.0,-0.001,0.0,-0.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0


In [106]:
def display_topics(model, feature_names, no_top_words, topic_names=None):
    for ix, topic in enumerate(model.components_):
        if not topic_names or not topic_names[ix]:
            print("\nTopic ", ix)
        else:
            print("\nTopic: '",topic_names[ix],"'")
        print(", ".join([feature_names[i]
                        for i in topic.argsort()[:-no_top_words - 1:-1]]))

In [107]:
display_topics(lsa, vectorizer.get_feature_names(), 15)


Topic  0
like, new, battle, area, feel, good, time, wild, look, animation, play, bad, story, wild area, jt

Topic  1
area, battle, feel, wild, like, wild area, gym, story, feel like, route, raid, character, fight, trainer, team


### NMF 

In [None]:
nmf_model = NMF(2)
doc_topic = nmf_model.fit_transform(doc_word)

In [None]:
topic_word = pd.DataFrame(nmf_model.components_.round(3),
             index = ["component_1","component_2"],
             columns = vectorizer.get_feature_names())
topic_word

In [None]:
display_topics(nmf_model, vectorizer.get_feature_names(), 15)

### LDA 

In [None]:
corpus = matutils.Sparse2Corpus(doc_word)

In [None]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [None]:
id2word = dict((v, k) for k, v in vectorizer.vocabulary_.items())

In [None]:
lda = models.LdaModel(corpus=corpus, num_topics=3, id2word=id2word, passes=10)

In [None]:
lda.print_topics()