In [1]:
import numpy as np
import pandas as pd
import warnings
import os
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS as my_stop_words
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.test.utils import common_texts
from gensim.models import Word2Vec
import re
import copy

# Load Data

In [3]:
data = pd.read_csv('Reviews.csv')

# Data Preprocessing

In [4]:
data = data.head(10000)
data = data[['Text','Score']]
data['Score'] = data['Score'].apply(lambda x:1 if x>=4 else 0)
data['Text'] = data['Text'].apply(lambda x: x.replace('.', ''))

#data['Text_without_stopwords'] = data['Text'].apply(lambda x: ' '.join([word for word in x.split(' ') if word not in (my_stop_words)]))

#data['Text_without_stopwords'] = data['Text_without_stopwords'].apply(lambda x: x.split(' '))

In [5]:
data.head(10)

Unnamed: 0,Text,Score
0,I have bought several of the Vitality canned d...,1
1,Product arrived labeled as Jumbo Salted Peanut...,0
2,This is a confection that has been around a fe...,1
3,If you are looking for the secret ingredient i...,0
4,Great taffy at a great price There was a wide...,1
5,I got a wild hair for taffy and ordered this f...,1
6,This saltwater taffy had great flavors and was...,1
7,This taffy is so good It is very soft and che...,1
8,Right now I'm mostly just sprouting this so my...,1
9,This is a very healthy dog food Good for their...,1


In [6]:
all_words = list(data.Text)

## TF-IDF

In [7]:
import nltk
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tsaiyichen/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/tsaiyichen/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [8]:
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

# function that receive a list of words and do lemmatization:
def lemma_stem_text(words_list):
    # Lemmatizer
    text = [lemmatizer.lemmatize(token.lower()) for token in words_list]# eighties->eight or messages->message or drugs->drug
    text = [lemmatizer.lemmatize(token.lower(), "v") for token in text]# going-> go or started->start or watching->watch
    return text

In [9]:
import re
re_negation = re.compile("n't ")

# function that receive a sequence of woords and return the same sequence transforming
# abbreviated negations to the standard form.
def negation_abbreviated_to_standard(sent):
    sent = re_negation.sub(" not ", sent)
    return sent

In [10]:
def review_to_words(raw_review):
    # 1. Remove HTML tags
    review_text = raw_review
    
    # 2. Transform abbreviated negations to the standard form.
    review_text = negation_abbreviated_to_standard(review_text)
    
    # 3. Remove non-letters and non-numbers   
    letters_numbers_only = re.sub("[^a-zA-Z_0-9]", " ", review_text) 
    
    # 4. Convert to lower case and split into individual words (tokenization)
    words = np.char.lower(letters_numbers_only.split())                             
    
    # 5. Remove stop words
    meaningful_words = [w for w in words if not w in my_stop_words]   
    
    # 6. Apply lemmatization function
    lemma_words = lemma_stem_text(meaningful_words)
    
    # 7. Join the words back into one string separated by space, and return the result.
    return( " ".join(lemma_words))

In [11]:
# We initialize an empty list to add the clean reviews
cleaned_text= []
data['Clean_Text'] = ''
# We loop over each review and clean it  
for n,i in enumerate(all_words):
    tmp = review_to_words(i)
    data['Clean_Text'][n] = tmp
    cleaned_text.append(tmp)

In [12]:
vectorizer = TfidfVectorizer(max_features=1000, ngram_range = (1,2))
X = vectorizer.fit_transform(cleaned_text)
#print(vectorizer.get_feature_names_out())
#print(X.shape)

In [39]:
array = pd.DataFrame(X.toarray(),columns=vectorizer.get_feature_names())

feature_array = np.array(vectorizer.get_feature_names())
tfidf_sorting = np.argsort(X.toarray()).flatten()[::-1]

n = 10
top_n = feature_array[tfidf_sorting][:n]

print(top_n)

{'buy': 120, 'can': 128, 'dog': 270, 'food': 347, 'product': 703, 'good': 382, 'quality': 712, 'look': 536, 'like': 516, 'process': 701, 'meat': 563, 'smell': 812, 'better': 78, 'appreciate': 42, 'dog food': 271, 'good quality': 385, 'look like': 538, 'arrive': 45, 'label': 497, 'salt': 762, 'peanut': 648, 'actually': 17, 'small': 810, 'size': 805, 'sure': 871, 'light': 515, 'nut': 607, 'case': 138, 'cut': 234, 'tiny': 908, 'square': 835, 'coat': 173, 'powder': 689, 'sugar': 863, 'chewy': 157, 'flavorful': 344, 'highly': 432, 'recommend': 733, 'yummy': 999, 'treat': 917, 'sell': 780, 'highly recommend': 433, 'ingredient': 468, 'believe': 73, 'get': 369, 'addition': 21, 'beer': 71, 'extract': 309, 'order': 629, 'cherry': 155, 'soda': 816, 'flavor': 339, 'great': 396, 'price': 697, 'delivery': 253, 'quick': 714, 'lover': 546, 'deal': 244, 'great price': 398, 'pound': 687, 'bag': 57, 'peppermint': 652, 'grape': 392, 'complaint': 192, 'bite': 83, 'red': 736, 'black': 86, 'licorice': 512, '

## word2vec

In [15]:
import gensim
from gensim.models import word2vec

In [16]:
cleaned_text = data['Clean_Text'].apply(lambda x: x.split()) 
cleaned_text

0       [buy, vitality, can, dog, food, product, good,...
1       [product, arrive, label, jumbo, salt, peanutst...
2       [confection, century, light, pillowy, citrus, ...
3       [look, secret, ingredient, robitussin, believe...
4       [great, taffy, great, price, wide, assortment,...
                              ...                        
9995    [switch, advance, similac, organic, product, t...
9996    [like, bad, review, say, organic, formula, con...
9997    [want, solely, breastfeed, unable, supplement,...
9998    [love, fact, delieved, house, delievy, chargei...
9999    [7, week, old, gas, constipation, problem, 5, ...
Name: Clean_Text, Length: 10000, dtype: object

In [17]:

model_w2v = word2vec.Word2Vec(
            cleaned_text,
            vector_size=200, # desired no. of features/independent variables
            window=5, # context window size
            min_count=2, # Ignores all words with total frequency lower than 2.                                  
            sg = 1, # 1 for skip-gram model
            hs = 0,
            negative = 10, # for negative sampling
            workers= 32, # no.of cores
            seed = 34
) 

model_w2v.train(cleaned_text, total_examples= len(all_words),epochs=20)


(6233425, 7348840)

In [18]:
print(model_w2v.wv.most_similar("gas"))
print(len(model_w2v.wv['food']))

[('ethylene', 0.5495854616165161), ('stool', 0.4995848834514618), ('mucous', 0.49382951855659485), ('station', 0.48272189497947693), ('switchover', 0.481843501329422), ('fussiness', 0.48112308979034424), ('whimper', 0.47058507800102234), ('rumble', 0.46831023693084717), ('tract', 0.46765217185020447), ('taint', 0.45563456416130066)]
200


In [19]:
def word_vector(tokens, size):
    vec = np.zeros(size).reshape((1, size))
    count = 0
    for word in tokens:
        try:
            vec += model_w2v.wv[word].reshape((1, size))
            count += 1.
        except KeyError:  # handling the case where the token is not in vocabulary
            continue
    if count != 0:
        vec /= count
    return vec

In [20]:
wordvec_arrays = np.zeros((len(cleaned_text), 200)) 
for i in range(len(cleaned_text)):
    wordvec_arrays[i,:] = word_vector(cleaned_text[i], 200)
wordvec_df = pd.DataFrame(wordvec_arrays)
wordvec_df.shape

(10000, 200)

In [21]:
wordvec_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,190,191,192,193,194,195,196,197,198,199
0,0.073363,-0.097225,-0.013715,0.105041,-0.171356,0.146342,-0.114115,0.053921,-0.083498,-0.056085,...,0.090543,0.169873,0.010630,-0.104951,0.143127,0.051993,-0.063602,-0.207688,0.150058,0.058920
1,0.062928,-0.160166,0.105520,0.130894,-0.436897,0.096302,-0.022009,0.172532,0.058222,-0.030489,...,0.061823,-0.009096,-0.000964,-0.067400,0.156984,0.164527,-0.142258,-0.214795,0.312448,-0.210894
2,-0.011035,-0.359206,0.032021,0.124204,-0.338012,0.071365,-0.136356,0.051192,0.056352,-0.056549,...,-0.024111,-0.022983,0.028703,0.042366,0.099668,-0.046657,-0.104071,-0.015003,0.248600,-0.076503
3,0.178515,-0.243833,0.084696,0.047925,-0.106130,0.042418,-0.003998,0.040451,-0.005525,0.099882,...,-0.114303,0.052744,-0.120447,-0.106265,-0.014518,-0.071731,-0.088273,-0.214934,0.226067,-0.037839
4,0.230269,-0.142799,-0.043723,0.075066,-0.152810,-0.009061,-0.033083,-0.049079,0.141197,-0.170891,...,-0.178677,-0.132627,-0.286317,0.063227,0.261554,0.219833,-0.097864,-0.150255,0.490547,-0.027097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-0.006606,-0.257978,0.187350,0.197935,-0.239481,0.114473,-0.022413,0.037548,-0.071378,-0.015717,...,-0.170603,0.086873,0.079009,-0.113725,0.103821,-0.019068,0.060683,-0.012763,0.213113,0.142734
9996,0.081403,0.015572,0.085149,0.185344,-0.229031,0.080924,-0.056112,0.084491,-0.065106,-0.120662,...,-0.056234,0.105458,0.076966,-0.120019,0.193228,-0.073202,-0.118479,-0.013325,0.170655,0.204110
9997,0.040201,-0.188957,0.237293,0.259474,-0.238112,0.055364,-0.040671,0.106899,-0.088936,-0.080189,...,-0.128290,0.116688,-0.003412,-0.148293,0.151848,-0.068971,-0.020197,-0.071703,0.237974,0.142820
9998,-0.049785,0.070014,0.063630,0.091186,-0.161012,-0.003208,-0.022670,0.093699,0.077853,-0.447828,...,-0.100885,0.193709,0.014115,-0.078012,0.201470,-0.033098,-0.300205,-0.021569,0.261557,0.230153


# Build Model

In [22]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn import tree

## k-fold cross-validation

b. 請寫自行撰寫function進行k-fold cross-validation(不可使用套件)並計算Accuracy

            b-1. input(k, data)，將data切成k份，其中1份當測試集，剩餘k-1份當訓練集建立模型

            b-2. 輪流將k份的每份資料都當 測試集，其餘當訓練集建立模型，因此會進行k次，k次都計算出Accuracy

            b-3. 將k次的Accuracy平均即為output

        參考範例

            def K_fold_CV(k, data):

                 #設定subset size 即data長度/k

                 #設定Accuracy初始值

                 for i in range(k):

                    #設定testing set與training set的資料起始點與結束點

                    #例如資料有100筆，testing set在本次iteration取第1到25筆，則training set為第26到100筆；下次testing set為26~50，training set為1~25 & 51~100

                    #利用training set建立模型，testing set計算出Accuracy累加

                  return Accuracy/k

In [23]:
def K_fold_CV(k, data_k):
    subset_size = dif = round(len(data_k) / k)
    m,n = np.shape(data_k)
    total_accuracy = 0
    rec = 0
    for i in range(k):
        print(rec)
        training_X = data_k.iloc[rec:rec+dif,0:n-1]
        training_Y = data_k.iloc[rec:rec+dif,-1:]
        rec+=dif
        print(rec)
        if rec == len(data_k):
            rec = 0
        test_X = data_k.iloc[rec:rec+dif,0:n-1]
        test_Y = data_k.iloc[rec:rec+dif,-1:]
        
        clf = RandomForestClassifier(n_estimators = 100)
        clf = clf.fit(training_X,training_Y.astype('int'))
        predict = clf.predict(test_X)
        score = accuracy_score(test_Y.astype('int'), predict)
        print(test_Y)
        print(predict)
        print(score)
        total_accuracy += score
        print('---')
    return total_accuracy/k

In [24]:
tf_array = pd.concat([array,data.Score],axis=1)

In [25]:
type(tf_array)

pandas.core.frame.DataFrame

In [26]:
tf_score = K_fold_CV(4,tf_array)

0
2500
      Score
2500      0
2501      1
2502      1
2503      1
2504      1
...     ...
4995      0
4996      0
4997      0
4998      1
4999      1

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.8012
---
2500
5000
      Score
5000      1
5001      1
5002      0
5003      1
5004      0
...     ...
7495      1
7496      1
7497      1
7498      1
7499      0

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.7916
---
5000
7500
      Score
7500      1
7501      1
7502      1
7503      1
7504      1
...     ...
9995      0
9996      1
9997      1
9998      1
9999      1

[2500 rows x 1 columns]
[1 1 1 ... 0 1 1]
0.8104
---
7500
10000
      Score
0         1
1         0
2         1
3         0
4         1
...     ...
2495      0
2496      1
2497      1
2498      1
2499      1

[2500 rows x 1 columns]
[1 0 1 ... 1 1 1]
0.8176
---


In [27]:
w2v_array = pd.concat([wordvec_df,data.Score],axis=1)

In [28]:
w2v_array

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,191,192,193,194,195,196,197,198,199,Score
0,0.073363,-0.097225,-0.013715,0.105041,-0.171356,0.146342,-0.114115,0.053921,-0.083498,-0.056085,...,0.169873,0.010630,-0.104951,0.143127,0.051993,-0.063602,-0.207688,0.150058,0.058920,1
1,0.062928,-0.160166,0.105520,0.130894,-0.436897,0.096302,-0.022009,0.172532,0.058222,-0.030489,...,-0.009096,-0.000964,-0.067400,0.156984,0.164527,-0.142258,-0.214795,0.312448,-0.210894,0
2,-0.011035,-0.359206,0.032021,0.124204,-0.338012,0.071365,-0.136356,0.051192,0.056352,-0.056549,...,-0.022983,0.028703,0.042366,0.099668,-0.046657,-0.104071,-0.015003,0.248600,-0.076503,1
3,0.178515,-0.243833,0.084696,0.047925,-0.106130,0.042418,-0.003998,0.040451,-0.005525,0.099882,...,0.052744,-0.120447,-0.106265,-0.014518,-0.071731,-0.088273,-0.214934,0.226067,-0.037839,0
4,0.230269,-0.142799,-0.043723,0.075066,-0.152810,-0.009061,-0.033083,-0.049079,0.141197,-0.170891,...,-0.132627,-0.286317,0.063227,0.261554,0.219833,-0.097864,-0.150255,0.490547,-0.027097,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-0.006606,-0.257978,0.187350,0.197935,-0.239481,0.114473,-0.022413,0.037548,-0.071378,-0.015717,...,0.086873,0.079009,-0.113725,0.103821,-0.019068,0.060683,-0.012763,0.213113,0.142734,0
9996,0.081403,0.015572,0.085149,0.185344,-0.229031,0.080924,-0.056112,0.084491,-0.065106,-0.120662,...,0.105458,0.076966,-0.120019,0.193228,-0.073202,-0.118479,-0.013325,0.170655,0.204110,1
9997,0.040201,-0.188957,0.237293,0.259474,-0.238112,0.055364,-0.040671,0.106899,-0.088936,-0.080189,...,0.116688,-0.003412,-0.148293,0.151848,-0.068971,-0.020197,-0.071703,0.237974,0.142820,1
9998,-0.049785,0.070014,0.063630,0.091186,-0.161012,-0.003208,-0.022670,0.093699,0.077853,-0.447828,...,0.193709,0.014115,-0.078012,0.201470,-0.033098,-0.300205,-0.021569,0.261557,0.230153,1


In [29]:
w2v_score = K_fold_CV(4,w2v_array)

0
2500
      Score
2500      0
2501      1
2502      1
2503      1
2504      1
...     ...
4995      0
4996      0
4997      0
4998      1
4999      1

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.78
---
2500
5000
      Score
5000      1
5001      1
5002      0
5003      1
5004      0
...     ...
7495      1
7496      1
7497      1
7498      1
7499      0

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.7716
---
5000
7500
      Score
7500      1
7501      1
7502      1
7503      1
7504      1
...     ...
9995      0
9996      1
9997      1
9998      1
9999      1

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.7776
---
7500
10000
      Score
0         1
1         0
2         1
3         0
4         1
...     ...
2495      0
2496      1
2497      1
2498      1
2499      1

[2500 rows x 1 columns]
[1 1 1 ... 1 1 1]
0.7892
---


In [42]:
print('The score of Tf-idf: ',tf_score)

The score of Tf-idf:  0.8052


In [43]:
print('The score of w2v: ',w2v_score)

The score of w2v:  0.7796000000000001


In [30]:
test = pd.read_csv('test.csv')

In [31]:
test.Text = test['Text'].apply(lambda x: x.replace('.', ''))
all_words = list(test.Text)
# We initialize an empty list to add the clean reviews
cleaned_text= []
test['Clean_Text'] = ''
# We loop over each review and clean it  
for n,i in enumerate(all_words):
    tmp = review_to_words(i)
    test['Clean_Text'][n] = tmp
    cleaned_text.append(tmp)

In [32]:
test

Unnamed: 0,Text,Clean_Text
0,I was looking for an easy and convenient way t...,look easy convenient way add lean protein diet...
1,DO NOT be freaked out by the ostrich! This tas...,freak ostrich taste exactly like slim jim shor...
2,I bought the Ostrim with a little apprehension...,buy ostrim little apprehension have eat ostric...
3,At only 80 calories these are a great bang for...,80 calorie great bang caloric buck br low suga...
4,"These are quite tasty and by far the leanest, ...",quite tasty far leanest highest protein great ...
...,...,...
4995,Great product' matches surgar cup for cup Doe...,great product match surgar cup cup doe leave b...
4996,Being a diabetic and a carbs addict I don't ne...,diabetic carbs addict need sugar starch idea g...
4997,The best sweetner I have ever tried Good for b...,best sweetner try good bake brown confectioner
4998,I have been extremely satisfied with the quali...,extremely satisfy quality ideal brand white br...


In [33]:
len(cleaned_text)

5000

In [34]:
vectorizer_k = TfidfVectorizer(max_features=1000, ngram_range = (1,2))
X_k = vectorizer_k.fit_transform(cleaned_text)
array_k = pd.DataFrame(X_k.toarray(),columns=vectorizer.get_feature_names())
array_k

Unnamed: 0,10,100,100 calorie,12,15,16,20,24,25,30,...,wwwamazoncom,wwwamazoncom gp,year,year ago,year old,yes,yogurt,yuban,yum,yummy
0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.250832,0.0,0.0
1,0.0,0.199508,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
2,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4996,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.169346,0.000000,0.0,0.0,0.000000,0.0,0.0
4997,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4998,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0


In [35]:
array_k

Unnamed: 0,10,100,100 calorie,12,15,16,20,24,25,30,...,wwwamazoncom,wwwamazoncom gp,year,year ago,year old,yes,yogurt,yuban,yum,yummy
0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.250832,0.0,0.0
1,0.0,0.199508,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
2,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
3,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4996,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.169346,0.000000,0.0,0.0,0.000000,0.0,0.0
4997,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0
4998,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0


In [36]:
clf_k = RandomForestClassifier(n_estimators = 100)
m,n = np.shape(tf_array)
clf_k = clf_k.fit(tf_array.iloc[:,0:n-1],tf_array.iloc[:,-1:].astype('int'))
predict = clf_k.predict(array_k)

In [37]:
output = pd.DataFrame({'ID': range(1,5001), 'Score': predict})
output.to_csv('HW2_submmit-2.csv', index=False)