# Suicide Watch analysis
This notebook walks you through building the models we
trained on the data we collected from the Suicide Watch subreddit.

We first import the libraries and utility files we are going to be using,
and parse and clean our data.

In [44]:
%matplotlib inline

# Import machine learning libraries
import gensim
import textmining
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.sparse as sparse
from sklearn.manifold import MDS
from sklearn.cluster import KMeans
import re
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer

# Import utility files
import dataUtils
import clusterUtils

In [55]:
# Get the data from the csv
df = dataUtils.read_df('data')

In [56]:
# Clean the text in the dataframe: replace NaN with empty strings, join title
# and body into one raw text column, then strip links and clean each sentence.
df = df.replace(np.nan, '', regex=True)
df["rawtext"] = df["title"] + " " + df["selftext"]
links_removed = df["rawtext"].apply(dataUtils.remove_links)
df["cleantext"] = links_removed.apply(dataUtils.cleanSentence)

In [None]:
# Split every cleaned post on whitespace to get one token list per post
posts = [text.split() for text in df["cleantext"]]

In [None]:
# Train a phraseDetector
two_word_phrases = gensim.models.Phrases(posts)

In [None]:
two_word_phraser = gensim.models.phrases.Phraser(two_word_phrases)

In [None]:
# Train a second phrase detector on the bigram-merged stream so that phrases
# of up to three words can be merged, then rewrite `posts` with both phrasers applied.
bigram_posts = two_word_phraser[posts]
three_word_phrases = gensim.models.Phrases(bigram_posts)
three_word_phraser = gensim.models.phrases.Phraser(three_word_phrases)
posts = list(three_word_phraser[bigram_posts])

In [None]:
# update clean text
def _apply_phrasers(text):
    """Tokenize `text` on whitespace, run both phrasers over it and re-join with spaces."""
    tokens = text.split()
    return " ".join(three_word_phraser[two_word_phraser[tokens]])

df["cleantext"] = df["cleantext"].apply(_apply_phrasers)

#### Data summary statistics

Before building models, we first look at the data that we are using.

In [None]:
# Get the number of posts
num_posts = len(posts)
num_posts

In [None]:
# Get the number of distinct users, not counting the "[deleted]" placeholder.
# userDict maps each author name to the number of posts by that author.
userList = df["author"].tolist()
userDict = {}
for user in userList:
    # Skip the placeholder entirely so it never becomes a key in userDict.
    if user == "[deleted]":
        continue
    userDict[user] = userDict.get(user, 0) + 1
len(userDict)

#### Build word2vec model
At this step we will build the word2vec model that we will use in the rest of the analysis.
Because this is a computationally expensive process, we save the result of training our model
under the name model_name + ".model" in the models directory. We can then load this model later, and do not need
to rebuild it every time we want to analyze it.

In [2]:
model_name = "model4"

In [None]:
dataUtils.save_object(posts,'objects/',model_name+"-posts")

In [4]:
posts = dataUtils.load_object('objects/',model_name+"-posts")

In [None]:
# Build the model
# Train word2vec on the tokenized posts and write it to models/<model_name>.model.
# Parameter meanings, per the gensim Word2Vec documentation: min_count=10 ignores
# words that occur fewer than 10 times, sg=1 selects skip-gram, size=300 is the
# vector dimensionality, window=5 is the context window, hs=1 turns on
# hierarchical softmax and negative=20 is the number of negative samples.
model = gensim.models.Word2Vec(posts,min_count =10,
                               sg=1, size =300,window=5,hs=1,negative=20)
model.save('models/'+model_name+'.model')
# Release the in-memory model; the following cell loads it back from disk.
del model

In [5]:
# load the model
model = gensim.models.Word2Vec.load('models/'+model_name+'.model')
# Test the model: you should see cat somewhere in this list, near the top.
# The query goes through model.wv, the same keyed-vectors object the rest of
# the notebook uses for vocabulary and vector lookups.
model.wv.most_similar(positive=["kitten"])

[('cat', 0.48737263679504395),
 ('dog', 0.4538121819496155),
 ('german_shepherd', 0.44304412603378296),
 ('baby', 0.41745078563690186),
 ('pet', 0.41535550355911255),
 ('cats', 0.41435688734054565),
 ('adopted', 0.40623027086257935),
 ('puppy', 0.4011327028274536),
 ('pup', 0.3940452039241791),
 ('kittens', 0.38637271523475647)]

#### Test Model

At this step we run some basic tests to ensure that the model has picked up on some of the semantic meanings of words.

In [None]:
# Nearest neighbours of "kitten", queried through the keyed vectors (model.wv)
model.wv.most_similar(positive=["kitten"])

In [None]:
# Nearest neighbours of "heartbreak", queried through the keyed vectors (model.wv)
model.wv.most_similar(positive=["heartbreak"])

In [None]:
# Nearest neighbours of "pills", queried through the keyed vectors (model.wv)
model.wv.most_similar(positive=["pills"])

In [None]:
# Nearest neighbours of "knife", queried through the keyed vectors (model.wv)
model.wv.most_similar(positive=["knife"])

In [None]:
# Same "heartbreak" query as in the earlier cell, queried through the keyed vectors (model.wv)
model.wv.most_similar(positive=["heartbreak"])

In [None]:
# Analogy-style query: words close to "drugs" + "hurt" and far from "help"
model.wv.most_similar(positive=["drugs","hurt"],negative =["help"])

In [None]:
# Analogy-style query: words close to "drugs" + "help" and far from "hurt"
model.wv.most_similar(positive=["drugs","help"],negative =["hurt"])

#### Word usage summary

At this step, after our model has looked at all the words, 
and filtered some out, we will look at the words used by our model.

In [6]:
# Sorted list of every word the model kept in its vocabulary
vocab_list = sorted(model.wv.vocab)

In [7]:
unique_words = len(vocab_list)
unique_words

['#',
 '##',
 '###',
 '%',
 '%+',
 '%_certain',
 '%_certainty',
 '%_chance',
 '%_effective',
 '%_success_rate',
 '%_sure',
 "'",
 "''",
 "''i",
 "''you",
 "'_'",
 "'_''",
 "'_lb",
 "'_lbs",
 "'_pounds",
 "'_tall",
 "'a",
 "'all",
 "'bad'",
 "'be",
 "'being",
 "'best_friend'",
 "'better'",
 "'cause",
 "'close'",
 "'cool'",
 "'d",
 "'depressed'",
 "'depression'",
 "'do",
 "'don't",
 "'em",
 "'everything",
 "'family'",
 "'fix'",
 "'friend'",
 "'friends'",
 "'fuck",
 "'get",
 "'get_better'",
 "'get_over_it'",
 "'go",
 "'god'",
 "'good",
 "'good'",
 "'happy'",
 "'have",
 "'help'",
 "'hey",
 "'home'",
 "'how",
 "'i",
 "'i'm",
 "'if",
 "'in",
 "'it",
 "'it'",
 "'it's",
 "'it_gets_better'",
 "'it_will",
 "'its",
 "'just",
 "'life",
 "'life'",
 "'living'",
 "'love",
 "'love'",
 "'m",
 "'make",
 "'maybe",
 "'me'",
 "'my",
 "'no",
 "'normal'",
 "'not",
 "'oh",
 "'okay'",
 "'one_more_day'",
 "'out",
 "'real",
 "'real'",
 "'right'",
 "'s",
 "'see",
 "'smart'",
 "'suicidal'",
 "'suicide",
 "'that",


In [None]:
# Total number of occurrences of all vocabulary words, per the counts stored in the model
total_freq = sum(model.wv.vocab[word].count for word in vocab_list)
total_freq

In [8]:
# The posts handed to this vectorizer are already token lists, so the analyzer
# only has to return the tokens unchanged. (The earlier re.sub("_","_",s) call
# replaced "_" with "_", i.e. it did nothing but run a regex on every token.)
temp_list = list(vocab_list)
countvec = CountVectorizer(vocabulary=temp_list, analyzer=lambda tokens: tokens, min_df=0)

In [None]:
tfidf    = TfidfTransformer()

In [9]:
PostsByWords = countvec.fit_transform(posts)

In [10]:
# Inspect a bug with creating PostsByWords: print every word whose column total
# in PostsByWords differs from the count stored in the word2vec vocabulary.
temp = PostsByWords.sum(axis=0).tolist()[0]
ctr = 0
for i, observed in enumerate(temp):
    expected = model.wv.vocab[vocab_list[i]].count
    if observed < expected:
        print("<:  "+vocab_list[i], observed-expected, observed, expected)
    elif observed > expected:
        print(">:  "+vocab_list[i], observed-expected, observed, expected)

In [11]:
# Calculate the magnitude of the error
sum(temp)-sum(list(map(lambda i: model.wv.vocab[vocab_list[i]].count, range(len(vocab_list)))))

0

In [None]:
# compare PostsByWords values to correct values
PostsByWords.sum(axis=0).tolist()[0]==list(map(lambda i: model.wv.vocab[vocab_list[i]].count, range(len(vocab_list))))

In [None]:
test_vocab = countvec.vocabulary_

In [None]:
# Per-word column totals of the posts-by-words matrix as a flat 1-D array.
# Taken straight from the sparse PostsByWords matrix: `posts_arr`, which this
# line used to read, is not defined anywhere in the visible notebook, and a
# dense copy of PostsByWords would be very large.
total_arr = np.asarray(PostsByWords.sum(axis=0)).ravel()

In [None]:
sum(total_arr)-sum(list(map(lambda i: model.wv.vocab[vocab_list[i]].count, range(len(vocab_list)))))

In [None]:
# Count how often the token "amusement_park" occurs across all posts
ctr = sum(post.count("amusement_park") for post in posts)
print(ctr)

#### Run Clustering
At this step we run and analyze the KMeans clustering algorithm 
implemented by sklearn on the word vectors we got from word2vec.

The first step of this process is to extract the word vectors,
and the words they correspond to, from the model. We then test 
different values of K to observe the effect of the number of centers on the fit of the model.
After this we select a value of K to use to get the clusterings. 
We then save this result in the directory "clusters" with the name model_name + num_centers+".pkl", to save future computational time.

We then use the kmeans model to generate a list of dictionaries, where each dictionary corresponds to a cluster, and contains following fields:
    'unique_words': The number of different unique words in the cluster
    'total_freq'  : The total number of times one of the words in the cluster appeared in the corpus
    'word_list'   : A list of words in the cluster, paired with how often they appeared in the cluster

Finally we print a representation of this list to a csv, so that the clusters can be manually inspected.
This representation includes the number of unique words in the cluster, the total frequency of words in the cluster, and the size_words_list most frequent words in the cluster.

In [14]:
# Pull out each vocabulary word's vector as a plain list, in vocab_list order
vecs = [model.wv[word].tolist() for word in vocab_list]

In [15]:
# change array format into numpy array
WordByFeatureMat = np.array(vecs)

In [None]:
# Fit KMeans once per candidate cluster count and keep each run's inertia
test_points = [12] + list(range(25,401,25))
fit = [
    KMeans(n_clusters=k, random_state=42).fit(WordByFeatureMat).inertia_
    for k in test_points
]

In [None]:
# Save the fit values for this model
dataUtils.save_object(fit,'objects/',model_name+"-fit")
dataUtils.save_object(test_points,'objects/',model_name+"-testpoints")
del fit
del test_points

In [None]:
# Load the fit and test point values
fit         = dataUtils.load_object('objects/',model_name+"-fit")
test_points = dataUtils.load_object('objects/',model_name+"-testpoints")

In [None]:
fit1         = dataUtils.load_object('objects/',"model1-fit")
test_points1 = dataUtils.load_object('objects/',"model1-testpoints")
fit2         = dataUtils.load_object('objects/',"model2-fit")
test_points2 = dataUtils.load_object('objects/',"model2-testpoints")
fit3         = dataUtils.load_object('objects/',"model3-fit")
test_points3 = dataUtils.load_object('objects/',"model3-testpoints")

In [None]:
# graph the fit for different values of K, one colour per saved model.
# Axis labels and a legend are included so the figure can be read on its own.
# NOTE(review): the y label assumes the saved fit lists hold KMeans inertia_
# values, as produced by the sweep cell in this notebook; confirm for model1-3.
fit_series = [
    (test_points1, fit1, 'ro', 'model1'),
    (test_points2, fit2, 'bo', 'model2'),
    (test_points3, fit3, 'yo', 'model3'),
]
for ks, fit_values, marker, label in fit_series:
    plt.plot(ks, fit_values, marker, label=label)
plt.xlabel("number of clusters (K)")
plt.ylabel("KMeans inertia")
plt.legend()
plt.show()

In [12]:
# set the number of clusters
num_clusters = 100

In [24]:
#initialize kmeans model
# Cluster the word vectors into num_clusters groups (random_state is fixed to 42).
kmeans = KMeans(n_clusters=num_clusters, random_state=42).fit(WordByFeatureMat)
# Save the fitted model in the clusters directory under "<model_name>-<num_clusters>"
dataUtils.save_object(kmeans,'clusters/',model_name+"-"+str(num_clusters))
# Release the in-memory copy; the following cell loads it back from disk.
del kmeans

In [30]:
# load kmeans
kmeans = dataUtils.load_object('clusters/',model_name+"-"+str(num_clusters))

In [31]:
clusters = clusterUtils.makeClusteringObjects(model,kmeans,vocab_list,WordByFeatureMat)

In [None]:
# Add up, over all clusters, the total word frequency and the number of unique words
clusters_total_words = sum(cluster['total_freq'] for cluster in clusters)
clusters_unique_words = sum(cluster['unique_words'] for cluster in clusters)

In [None]:
# Check that the total number of words in clusters matches the total
clusters_total_words   

In [None]:
# Check that the number of unique words in clusters matches the total number of unique words
clusters_unique_words

##### Print clusters

Print clusters so we can analyze them

In [32]:
# Sort all the words in the words list
for cluster in clusters:
    cluster["word_list"].sort(key=lambda x:x[1],reverse = True)

In [33]:
# Build one CSV row per cluster: label, total frequency, unique word count,
# followed by up to size_words_list entries from the cluster's word list.
size_words_list = 100
table = []
for i, cluster in enumerate(clusters):
    row = ["cluster " + str(i+1), cluster["total_freq"], cluster["unique_words"]]
    # A slice simply stops at the end of a short word list, so no exception
    # handling is needed and real errors are no longer silently swallowed.
    row.extend(cluster["word_list"][:size_words_list])
    table.append(row)

In [34]:
import csv
# Write the cluster table to clusters-<model_name>-<num_clusters>.csv.
# newline='' is what the csv module asks for on files passed to csv.writer;
# without it, extra blank lines can appear between rows on some platforms.
with open('clusters-'+model_name+"-"+str(num_clusters)+'.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(table)

#### Display Clusters Using MDS

Produce a visualization of our clusters in a low dimensional space

In [None]:
# Fit the model to the clusters
# MDS is seeded (same seed value as the KMeans calls in this notebook) so the
# 2-D layout, and therefore the plots below, come out the same on every run.
mds = MDS(random_state=42).fit(kmeans.cluster_centers_)

In [None]:
# First word of each cluster's word list (the most frequent one once the lists are sorted)
top_words = [cluster["word_list"][0][0] for cluster in clusters]

In [None]:
# Split the MDS embedding into separate coordinate lists for plotting
embedding = mds.embedding_.tolist()
x = [point[0] for point in embedding]
y = [point[1] for point in embedding]

In [None]:
len(top_words)

In [None]:
# Scatter the cluster centres and label each point with its cluster's top word
plt.figure(figsize=(20,10))
plt.plot(x,y,'bo')
for idx, label in enumerate(top_words):
    plt.annotate(label,(x[idx],y[idx]))
plt.show()

In [None]:
def helper(indicies,points):
    """Return the entries of `points` selected by the 1-based positions in `indicies`.

    Each index i is looked up as points[i-1]; the result keeps the order of `indicies`.
    """
    return [points[position-1] for position in indicies]

# Hand-assigned groupings of cluster numbers into themes. The numbers are
# 1-based (helper() subtracts 1 before indexing).
# NOTE(review): these lists presumably refer to the "cluster N" labels written
# to the CSV for one specific clustering run; confirm before reusing them with
# a different model or cluster count.
bullying = [59,16,47]
crime    = [31,73]
depressive_feelings = [1,3,15,21,29,45,81,4,30]
depressive_symptoms = [9,13,28] 
drug_abuse =[22,41,75]
illness  = [35,87]
failure = [68,89,90,14,19,26,52]
prior_suicide = [27,56,79]
psychological =[78,10,44,66,85]
self_harm  = [5,17]
self_image = [69,8,96]
death_around = [76,93]
suicidal_ideation =[36,38,57,58,97,6]
# Every cluster number that was placed in one of the themes above
identified =bullying+crime+depressive_feelings+depressive_symptoms
identified = identified +drug_abuse+illness+failure+prior_suicide+psychological
identified = identified +self_harm+self_image+death_around+suicidal_ideation
# Everything in 1..100 that was not assigned to a theme (the range is hard-coded to 100 clusters)
other = [x for x in range(1,101) if x not in identified]
all_categories = [bullying,crime,depressive_feelings,depressive_symptoms,
                  drug_abuse,illness,failure,prior_suicide,psychological,
                  self_harm, self_image,death_around,suicidal_ideation,other]
# Default every category to black; selected categories are recoloured below
colors = ["black" for x in all_categories]

"""
colors = ["#ff66ff","#6666ff","#000099",
          "#33cccc","#00cc66","#336600",
          "#ccff33","#cc6600","#ff0000",
          "#cc0066","#ffccff","#ccffff","#00ff00","#00ffff"]
"""
# The indices below are positions in all_categories
#colors[0]="grey"  # Bullying
colors[2]="red"   # Depressive Feelings
#colors[4]="green" # Drug Abuse
#colors[6]="blue"  # Poor performance
colors[3]="magenta" # Depressive symptoms
colors[8]="cyan" # Psychological 


# Plot the cluster centres in the MDS plane, one scatter call per category so each gets its own colour
plt.figure(figsize=(10,5))
for i in range(len(all_categories)):
    category = all_categories[i]
    color = colors[i]
    plt.scatter(helper(category,x),helper(category,y),color=color,s=100)
plt.show()

#### Prepare for regression :TODO

At this step, we will initialize the matrices we need to run a linear regression algorithm.
We will need to create a document-term matrix and a words-by-cluster matrix.
We will first use sklearn's CountVectorizer class to create the document-term matrix. 
We will create the words-by-cluster matrix by giving each word a one-hot vector, with a
one in the position of its cluster number and a 0 everywhere else.

In [None]:
# Count how many times each token occurs across all posts.
# Every token is counted the same way; the former special case for the string
# "[deleted]" (carried over from the author-counting cell) reset that token's
# count to 1 each time it was seen.
wordDict = {}
for sentence in posts:
    for word in sentence:
        wordDict[word] = wordDict.get(word, 0) + 1

In [None]:
def _keep_frequent_words(text):
    """Drop every whitespace-separated token of `text` whose wordDict count is below 10."""
    kept = [token for token in text.split() if wordDict[token] >= 10]
    return ' '.join(kept)

df["cleantext"] = df["cleantext"].apply(_keep_frequent_words)

In [35]:
# Vectorizer over the fixed model vocabulary. Inputs are already token lists,
# so the analyzer returns them unchanged (the earlier re.sub("_","_",s) call
# replaced "_" with "_" and therefore did nothing).
countvec = CountVectorizer(vocabulary=vocab_list, analyzer=lambda tokens: tokens, min_df=0)

In [36]:
# Make Posts By Words Matrix
PostsByWords = countvec.fit_transform(posts)

In [39]:
# For each cluster, keep just the words (first element of each word_list entry)
clusterWords = [[entry[0] for entry in cluster["word_list"]] for cluster in clusters]

In [41]:
# Make Clusters By Words Matrix
ClustersByWords = countvec.fit_transform(clusterWords)

In [42]:
ClustersByWords

<100x29272 sparse matrix of type '<class 'numpy.int64'>'
	with 29272 stored elements in Compressed Sparse Row format>

In [43]:
ctr = 0
for cluster in clusters:
    ctr += cluster["unique_words"]
ctr

29272

In [50]:
ctr = 0
for cluster in clusters:
    ctr += cluster["total_freq"]
ctr

27362117

In [45]:
WordsByCluster = ClustersByWords.transpose(copy=True)

In [47]:
PostsByWords

<131652x29272 sparse matrix of type '<class 'numpy.int64'>'
	with 13666923 stored elements in Compressed Sparse Row format>

In [46]:
WordsByCluster

<29272x100 sparse matrix of type '<class 'numpy.int64'>'
	with 29272 stored elements in Compressed Sparse Column format>

In [48]:
PostsByCluster = PostsByWords.dot(WordsByCluster)

In [49]:
PostsByCluster

<131652x100 sparse matrix of type '<class 'numpy.int64'>'
	with 4017693 stored elements in Compressed Sparse Row format>

In [54]:
sum(PostsByCluster.sum(axis=0).tolist()[0])==sum(PostsByWords.sum(axis=0).tolist()[0])

True

In [65]:
ups = list(df.ups)

In [67]:
downs = list(df.downs)

In [68]:
score = list(df.score)

In [84]:
num_comments=list(df.num_comments)

In [63]:
# initialize model
from sklearn import linear_model
ups_model   = linear_model.Lasso(alpha=0.1)
downs_model = linear_model.Lasso(alpha=0.1)
score_model = linear_model.Lasso(alpha=0.1)

In [83]:
num_comments_model = linear_model.Lasso(alpha=0.1)

In [66]:
#train ups_model
ups_model.fit(PostsByCluster,ups)
dataUtils.save_object(ups_model,"objects",model_name+"-ups_regression")

In [69]:
#train downs_model
downs_model.fit(PostsByCluster,downs)
dataUtils.save_object(downs_model,"objects",model_name+"-downs_regression")

In [70]:
#train score_model
score_model.fit(PostsByCluster,score)
dataUtils.save_object(score_model,"objects",model_name+"-score_regression")

In [85]:
#train num_comments_model
num_comments_model.fit(PostsByCluster,num_comments)
# Save the model that was just fitted; this call used to pass score_model, so
# the file named "...-num_comments_regression" held the score regression.
# NOTE(review): other cells pass 'objects/' (with a trailing slash) as the
# directory; confirm how dataUtils.save_object joins the directory and name.
dataUtils.save_object(num_comments_model,"objects",model_name+"-num_comments_regression")

In [71]:
ups_model.coef_

array([-0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.11603661,  0.        ,
       -0.0207647 , -0.        ,  0.        , -0.        ,  0.02350768,
       -0.        ,  0.        ,  0.07909588,  0.05333024, -0.        ,
       -0.01213867, -0.        , -0.0168772 ,  0.        ,  0.14171743,
        0.        ,  0.02030992,  0.        , -0.        ,  0.08143078,
        0.        ,  0.        ,  0.        ,  0.31681984,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
       -0.        , -0.01968077,  0.66322467, -0.01617729,  0.        ,
       -0.        , -0.        , -0.00381721, -0.01874248, -0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.01993662,  0.        , -0.03059792, -0.        ,
        0.00103457,  0.        ,  0.        ,  0.        ,  0.  

In [72]:
downs_model.coef_

array([ 0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.01197358, -0.        , -0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.        ,  0.        ,  0.        ,  0.00873856,  0.        ,
       -0.        ,  0.        ,  0.        , -0.        , -0.        ,
        0.        ,  0.        , -0.        , -0.        ,  0.        ,
       -0.        , -0.        ,  0.01326633, -0.        ,  0.        ,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.        ,
       -0.        ,  0.        ,  0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        , -0.        ,  0.  

In [73]:
score_model.coef_

array([-0.        ,  0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        ,  0.10988917,  0.        ,
       -0.02037996, -0.        ,  0.        , -0.        ,  0.02792335,
       -0.        ,  0.        ,  0.06574797,  0.0567461 , -0.        ,
       -0.013023  , -0.        , -0.02024199,  0.        ,  0.1345511 ,
        0.        ,  0.02095982,  0.        ,  0.        ,  0.07754381,
        0.        ,  0.        ,  0.        ,  0.28571445,  0.        ,
        0.        ,  0.        , -0.        ,  0.        ,  0.        ,
       -0.        , -0.        ,  0.        ,  0.        ,  0.        ,
       -0.        , -0.01509528,  0.58123988, -0.01667027,  0.        ,
       -0.        , -0.        , -0.        , -0.01789511, -0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.01962351,  0.        , -0.0253479 , -0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.  

In [None]:
ups_model.coef_

In [86]:
# Attach each regression's per-cluster coefficient to the matching cluster dict.
ups_coef = ups_model.coef_.tolist()
downs_coef = downs_model.coef_.tolist()
# The score coefficients come from score_model (they were previously read from
# downs_model, which made every cluster's "score" equal to its "downs").
score_coef = score_model.coef_.tolist()
num_comments_coef = num_comments_model.coef_.tolist()
for i, cluster in enumerate(clusters):
    cluster["ups"] = ups_coef[i]
    cluster["downs"] = downs_coef[i]
    cluster["score"] = score_coef[i]
    cluster["num_comments"] = num_comments_coef[i]

In [95]:
num_comments_coef

[-0.010220117829876252,
 0.0,
 -0.0,
 0.0,
 -0.0,
 0.0,
 -0.0,
 0.0,
 0.02895358239742311,
 0.0,
 -0.022961673824747102,
 -0.0,
 0.0,
 0.0,
 0.0,
 -0.0,
 -0.0,
 0.0,
 0.1541935612393658,
 0.0,
 -0.038479687381242036,
 -0.0,
 -0.03641715115302902,
 0.15034271510666028,
 0.05511787489154548,
 0.0,
 0.005070442570604117,
 0.0,
 -0.0,
 0.00878719277008099,
 0.0,
 -0.0,
 0.0,
 0.20036534224642463,
 0.0,
 0.0,
 0.0,
 0.0073331772765867905,
 0.014863424061206318,
 0.0,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 0.0,
 -0.0,
 -0.07566129011313927,
 0.21616622629750115,
 -0.003632643936580472,
 0.0,
 -0.028209115984079432,
 -0.0,
 -0.0,
 -0.03459254263951843,
 -0.0,
 -0.0,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 -0.0,
 -0.003442686835202106,
 -0.0,
 -0.0,
 -0.0,
 0.07365192202229257,
 0.0,
 -0.0,
 0.0,
 0.0,
 0.0,
 0.062020632094620436,
 -0.000604338723609844,
 -0.0,
 -0.0,
 -0.0,
 0.0,
 0.0,
 -0.0,
 0.014678655154487515,
 -0.03804150873533185,
 0.0,
 0.0,
 -0.0,
 0.0,
 -0.006696701506456538,
 0.0423824078352613,
 -0.0,
 

In [92]:
clusters[5]

{'downs': 0.0,
 'num_comments': 0.0,
 'score': 0.0,
 'total_freq': 35760,
 'unique_words': 161,
 'ups': 0.0,
 'word_list': [('cut', 7314),
  ('cutting', 3061),
  ('knife', 2058),
  ('blood', 1382),
  ('wrists', 1376),
  ('arms', 1286),
  ('skin', 1219),
  ('arm', 1183),
  ('throat', 1090),
  ('scars', 946),
  ('wrist', 839),
  ('legs', 770),
  ('slit', 697),
  ('cuts', 692),
  ('leg', 575),
  ('bleed', 544),
  ('bleeding', 527),
  ('fingers', 388),
  ('razor', 359),
  ('knives', 338),
  ('blade', 334),
  ('slitting', 327),
  ('scratch', 316),
  ('wounds', 248),
  ('bruises', 244),
  ('bathtub', 242),
  ('scar', 233),
  ('stab', 224),
  ('veins', 223),
  ('sharp', 222),
  ('metal', 188),
  ('stab_myself', 181),
  ('slice', 173),
  ('grabbing', 170),
  ('thighs', 164),
  ('wound', 157),
  ('stabbing', 148),
  ('razors', 145),
  ('bled', 139),
  ('tub', 138),
  ('vein', 137),
  ('scratching', 132),
  ('nails', 129),
  ('pen', 122),
  ('blades', 121),
  ('razor_blade', 118),
  ('scissors',