In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Reading the CSV dataset into the DataFrame**

In [2]:
%cd /content/drive/My Drive/
import pandas as pd
df = pd.read_csv('./Masters/assignment.csv')
df.head()

/content/drive/My Drive


Unnamed: 0,Author,Title,Poetry Foundation ID,Content,Tag
0,Wendy Videlock,!,55489,"Dear Writers, I’m compiling the first in what ...",
1,Hailey Leithauser,0,41729,"Philosophic\nin its complex, void emptiness,\n...",novel
2,Jody Gladding,1-800-FEAR,57135,We'd like to talk with you about fear t...,
3,Joseph Brodsky,01-Jan-65,56736,The Wise Men will unlearn your name.\nAbove yo...,poetry
4,Ted Berrigan,3 Pages,51624,For Jack Collom\n10 Things I do Every Day\n\np...,novel


**Title of each column in the dataframe**

In [3]:
df.head(0) 

Unnamed: 0,Author,Title,Poetry Foundation ID,Content,Tag


**Information on the contents pf the dataframe**

In [4]:
df.info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 0 to 44
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Author                45 non-null     object
 1   Title                 45 non-null     object
 2   Poetry Foundation ID  45 non-null     int64 
 3   Content               45 non-null     object
 4   Tag                   41 non-null     object
dtypes: int64(1), object(4)
memory usage: 1.9+ KB


**Dropping columns Author, Title and ID**

In [5]:
df_1 = df.drop(df.columns[[0,1,2]],axis= 1) 

In [6]:
df_1.columns

Index(['Content', 'Tag'], dtype='object')

**Final DataFrame**

In [7]:
df_1.head()

Unnamed: 0,Content,Tag
0,"Dear Writers, I’m compiling the first in what ...",
1,"Philosophic\nin its complex, void emptiness,\n...",novel
2,We'd like to talk with you about fear t...,
3,The Wise Men will unlearn your name.\nAbove yo...,poetry
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel


# **Pre-processing the Data**

In [8]:
pip install nltk



## **convert all text to lower-case**

In [9]:
df_1['lowercase'] = df_1['Content'].apply(lambda Content: Content.lower())

In [10]:
df_1.head()

Unnamed: 0,Content,Tag,lowercase
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ..."
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n..."
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...


## **Remove Numbers**

In [11]:
import re
import nltk
df_1['no_free'] = df_1['lowercase'].apply( lambda no_free: re.sub(r'\d+','', no_free))

In [12]:
df_1.head()

Unnamed: 0,Content,Tag,lowercase,no_free
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ..."
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n..."
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...


## **Remove Punctuation**

In [13]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
#re.sub(r'[^\w\s]','',s)
df_1['punc_free'] = df_1['no_free'].apply( lambda punc_free: re.sub(r'[^\w\s]','',punc_free))

In [15]:
df_1.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...


## **Remove Whitespaces** 

In [16]:
#' '.join(.split())

df_1['space_free'] = df_1['punc_free'].apply( lambda space: ' '.join(space.split()))
#df_1.head()

In [17]:
df_1.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free,space_free
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...,dear writers im compiling the first in what i ...
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...,philosophic in its complex void emptiness a sk...
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...,wed like to talk with you about fear they said...
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...,the wise men will unlearn your name above your...
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...,for jack collom things i do every day play pok...


## **Remove Stopwords**


In [18]:
from nltk.corpus import stopwords
def stop_word(text):
  text = str(text)
  return([w for w in nltk.word_tokenize(text) if w not in stopwords.words('english')])


In [19]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [20]:
%%time
df_1['final_review'] = df_1['space_free'].apply(lambda text: [w for w in nltk.word_tokenize(text) if w not in stopwords.words('english')])

CPU times: user 749 ms, sys: 105 ms, total: 853 ms
Wall time: 860 ms


In [21]:
df_1['final_review'].head()

0    [dear, writers, im, compiling, first, hope, se...
1    [philosophic, complex, void, emptiness, skillf...
2    [wed, like, talk, fear, said, many, people, li...
3    [wise, men, unlearn, name, head, star, flame, ...
4    [jack, collom, things, every, day, play, poker...
Name: final_review, dtype: object

In [22]:
df_1.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free,space_free,final_review
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...,dear writers im compiling the first in what i ...,"[dear, writers, im, compiling, first, hope, se..."
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...,philosophic in its complex void emptiness a sk...,"[philosophic, complex, void, emptiness, skillf..."
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...,wed like to talk with you about fear they said...,"[wed, like, talk, fear, said, many, people, li..."
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...,the wise men will unlearn your name above your...,"[wise, men, unlearn, name, head, star, flame, ..."
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...,for jack collom things i do every day play pok...,"[jack, collom, things, every, day, play, poker..."


## **Store dataframe into pickle file**

In [23]:
df_1.to_pickle('/content/drive/My Drive/Masters/final.pkl')

In [24]:
test_1 = pd.read_pickle('/content/drive/My Drive/Masters/final.pkl')
test_1.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free,space_free,final_review
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...,dear writers im compiling the first in what i ...,"[dear, writers, im, compiling, first, hope, se..."
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...,philosophic in its complex void emptiness a sk...,"[philosophic, complex, void, emptiness, skillf..."
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...,wed like to talk with you about fear they said...,"[wed, like, talk, fear, said, many, people, li..."
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...,the wise men will unlearn your name above your...,"[wise, men, unlearn, name, head, star, flame, ..."
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...,for jack collom things i do every day play pok...,"[jack, collom, things, every, day, play, poker..."


#**LSTM Model** 


In [25]:
import pandas as pd
words= pd.read_pickle('/content/drive/My Drive/Masters/final.pkl')
words.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free,space_free,final_review
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...,dear writers im compiling the first in what i ...,"[dear, writers, im, compiling, first, hope, se..."
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...,philosophic in its complex void emptiness a sk...,"[philosophic, complex, void, emptiness, skillf..."
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...,wed like to talk with you about fear they said...,"[wed, like, talk, fear, said, many, people, li..."
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...,the wise men will unlearn your name above your...,"[wise, men, unlearn, name, head, star, flame, ..."
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...,for jack collom things i do every day play pok...,"[jack, collom, things, every, day, play, poker..."


In [26]:
words['final_review_text'] = words['final_review'].apply( lambda name : ' '.join(name))
words.head()

Unnamed: 0,Content,Tag,lowercase,no_free,punc_free,space_free,final_review,final_review_text
0,"Dear Writers, I’m compiling the first in what ...",,"dear writers, i’m compiling the first in what ...","dear writers, i’m compiling the first in what ...",dear writers im compiling the first in what i ...,dear writers im compiling the first in what i ...,"[dear, writers, im, compiling, first, hope, se...",dear writers im compiling first hope series pu...
1,"Philosophic\nin its complex, void emptiness,\n...",novel,"philosophic\nin its complex, void emptiness,\n...","philosophic\nin its complex, void emptiness,\n...",philosophic\nin its complex void emptiness\na ...,philosophic in its complex void emptiness a sk...,"[philosophic, complex, void, emptiness, skillf...",philosophic complex void emptiness skillful pu...
2,We'd like to talk with you about fear t...,,we'd like to talk with you about fear t...,we'd like to talk with you about fear t...,wed like to talk with you about fear th...,wed like to talk with you about fear they said...,"[wed, like, talk, fear, said, many, people, li...",wed like talk fear said many people live fear ...
3,The Wise Men will unlearn your name.\nAbove yo...,poetry,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name.\nabove yo...,the wise men will unlearn your name\nabove you...,the wise men will unlearn your name above your...,"[wise, men, unlearn, name, head, star, flame, ...",wise men unlearn name head star flame one wear...
4,For Jack Collom\n10 Things I do Every Day\n\np...,novel,for jack collom\n10 things i do every day\n\np...,for jack collom\n things i do every day\n\npla...,for jack collom\n things i do every day\n\npla...,for jack collom things i do every day play pok...,"[jack, collom, things, every, day, play, poker...",jack collom things every day play poker drink ...


In [27]:
new_df = words[['final_review_text', 'Tag']]
new_df.head()

Unnamed: 0,final_review_text,Tag
0,dear writers im compiling first hope series pu...,
1,philosophic complex void emptiness skillful pu...,novel
2,wed like talk fear said many people live fear ...,
3,wise men unlearn name head star flame one wear...,poetry
4,jack collom things every day play poker drink ...,novel


In [28]:
final_text = new_df['final_review_text'].values
final_text

array(['dear writers im compiling first hope series publications im calling artists among artists theme issue faggot dinosaur hope hear thank best wish',
       'philosophic complex void emptiness skillful pundit coined sort stopgap doorstop quaint equations romans never dreamt form completely clever discretea mirror come unsilvered loose watch face without works hollowed globe tip toe unbroken evades grappling hooks mass tilts thin rim thing remains embryonic sum noncogito',
       'wed like talk fear said many people live fear days drove four small car nice boy said beautiful dogs said friendly man ahead woman two waiting drive outside digging garden one home said selling anyway im interested said well nice day said heres card theres phone number call anytime houses road anyone else live wed like talk living fear',
       'wise men unlearn name head star flame one weary sound hoarse roar gale shadows fall tired eyes lone bedside candle dies calendar breeds nights till stores candles 

**OneHot Encoding**

In [100]:
from tensorflow.keras.preprocessing.text import one_hot
vocab_size = 5000 #vocabulary size
onehot_re = [one_hot(words, vocab_size) for words in final_text ]
onehot_re[1]

[4393,
 4713,
 2887,
 1851,
 3391,
 3798,
 288,
 3155,
 1209,
 1840,
 1046,
 4115,
 1689,
 3904,
 1982,
 2072,
 2415,
 2436,
 4948,
 4419,
 1652,
 4422,
 2511,
 1383,
 3760,
 4741,
 1126,
 2522,
 2174,
 512,
 3579,
 1670,
 1635,
 4131,
 2983,
 1534,
 199,
 3456,
 34,
 990,
 4472,
 158,
 3083,
 178]

**Word Embedding**

In [119]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

import numpy as np

# word embedding
maxlen = 500
padding = pad_sequences(onehot_re, maxlen = maxlen,padding='post')
padding[4]

array([1890, 4789, 1023, 1162,  818,  781,  932, 2771, 1134, 1665, 2949,
       1890, 3705, 4620, 3428, 3161, 4658, 4397, 2417, 4318, 1160, 1268,
       4536,  435, 4828,  726,  506, 1196, 2302,  231, 2941, 1944, 3098,
       1600, 3883, 2945, 4700, 4866, 3013, 3211, 1162,  818, 1540, 4919,
       4220, 3628, 1501,  651, 4067, 2650, 2826, 3884, 3169,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

**Training, Validaton and Testing Split**

In [120]:
from sklearn.model_selection import train_test_split
score = new_df['Tag'].factorize()
score

(array([-1,  0, -1,  1,  0,  0,  1,  1,  0,  0,  1,  1,  1,  1,  1,  1, -1,
         1,  0,  0,  0, -1,  1,  1,  0,  1,  1,  1,  1,  1,  0,  0,  0,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]),
 Index(['novel', 'poetry'], dtype='object'))

In [121]:
X_train, X_test,y_train,y_test = train_test_split(padding,score[0], test_size = 0.2, random_state = 42)

In [122]:
X_train, X_val, y_train, y_val = train_test_split(X_train,y_train,test_size= 0.2 , random_state = 42)

In [123]:
dim = 2

**LSTM model building**

In [125]:
from keras.models import Sequential, Model
from keras.layers import Embedding, Dense, Dropout, Activation, Input, LSTM

model = Sequential()
model.add(Embedding(vocab_size, dim, input_length=maxlen))
model.add(Dropout(.2))
model.add(LSTM(50))
model.add(Dropout(.2))
model.add(Dense(10,activation='relu'))
model.add(Dense(1))


model.compile(optimizer= 'adam',loss='binary_crossentropy', metrics= ['accuracy'])
model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_7 (Embedding)      (None, 500, 2)            10000     
_________________________________________________________________
dropout_14 (Dropout)         (None, 500, 2)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 50)                10600     
_________________________________________________________________
dropout_15 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)                510       
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 11        
Total params: 21,121
Trainable params: 21,121
Non-trainable params: 0
__________________________________________________

**Model Fitting**

In [128]:
%%time
model.fit(X_train,y_train,epochs=20,validation_data= (X_test,y_test),batch_size= 8)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
CPU times: user 30.2 s, sys: 849 ms, total: 31.1 s
Wall time: 18.2 s


<tensorflow.python.keras.callbacks.History at 0x7efc603dfac8>

**Confusion Matrix**  

In [129]:
from sklearn.metrics import confusion_matrix
predictions = (model.predict(X_test))
predictions = np.round(predictions)
cm= confusion_matrix(y_test, predictions)
print(cm)

[[6 0]
 [3 0]]


In [131]:
from sklearn.metrics import accuracy_score

# Getting score metrics from our model
accuracy_score = model.evaluate(X_test, y_test, verbose=0)

# Displays the accuracy of correct sentiment prediction over test data
print("Accuracy: %.2f%%" % (accuracy_score[1]*100))

Accuracy: 66.67%


In [130]:
from sklearn.metrics import classification_report
target_names = ['poetry','novel']
print(classification_report(y_test,predictions, target_names=target_names))

              precision    recall  f1-score   support

      poetry       0.67      1.00      0.80         6
       novel       0.00      0.00      0.00         3

    accuracy                           0.67         9
   macro avg       0.33      0.50      0.40         9
weighted avg       0.44      0.67      0.53         9



  _warn_prf(average, modifier, msg_start, len(result))


In [144]:
# predict on a sample text without padding.
# to select the 4th poetry in the dataframe and to predict its category.
predictions = model.predict(onehot_re[4])
predictions[4]

array([0.05393604], dtype=float32)

In [151]:
if (0 <= predictions[4] <= 1):
  print("Text is Poetry")
elif(predictions[4] == 0):
  print("Text is Novel ")
else: 
  print("Text is N/A")

Text is Poetry
