In [None]:
import pandas as pd
from collections import namedtuple
from nltk.corpus import stopwords
from gensim.utils import simple_preprocess
from gensim.models import doc2vec

from collections import OrderedDict
import multiprocessing

In [38]:
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report
from sklearn.random_projection import SparseRandomProjection

from collections import Counter
from sklearn.pipeline import make_pipeline
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline as make_pipeline_imb
from imblearn.metrics import classification_report_imbalanced

In [11]:
assert gensim.models.doc2vec.FAST_VERSION > -1, "This will be painfully slow otherwise"

In [4]:
#Load x_train, y_train, x_test, y_test
x_train=pd.read_csv('../Data/text x_train.csv',header=None,names=['Index','Text'])
y_train = pd.read_csv('../Data/text y_train.csv',header=None,names=['Index','Label'])
x_test = pd.read_csv('../Data/text x_test.csv',header=None,names=['Index','Text'])
y_test = pd.read_csv('../Data/text y_test.csv',header=None, names=['Index','Label'])

In [5]:
train_documents = list(x_train.itertuples(index=False))

In [12]:
test_documents = list(x_test.itertuples(index=False))

In [6]:
%%time
# Transform data
docs = []
stops=set(stopwords.words('english'))
analyzedDocument = namedtuple('AnalyzedDocument', 'words tags')
for d in train_documents:
    #tokenize
    tokens = simple_preprocess(d[1])
    #Remove stop words
    filt_doc = [word for word in tokens if word not in stops]
#     print(filt_doc)
    docs.append(analyzedDocument(filt_doc,[d[0]]))

Wall time: 4min 42s


In [13]:
#transform test data
#tokenize
tokens =[simple_preprocess(doc[1]) for doc in test_documents]
#remove stop words
filt_test_docs = [[word for word in token if word not in stops]
            for token in tokens]


In [28]:
%%time
#Train doc2vec model
model=doc2vec.Doc2Vec(docs,dm=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=4, alpha=0.05, comment='alpha=0.05')

Wall time: 9min


In [29]:
#Save model
model.save('doc2vec_model_dm1')

In [2]:
model = doc2vec.Doc2Vec.load('doc2vec_model_dm1') 

In [7]:
%%time
#Build the final feature vectors for the classifier
#use infer_vector() to “retrain” the document vector
#vector = model.infer_vector(["system", "response"])

train_regressors = [model.infer_vector(doc.words, steps=10) for doc in docs]


Wall time: 26min 55s


# Logistic Regression
***

In [10]:
from sklearn.linear_model import LogisticRegression

In [11]:
clf = LogisticRegression().fit(train_regressors, y_train['Label'].tolist())



In [17]:
%%time
#Infer vectors for test set
test_regressors = [model.infer_vector(doc, steps=20) for doc in filt_test_docs]

Wall time: 23min 20s


In [18]:
y_pred = clf.predict(test_regressors)

In [21]:
print(classification_report_imbalanced(y_test['Label'], y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.88      0.99      0.04      0.93      0.20      0.04     20295
          1       0.47      0.04      0.99      0.07      0.20      0.04      2975

avg / total       0.82      0.87      0.16      0.82      0.20      0.04     23270



In [22]:
#Accuracy on test set
score=metrics.accuracy_score(y_test['Label'],y_pred)
score

0.8715513536742587

In [23]:
#Confusion matrix
print('Confusion matrix:')
print(metrics.confusion_matrix(y_test['Label'],y_pred))

Confusion matrix:
[[20160   135]
 [ 2854   121]]


# Balancing the class before classification
***

In [26]:
#Initial class distribution
print(sorted(Counter(y_train['Label']).items()))

[(0, 47354), (1, 6940)]


## Random under-sampling

In [29]:
%%time
#make pipeline. include random under sampling
under_sample_pipe = make_pipeline_imb(RandomUnderSampler(),LogisticRegression())
under_sample_pipe.fit(train_regressors, y_train['Label'].tolist())



Wall time: 679 ms


In [30]:
#Make predictions on test set
y_pred = under_sample_pipe.predict(test_regressors)
print(classification_report_imbalanced(y_test['Label'], y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.92      0.71      0.57      0.80      0.64      0.41     20295
          1       0.22      0.57      0.71      0.32      0.64      0.40      2975

avg / total       0.83      0.69      0.59      0.74      0.64      0.41     23270



In [31]:
#Accuracy on test set
score=metrics.accuracy_score(y_test['Label'],y_pred)
score

0.6883111302105716

In [32]:
#Confusion matrix
print('Confusion matrix:')
print(metrics.confusion_matrix(y_test['Label'],y_pred))

Confusion matrix:
[[14314  5981]
 [ 1272  1703]]


## Random over-sampling

In [33]:
%%time
#make pipeline. include random under sampling
over_sample_pipe = make_pipeline_imb(RandomOverSampler(),LogisticRegression())
over_sample_pipe.fit(train_regressors, y_train['Label'].tolist())



Wall time: 4.41 s


In [34]:
#Make predictions on test set
y_pred = over_sample_pipe.predict(test_regressors)
print(classification_report_imbalanced(y_test['Label'], y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.92      0.70      0.58      0.80      0.64      0.41     20295
          1       0.22      0.58      0.70      0.32      0.64      0.40      2975

avg / total       0.83      0.69      0.60      0.74      0.64      0.41     23270



In [35]:
#Accuracy on test set
score=metrics.accuracy_score(y_test['Label'],y_pred)
score

0.6879673399226471

In [36]:
#Confusion matrix
print('Confusion matrix:')
print(metrics.confusion_matrix(y_test['Label'],y_pred))

Confusion matrix:
[[14284  6011]
 [ 1250  1725]]


## SMOTE

In [39]:
%%time
#make pipeline. include random under sampling
smote_pipe = make_pipeline_imb(SMOTE(random_state=42),LogisticRegression())
smote_pipe.fit(train_regressors, y_train['Label'].tolist())



Wall time: 24.1 s


In [40]:
#Make predictions on test set
y_pred = smote_pipe.predict(test_regressors)
print(classification_report_imbalanced(y_test['Label'], y_pred))

                   pre       rec       spe        f1       geo       iba       sup

          0       0.93      0.62      0.66      0.74      0.64      0.41     20295
          1       0.20      0.66      0.62      0.31      0.64      0.41      2975

avg / total       0.83      0.63      0.65      0.69      0.64      0.41     23270



In [41]:
#Accuracy on test set
score=metrics.accuracy_score(y_test['Label'],y_pred)
score

0.6276321443919209

In [42]:
#Confusion matrix
print('Confusion matrix:')
print(metrics.confusion_matrix(y_test['Label'],y_pred))

Confusion matrix:
[[12644  7651]
 [ 1014  1961]]


In [None]:
#ROC plot

# calculate the fpr and tpr for all thresholds of the classification
fpr, tpr, threshold = metrics.roc_curve(y_test, y_pred)
roc_auc = metrics.auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
def get_vectors(doc2vec_model, corpus_size, vectors_size, vectors_type):

    """

    Get vectors from trained doc2vec model

    :param doc2vec_model: Trained Doc2Vec model

    :param corpus_size: Size of the data

    :param vectors_size: Size of the embedding vectors

    :param vectors_type: Training or Testing vectors

    :return: list of vectors

    """

    vectors = np.zeros((corpus_size, vectors_size))

    for i in range(0, corpus_size):

        prefix = vectors_type + '_' + str(i)

        vectors[i] = doc2vec_model.docvecs[prefix]

    return vectors





def train_doc2vec(corpus):

    logging.info("Building Doc2Vec vocabulary")

    d2v = doc2vec.Doc2Vec(min_count=1,  # Ignores all words with total frequency lower than this

                          window=10,  # The maximum distance between the current and predicted word within a sentence

                          vector_size=300,  # Dimensionality of the generated feature vectors

                          workers=5,  # Number of worker threads to train the model

                          alpha=0.025,  # The initial learning rate

                          min_alpha=0.00025,  # Learning rate will linearly drop to min_alpha as training progresses

                          dm=1)  # dm defines the training algorithm. If dm=1 means ‘distributed memory’ (PV-DM)

                                 # and dm =0 means ‘distributed bag of words’ (PV-DBOW)

    d2v.build_vocab(corpus)



    logging.info("Training Doc2Vec model")

    # 10 epochs take around 10 minutes on my machine (i7), if you have more time/computational power make it 20

    for epoch in range(10):

        logging.info('Training iteration #{0}'.format(epoch))

        d2v.train(corpus, total_examples=d2v.corpus_count, epochs=d2v.iter)

        # shuffle the corpus

        random.shuffle(corpus)

        # decrease the learning rate

        d2v.alpha -= 0.0002

        # fix the learning rate, no decay

        d2v.min_alpha = d2v.alpha



    logging.info("Saving trained Doc2Vec model")

    d2v.save("d2v.model")

    return d2v





def train_classifier(d2v, training_vectors, training_labels):

    logging.info("Classifier training")

    train_vectors = get_vectors(d2v, len(training_vectors), 300, 'Train')

    model = LogisticRegression()

    model.fit(train_vectors, np.array(training_labels))

    training_predictions = model.predict(train_vectors)

    logging.info('Training predicted classes: {}'.format(np.unique(training_predictions)))

    logging.info('Training accuracy: {}'.format(accuracy_score(training_labels, training_predictions)))

    logging.info('Training F1 score: {}'.format(f1_score(training_labels, training_predictions, average='weighted')))

    return model





def test_classifier(d2v, classifier, testing_vectors, testing_labels):

    logging.info("Classifier testing")

    test_vectors = get_vectors(d2v, len(testing_vectors), 300, 'Test')

    testing_predictions = classifier.predict(test_vectors)

    logging.info('Testing predicted classes: {}'.format(np.unique(testing_predictions)))

    logging.info('Testing accuracy: {}'.format(accuracy_score(testing_labels, testing_predictions)))

    logging.info('Testing F1 score: {}'.format(f1_score(testing_labels, testing_predictions, average='weighted')))





if __name__ == "__main__":

    x_train, x_test, y_train, y_test, all_data = read_dataset('dataset.csv')

    d2v_model = train_doc2vec(all_data)

    classifier = train_classifier(d2v_model, x_train, y_train)

    test_classifier(d2v_model, classifier, x_test, y_test)

In [None]:
Doc2Vec(dm=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=cores, alpha=0.05, comment='alpha=0.05'),


In [None]:
Doc2Vec(dm=0, vector_size=100, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=cores)

In [None]:
# Train model (set min_count = 1, if you want the model to work with the provided example data set

model = doc2vec.Doc2Vec(docs, size = 100, window = 300, min_count = 1, workers = 4)

In [22]:
cores = multiprocessing.cpu_count()
cores

12

In [None]:
model = Doc2Vec(sents, size=1, window=100, iter=20, dm=1)

In [None]:

model = doc2vec.Doc2Vec(docs, size = 300, window = 10, dm=1, negative=5, hs=0, min_count = 1, workers = 4, iter = 20)

In [None]:
[5]:
model = gensim.models.doc2vec.Doc2Vec(size=100, min_count=2, iter=55)

In [None]:
%%time
cores = multiprocessing.cpu_count()


simple_models = [
    # PV-DBOW plain
    Doc2Vec(dm=0, vector_size=100, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=cores),
    # PV-DM w/ default averaging; a higher starting alpha may improve CBOW/PV-DM modes
    Doc2Vec(dm=1, vector_size=100, window=10, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=cores, alpha=0.05, comment='alpha=0.05'),
    # PV-DM w/ concatenation - big, slow, experimental mode
    # window=5 (both sides) approximates paper's apparent 10-word total window size
    Doc2Vec(dm=1, dm_concat=1, vector_size=100, window=5, negative=5, hs=0, min_count=2, sample=0, 
            epochs=20, workers=cores),
]

for model in simple_models:
    model.build_vocab(alldocs)
    print("%s vocabulary scanned & state initialized" % model)

models_by_name = OrderedDict((str(model), model) for model in simple_models)

In [1]:

# from sklearn.linear_model import LogisticRegression
# from sklearn.linear_model import SGDClassifier
# from sklearn.pipeline import Pipeline
# from sklearn.model_selection import GridSearchCV
# from sklearn.metrics import classification_report

# from gensim.models import Doc2Vec

# from gensim.models.doc2vec import TaggedDocument

In [2]:
# import torch
# import torch.nn as nn
# from torch.autograd import Variable
# from torch.nn import functional as F

# import re
# import seaborn as sns
# import matplotlib.pyplot as plt

# Bag of Words
***

# Logistic Regression
***

In [15]:
clf_logistic = LogisticRegression().fit(X_new, y_train)

  y = column_or_1d(y, warn=True)


In [17]:
clf_logistic.score(X_new,y_train)

0.8877776549895016

In [27]:
#Performance on test set
predicted = clf_logistic.predict(X_test_new)
score=metrics.accuracy_score(y_test,predicted)
score

0.8721529866781264

In [28]:
#Classification report
print('Classification report:')
print(metrics.classification_report(y_test,predicted))

Classification report:
             precision    recall  f1-score   support

          0       0.87      1.00      0.93     20295
          1       0.00      0.00      0.00      2975

avg / total       0.76      0.87      0.81     23270



  'precision', 'predicted', average, warn_for)


In [29]:
#Confusion matrix
print('Confusion matrix:')
print(metrics.confusion_matrix(y_test,predicted))

Confusion matrix:
[[20295     0]
 [ 2975     0]]


In [38]:
# # remove punctuation from each word
# import string
# table = str.maketrans('', '', string.punctuation)
# stripped = [w.translate(table) for w in tokens]

# # remove remaining tokens that are not alphabetic
# words = [word for word in stripped if word.isalpha()]

# # filter out stop words
# from nltk.corpus import stopwords
# stop_words = set(stopwords.words('english'))
# words = [w for w in words if not w in stop_words]

In [110]:
# # Stratified Train/Test split

# stratified_split = StratifiedShuffleSplit(n_splits=2, test_size=0.3)
# for train_index, test_index in stratified_split.split(X, y):
#     x_train, x_test = X[train_index], X[test_index]
#     y_train, y_test = y[train_index], y[test_index]

# # transform matrix of plots into lists to pass to a TfidfVectorizer
# train_x = [x[0].strip() for x in x_train.tolist()]
# test_x = [x[0].strip() for x in x_test.tolist()]

In [None]:
# #Naive Bayes

# pipeline = Pipeline([
#     ('tfidf', TfidfVectorizer(stop_words=stop_words)),
#     ('clf', OneVsRestClassifier(MultinomialNB(
#         fit_prior=True, class_prior=None))),
# ])
# parameters = {
#     'tfidf__max_df': (0.25, 0.5, 0.75),
#     'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
#     'clf__estimator__alpha': (1e-2, 1e-3)
# }

In [None]:
from sklearn.cross_validation import train_test_split
# split the dataset into training and validation datasets 
train_x, valid_x, train_y, valid_y = train_test_split(X, y, test_size=0.3)

In [None]:
def train_model(classifier, feature_vector_train, label, feature_vector_valid):
    # fit the training dataset on the classifier
    classifier.fit(feature_vector_train, label)
    
    # predict the labels on validation dataset
    predictions = classifier.predict(feature_vector_valid)
    
    return metrics.accuracy_score(predictions, valid_y)

In [None]:
#Naive Bayes

# Naive Bayes on Count Vectors
accuracy = train_model(MultinomialNB(), xtrain_count, train_y, xvalid_count)
print("NB, Count Vectors: ", accuracy)

# Naive Bayes on Word Level TF IDF Vectors
accuracy = train_model(MultinomialNB(), xtrain_tfidf, train_y, xvalid_tfidf)
print("NB, Word TF-IDF: ", accuracy)

In [None]:
#Read metadata to obtain the class labels






In [None]:
#Remove stop words


In [None]:
#Stemming


In [None]:
#Naive Bayes

pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words=stop_words)),
    ('clf', OneVsRestClassifier(MultinomialNB(
        fit_prior=True, class_prior=None))),
])
parameters = {
    'tfidf__max_df': (0.25, 0.5, 0.75),
    'tfidf__ngram_range': [(1, 1), (1, 2), (1, 3)],
    'clf__estimator__alpha': (1e-2, 1e-3)
}

In [None]:
#Logistic Regression

clf = MultinomialNB().fit(X_train_tfidf, twenty_train.target)

text_clf = Pipeline([('vect', CountVectorizer(stop_words='english')),
...                      ('tfidf', TfidfTransformer()),
...                      ('clf', MultinomialNB()),
... ])

text_clf = text_clf.fit(twenty_train.data, twenty_train.target)

In [None]:
#Performance on test set

twenty_test = fetch_20newsgroups(subset='test', shuffle=True)
predicted = text_clf.predict(twenty_test.data)
np.mean(predicted == twenty_test.target)

In [None]:
#Linear SVM


>>> text_clf_svm = Pipeline([('vect', CountVectorizer()),
...                      ('tfidf', TfidfTransformer()),
...                      ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
...                                            alpha=1e-3, n_iter=5, random_state=42)),
... ])
>>> _ = text_clf_svm.fit(twenty_train.data, twenty_train.target)
>>> predicted_svm = text_clf_svm.predict(twenty_test.data)
>>> np.mean(predicted_svm == twenty_test.target)

In [None]:
#Grid Search

>>> parameters = {'vect__ngram_range': [(1, 1), (1, 2)],
...               'tfidf__use_idf': (True, False),
...               'clf__alpha': (1e-2, 1e-3),
... }

gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)
gs_clf = gs_clf.fit(twenty_train.data, twenty_train.target)

gs_clf.best_score_
gs_clf.best_params_



In [None]:
grid_search_tune = GridSearchCV(
    pipeline, parameters, cv=2, n_jobs=2, verbose=3)
grid_search_tune.fit(train_x, train_y)

print
print("Best parameters set:")
print grid_search_tune.best_estimator_.steps
print

# measuring performance on test set
print "Applying best classifier on test data:"
best_clf = grid_search_tune.best_estimator_
predictions = best_clf.predict(test_x)

print classification_report(test_y, predictions, target_names=genres)

In [None]:
>>> from sklearn.model_selection import GridSearchCV
>>> parameters_svm = {'vect__ngram_range': [(1, 1), (1, 2)],
...               'tfidf__use_idf': (True, False),
...               'clf-svm__alpha': (1e-2, 1e-3),
... }
gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)
gs_clf_svm = gs_clf_svm.fit(twenty_train.data, twenty_train.target)
gs_clf_svm.best_score_
gs_clf_svm.best_params_

In [None]:
#Performance






# Word embeddings model
***

In [None]:
#Train a word embedding network

In [1]:
import multiprocessing
cores = multiprocessing.cpu_count()

In [2]:
cores

12

In [None]:
#Build a vocabulary

model_dbow = Doc2Vec(dm=0, vector_size=300, negative=5, hs=0, min_count=2, sample = 0, workers=cores)
model_dbow.build_vocab([x for x in tqdm(train_tagged.values)])

In [None]:
#Train doc2vec

%%time
for epoch in range(30):
    model_dbow.train(utils.shuffle([x for x in tqdm(train_tagged.values)]), total_examples=len(train_tagged.values), epochs=1)
    model_dbow.alpha -= 0.002
    model_dbow.min_alpha = model_dbow.alpha

In [None]:
#Build final feature vector for classifier

def vec_for_learning(model, tagged_docs):
    sents = tagged_docs.values
    targets, regressors = zip(*[(doc.tags[0], model.infer_vector(doc.words, steps=20)) for doc in sents])
    return targets, regressorsdef vec_for_learning(model, tagged_docs):
    sents = tagged_docs.values
    targets, regressors = zip(*[(doc.tags[0], model.infer_vector(doc.words, steps=20)) for doc in sents])
    return targets, regressors

In [None]:
#Train Logistic Regression Classifier

y_train, X_train = vec_for_learning(model_dbow, train_tagged)
y_test, X_test = vec_for_learning(model_dbow, test_tagged)
logreg = LogisticRegression(n_jobs=1, C=1e5)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
from sklearn.metrics import accuracy_score, f1_score
print('Testing accuracy %s' % accuracy_score(y_test, y_pred))
print('Testing F1 score: {}'.format(f1_score(y_test, y_pred, average='weighted')))



In [None]:
# load the pre-trained word-embedding vectors 
embeddings_index = {}
for i, line in enumerate(open('data/wiki-news-300d-1M.vec')):
    values = line.split()
    embeddings_index[values[0]] = numpy.asarray(values[1:], dtype='float32')

# create a tokenizer 
token = text.Tokenizer()
token.fit_on_texts(trainDF['text'])
word_index = token.word_index

# convert text to sequence of tokens and pad them to ensure equal length vectors 
train_seq_x = sequence.pad_sequences(token.texts_to_sequences(train_x), maxlen=70)
valid_seq_x = sequence.pad_sequences(token.texts_to_sequences(valid_x), maxlen=70)

# create token-embedding mapping
embedding_matrix = numpy.zeros((len(word_index) + 1, 300))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# Word Embedding + Convolutional Neural Network
***

In [None]:
class CNN(nn.Module):

    def __init__(self, batch_size, output_size, in_channels, out_channels, kernel_heights, stride, padding, keep_probab, vocab_size, embedding_length, weights):

        super(CNN, self).__init__()


		"""

		Arguments

		---------

		batch_size : Size of each batch which is same as the batch_size of the data returned by the TorchText BucketIterator

		output_size : 2 = (pos, neg)

		in_channels : Number of input channels. Here it is 1 as the input data has dimension = (batch_size, num_seq, embedding_length)

		out_channels : Number of output channels after convolution operation performed on the input matrix

		kernel_heights : A list consisting of 3 different kernel_heights. Convolution will be performed 3 times and finally results from each kernel_height will be concatenated.

		keep_probab : Probability of retaining an activation node during dropout operation

		vocab_size : Size of the vocabulary containing unique words

		embedding_length : Embedding dimension of GloVe word embeddings

		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table

		--------

		

		"""

        self.batch_size = batch_size
        self.output_size = output_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_heights = kernel_heights
        self.stride = stride
        self.padding = padding
        self.vocab_size = vocab_size
        self.embedding_length = embedding_length


        self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
        self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False)
        self.conv1 = nn.Conv2d(in_channels, out_channels, (kernel_heights[0], embedding_length), stride, padding)
        self.conv2 = nn.Conv2d(in_channels, out_channels, (kernel_heights[1], embedding_length), stride, padding)
        self.conv3 = nn.Conv2d(in_channels, out_channels, (kernel_heights[2], embedding_length), stride, padding)
        self.dropout = nn.Dropout(keep_probab)
        self.label = nn.Linear(len(kernel_heights)*out_channels, output_size)

    def conv_block(self, input, conv_layer):

        conv_out = conv_layer(input)# conv_out.size() = (batch_size, out_channels, dim, 1)
        activation = F.relu(conv_out.squeeze(3))# activation.size() = (batch_size, out_channels, dim1)
        max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2)# maxpool_out.size() = (batch_size, out_channels)

        return max_out


    def forward(self, input_sentences, batch_size=None):


		"""

		The idea of the Convolutional Neural Netwok for Text Classification is very simple. We perform convolution operation on the embedding matrix 

		whose shape for each batch is (num_seq, embedding_length) with kernel of varying height but constant width which is same as the embedding_length.

		We will be using ReLU activation after the convolution operation and then for each kernel height, we will use max_pool operation on each tensor 

		and will filter all the maximum activation for every channel and then we will concatenate the resulting tensors. This output is then fully connected

		to the output layers consisting two units which basically gives us the logits for both positive and negative classes.

		

		Parameters

		----------

		input_sentences: input_sentences of shape = (batch_size, num_sequences)

		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)

		

		Returns

		-------

		Output of the linear layer containing logits for pos & neg class.

		logits.size() = (batch_size, output_size)

		

		"""


        input = self.word_embeddings(input_sentences)
        # input.size() = (batch_size, num_seq, embedding_length)
        input = input.unsqueeze(1)
        # input.size() = (batch_size, 1, num_seq, embedding_length)
        max_out1 = self.conv_block(input, self.conv1)
        max_out2 = self.conv_block(input, self.conv2)
        max_out3 = self.conv_block(input, self.conv3)


        all_out = torch.cat((max_out1, max_out2, max_out3), 1)
        # all_out.size() = (batch_size, num_kernels*out_channels)

        fc_in = self.dropout(all_out)

        # fc_in.size()) = (batch_size, num_kernels*out_channels)

        logits = self.label(fc_in)



        return logits

# Conclusion
***

# Predictions on new data (115th Congress)
***