# Working with Twitter data

In this lecture, you will learn how to preprocess actual twitter data and create word embeddings from a pre-trained source. 

## Import the necessary modules

In [1]:
#we will now use pytorch instead of keras, since tensorflow no longer works with latest version of Python

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
np.random.seed(0)
SEED = 0
import pandas as pd
import numpy as np
import torch #pytorch
import torch.nn as nn
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [3]:
import torch #pytorch
import torch.nn as nn

## Do some initial cleaning using the twitter preprocessing Python package


In [5]:
#step 1: bring in the cleaned up twitter data: used preprocessing python package to clean up the initial twitter data
%cd C:\Users\Max\Documents\TwitterDataModeling\TwitterData
import pandas as pd
twitter_labeled=pd.read_csv("US_AMEX_Tweets_recent2_MV_labels2_05012020_05042020_v2.csv", encoding="utf-8-sig")
#need to save as utf-8 .csv file from excel or other source
twitter_labeled2=twitter_labeled.drop_duplicates()
dups=pd.DataFrame(twitter_labeled2['text'].value_counts()).rename(columns={'text':'count'})
twitter_labeled3=pd.merge(twitter_labeled2, dups, left_on='text', right_index=True, how="inner")
twitter_labeled4=twitter_labeled3.drop(axis=1, index=[226, 760, 235, 757, 247, 759, 535, 330, 680, 780])
twitter_labeled4.drop('count', axis=1, inplace=True)
len(twitter_labeled4) #we have 442 unique tweets

C:\Users\Max\Documents\TwitterDataModeling\TwitterData


442

In [6]:
twitter_labeled4.columns

Index(['text', 'Label'], dtype='object')

## Since the goal here is to capture negative sentiment, let us convert into 2 classes only: 'negative' and 'non-negative'

In [7]:
#step 2: only consider 2 classes, negative and non negative, since we want to capture the negatuve sentiment: create
#training and test sets
twitter_labeled4.loc[:, 'Label_comb']=twitter_labeled4['Label'].map(lambda x: 0 if x==-1 else 1)
twitter_labeled4['Label_comb'].value_counts()
neg_tweets=twitter_labeled4[twitter_labeled4['Label_comb']==0]
non_neg_tweets=twitter_labeled4[twitter_labeled4['Label_comb']==1]
X_train=np.array([x for x in non_neg_tweets['text'][:150]]+[x for x in neg_tweets['text'][:150]])
Y_train=np.array([x for x in non_neg_tweets['Label_comb'][:150]]+[x for x in neg_tweets['Label_comb'][:150]])
X_test=np.array([x for x in non_neg_tweets['text'][150:]]+[x for x in neg_tweets['text'][150:]])
Y_test=np.array([x for x in non_neg_tweets['Label_comb'][150:]]+[x for x in neg_tweets['Label_comb'][150:]])
X_train_test=np.array([x for x in X_train]+[x for x in X_test])
y_train_test=np.array([x for x in Y_train]+[x for x in Y_test])
len(X_train), len(X_test), len(Y_train), len(Y_test), len(X_train_test), len(y_train_test)

(300, 142, 300, 142, 442, 442)

## Import pre-trained glove word embeddings

In [8]:
#step 3: bring in word embedding pre-trained vectors: 
import numpy as np
glove_twitter_file='glove.twitter.27B.50d.txt'
def read_glove_vecs(glove_file):
    with open(glove_file, 'r', encoding='utf-8-sig') as f:
        words = set()
        word_to_vec_map = {}
        for line in f:
            line = line.strip().split()#removes leading and trailing spaces
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
        
        i = 1
        words_to_index = {}
        index_to_words = {}
        for w in sorted(words):
            words_to_index[w] = i
            index_to_words[i] = w
            i = i + 1
    return words_to_index, index_to_words, word_to_vec_map
words_to_index, index_to_words, word_to_vec_map=read_glove_vecs(glove_twitter_file)
wordsintwitterglove=[]
for value in index_to_words.values(): 
    wordsintwitterglove.append(value)
#almost 2MM words


In [9]:
len(wordsintwitterglove)

1193514

In [10]:
print("The word 'cards' in the embedding space is: ")
print(word_to_vec_map['cards'])

The word 'cards' in the embedding space is: 
[ 0.49513   0.0292   -0.41095   0.2833    0.94241  -0.37731   0.84692
 -0.51128   0.45714  -0.58924   0.94307   0.90303  -3.4979    0.29424
 -0.26355   0.42858  -0.38724  -0.47719   0.044124  0.49529  -1.1181
 -0.48781   0.60082  -0.64361  -0.71212   0.36798  -0.059819 -0.58809
  0.67646  -0.75717   0.4728   -0.23525  -0.29401  -0.13993   0.69861
 -0.29542  -0.013882  0.065944 -0.38697   0.23558   0.50186   0.09126
  0.4026   -0.39129   0.73219  -0.52371  -0.048465 -1.2898   -0.022145
  0.42831 ]


In [11]:
len(word_to_vec_map['cards'])

50

In [12]:
#each embedding is a vector of length 50

## Typical steps to clean the data for sentiment analysis: 
* Remove hash tags and other distracting symbols
* Remove numbers
* Convert to lower case
* Remove unnecessary punctuation
* Remove stop words: unnecessary/generic words
* Stem sentences: only retain the main roots
* Tokenize sentences: create a list of words from the final cleaned string above

In [12]:
import nltk

In [13]:
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import TweetTokenizer

In [14]:
type(stopwords)

nltk.corpus.util.LazyCorpusLoader

In [15]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Max\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [16]:
stop_words = set(stopwords.words('english'))
stop_words

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 'r

In [17]:
wordstoremove = ['no', 'nor', 'not','don', "don't","aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", "shouldn't", 'wasn', "wasn't", 'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't" ]
stopwords_english = [x for x in stop_words if x not in wordstoremove]
stopwords_english
 

['why',
 'am',
 'i',
 'this',
 'o',
 'herself',
 'ourselves',
 'to',
 'just',
 'by',
 'with',
 'than',
 'if',
 'should',
 'most',
 'did',
 'his',
 'ma',
 've',
 'me',
 'in',
 'out',
 'again',
 'our',
 'are',
 'my',
 'their',
 'few',
 'were',
 'until',
 'has',
 'here',
 'it',
 'because',
 'had',
 'some',
 'such',
 "you'd",
 'who',
 'having',
 'from',
 'up',
 'about',
 'too',
 'does',
 'into',
 'the',
 'there',
 'for',
 'these',
 'them',
 'shan',
 'be',
 'or',
 'an',
 'aren',
 'll',
 'as',
 "shan't",
 'her',
 'that',
 'any',
 'd',
 'its',
 'very',
 'will',
 'when',
 'own',
 "it's",
 'itself',
 'before',
 'she',
 'of',
 'above',
 'while',
 're',
 'you',
 'shouldn',
 'a',
 'is',
 'same',
 "needn't",
 'we',
 "she's",
 'him',
 'mightn',
 'only',
 'where',
 'which',
 'on',
 'do',
 'against',
 'ain',
 'once',
 'himself',
 'y',
 "you've",
 'down',
 'yourself',
 'yourselves',
 'themselves',
 'yours',
 'other',
 'over',
 'they',
 'how',
 "mustn't",
 'needn',
 'all',
 'can',
 'hers',
 'doing',
 'o

In [18]:
!pip3 install num2words



In [19]:
#step 4: function to clean this twitter data some more to create words that can be mapped to the embedding matrix: 
#function to take in a sentence and return a list of words, which are more ready to be vectorized by the embedding matrix: 
def clean_sentence(sentence): 
    import re
    #import num2word
    from num2words import num2words
    stemmer = PorterStemmer()
    sentence = re.sub(r'\$w*','', sentence) #remove stock market tickers
    sentence = re.sub('\d+', '', sentence) #remove numbers
    sentence = re.sub(r'^RT[\s]+', '', sentence) #remove old style RT in tweet
    sentence = re.sub(r'^https?:\/\/.*[\r\n]*', '', sentence, flags=re.MULTILINE) #remove hyper links
    sentence = re.sub(r'#', '', sentence) #remove hash tags
    sentence = re.sub(r'([A-Za-z0-9]+[.-_])*[A-Za-z0-9]+@[A-Za-z0-9-]+(\.[A-Z|a-z]{2,})+','', sentence) #remove email addresses
    
    punc_to_remove='’!"#%&\'()*+,/:;<=>?@[\\]^_`{|}~' #exclude dollar sign and dot as we want to first convert dollar amounts
    #and decimals to word numerics, and then we can remove these punctuation signs: 
    #print(sentence)
    words=re.split('\s+',sentence.lower())
    table = str.maketrans('', '', punc_to_remove) #remove punctuation as it could be at the end of the word
    words = [w.translate(table) for w in words]
    words=[x for x in words if x!='']
    sentence=" ".join(words)
    #first replace only the actual numbers with words: 
    th_pattern='\d+th'
    th_numbers_to_fix=re.findall(th_pattern, sentence) #list of th numbers to convert to numerics
    dollar_pattern='\$\.?0?\d+\.?\d*'
    dollars_to_fix=re.findall(dollar_pattern, sentence)
    words=re.split('\s+',sentence.lower())
    #print(words)
    remove_numerics=[]
    for word in words: 
        try:
            if(word in th_numbers_to_fix):            
                remove_numerics.append(num2words(int(float(word.replace('th',''))), to='ordinal'))
            elif (word in dollars_to_fix): 
                number=int(float(word.replace('$','')))
                if(number==1): 
                    remove_numerics.append(num2words(int(float(word.replace('$','')))))
                    remove_numerics.append('dollar')
                else: 
                    remove_numerics.append(num2words(int(float(word.replace('$','')))))
                    remove_numerics.append('dollars')  
            else:
                number=int(float(word)) #convert decimal or integer string into an integer
                #remove_numerics.append(num2word.word(number))
        except: 
            remove_numerics.append(word)
    punc_to_remove='$.' #exclude dollar sign
    sentence=" ".join(remove_numerics)
    words=re.split('\s+',sentence.lower())
    table = str.maketrans('', '', punc_to_remove) #remove punctuation as it could be at the end of the word
    words = [w.translate(table) for w in words]
    words=[x for x in words if x not in ('', '-')]
    tweets_clean=[]
    for word in words: 
        if (word not in stopwords_english) and (word not in ('<mailto:>')): 
            stem_word=stemmer.stem(word)
            tweets_clean.append(stem_word)
    return tweets_clean
sentence="I used five 5 to work for American Express and spent some time in NY. It was on her bucket list to visit, so I took her there a year and a half ago. We love living around no one, but NYC is a fun place to visit! Well probably not so much at the moment, but we’ll get through this!"
words=clean_sentence(sentence)
print("Original sentence is: ")
print(" ")
print(sentence)
print(" ")
print("Tokenized sentence is now: ")
print(" ")
print(words)

I used five  to work for American Express and spent some time in NY. It was on her bucket list to visit, so I took her there a year and a half ago. We love living around no one, but NYC is a fun place to visit! Well probably not so much at the moment, but we’ll get through this!
['i', 'used', 'five', 'to', 'work', 'for', 'american', 'express', 'and', 'spent', 'some', 'time', 'in', 'ny.', 'it', 'was', 'on', 'her', 'bucket', 'list', 'to', 'visit', 'so', 'i', 'took', 'her', 'there', 'a', 'year', 'and', 'a', 'half', 'ago.', 'we', 'love', 'living', 'around', 'no', 'one', 'but', 'nyc', 'is', 'a', 'fun', 'place', 'to', 'visit', 'well', 'probably', 'not', 'so', 'much', 'at', 'the', 'moment', 'but', 'well', 'get', 'through', 'this']
Original sentence is: 
 
I used five 5 to work for American Express and spent some time in NY. It was on her bucket list to visit, so I took her there a year and a half ago. We love living around no one, but NYC is a fun place to visit! Well probably not so much at 

In [20]:
%pwd

'C:\\Users\\Max\\Documents\\TwitterDataModeling\\TwitterData'

## Now, let us look at our training and test sets: 


In [21]:
X_train[0:2]

array(['Question of the Day - Is it worth the effort to get the Hilton Ascend credit card that can be linked to Wyndham Diamond and Caesars Diamond status rewards cards?',
       'Today’s Digital Transactions News: T&E Plunge Hammers AmEx; PayFac Volume To Hit $4 Trillion by 2025; Plus Weekly Recap'],
      dtype='<U280')

In [22]:
X_test[0:2]

array(['rumor has it amex might be planning additional benefits to their platinum card... meanwhile the sapphire took a downfall with their recent updates (imo anyway....the benefits are useless to me making the price increase not worth it)',
       '[Targeted] AmEx Offer: , Spend $100+ & Receive $30 Statement Credit + $50 Off $150+'],
      dtype='<U279')

In [23]:
X_test[1]

'[Targeted] AmEx Offer: , Spend $100+ & Receive $30 Statement Credit + $50 Off $150+'

In [24]:
cleaned=clean_sentence(X_test[1])
cleaned

[Targeted] AmEx Offer: , Spend + & Receive  Statement Credit +  Off +
['targeted', 'amex', 'offer', 'spend', 'receive', 'statement', 'credit', 'off']


['target', 'amex', 'offer', 'spend', 'receiv', 'statement', 'credit']

### Step 1: find the maximum number of words across all sentences. 

In [25]:
def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape should be such that it can be given to `Embedding()` 
    
    Arguments:
    X -- array of sentences (strings), of shape (m, 1)
    word_to_index -- a dictionary containing the each word mapped to its index
    max_len -- maximum number of words in a sentence. You can assume every sentence in X is no longer than this. 
    
    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """
    
    m = X.shape[0]                                   # number of training examples
    
    # Initialize X_indices as a numpy matrix of zeros and the correct shape (≈ 1 line)
    X_indices = np.zeros((m,max_len))
    
    for i in range(m):                               # loop over training examples
        
        # Convert the ith training sentence in lower case and split is into words. You should get a list of words.
        sentence_words=clean_sentence(X[i])
        # Initialize j to 0
        j = 0
        
        # Loop over the words of sentence_words
        for w in sentence_words:
            # Set the (i,j)th entry of X_indices to the index of the correct word.
            if (w in wordsintwitterglove): 
                X_indices[i, j] = word_to_index[w]
                # Increment j to j + 1
                j = j+1
            

    
    return X_indices

In [26]:
#testing the above: 
X1 =  X_train[0:2]
print("X1 =", X1)
maxlen=max(max([len(x.split()) for x in X_train]), max([len(x.split()) for x in X_test]))
print("Max len: " + str(maxlen))
X1_indices = sentences_to_indices(X1,words_to_index, max_len = maxlen)
print("X1_indices =", X1_indices)
print("X1_indices shape = ", X1_indices.shape)

X1 = ['Question of the Day - Is it worth the effort to get the Hilton Ascend credit card that can be linked to Wyndham Diamond and Caesars Diamond status rewards cards?'
 'Today’s Digital Transactions News: T&E Plunge Hammers AmEx; PayFac Volume To Hit $4 Trillion by 2025; Plus Weekly Recap']
Max len: 59
Question of the Day - Is it worth the effort to get the Hilton Ascend credit card that can be linked to Wyndham Diamond and Caesars Diamond status rewards cards?
['question', 'of', 'the', 'day', '-', 'is', 'it', 'worth', 'the', 'effort', 'to', 'get', 'the', 'hilton', 'ascend', 'credit', 'card', 'that', 'can', 'be', 'linked', 'to', 'wyndham', 'diamond', 'and', 'caesars', 'diamond', 'status', 'rewards', 'cards']
Today’s Digital Transactions News: T&E Plunge Hammers AmEx; PayFac Volume To Hit  Trillion by ; Plus Weekly Recap
['todays', 'digital', 'transactions', 'news', 'te', 'plunge', 'hammers', 'amex', 'payfac', 'volume', 'to', 'hit', 'trillion', 'by', 'plus', 'weekly', 'recap']
X1_indi

# Pytorch Code


### Step 2: Define an embedding layer

In [27]:
def pretrained_embedding_matrix(word_to_vec_map, word_to_index):
    """
    Creates a Keras Embedding() layer and loads in pre-trained GloVe 50-dimensional vectors.
    
    Arguments:
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary (2MM words)

    Returns:
    embedding_layer -- pretrained layer Keras instance
    """
    
    vocab_len = len(word_to_index) + 1                  # adding 1 to fit Keras embedding (requirement)
    emb_dim = word_to_vec_map["cucumber"].shape[0]      # define dimensionality of your GloVe word vectors (= 50)
    

    # Initialize the embedding matrix as a numpy array of zeros of shape (vocab_len, dimensions of word vectors = emb_dim)
    emb_matrix = np.zeros((vocab_len,emb_dim )) #dim 
    j=0
    for word, index in word_to_index.items():
        if len(word_to_vec_map[word])==50:
            j+=1
            emb_matrix[index, :] = word_to_vec_map[word]
    print(emb_matrix.shape)
   
    return emb_matrix, vocab_len, emb_dim


In [28]:
#test above code
emb_matrix, vocab_len, emb_dim = pretrained_embedding_matrix(word_to_vec_map, words_to_index)
print("Embedding Matrix Shape "+str(emb_matrix.shape))
print("Vocab Len "+str(vocab_len))
print("Emb Dimension "+str(emb_dim))

(1193515, 50)
Embedding Matrix Shape (1193515, 50)
Vocab Len 1193515
Emb Dimension 50


In [29]:
X_train_indices = sentences_to_indices(X_train, words_to_index, maxlen);

Question of the Day - Is it worth the effort to get the Hilton Ascend credit card that can be linked to Wyndham Diamond and Caesars Diamond status rewards cards?
['question', 'of', 'the', 'day', '-', 'is', 'it', 'worth', 'the', 'effort', 'to', 'get', 'the', 'hilton', 'ascend', 'credit', 'card', 'that', 'can', 'be', 'linked', 'to', 'wyndham', 'diamond', 'and', 'caesars', 'diamond', 'status', 'rewards', 'cards']
Today’s Digital Transactions News: T&E Plunge Hammers AmEx; PayFac Volume To Hit  Trillion by ; Plus Weekly Recap
['todays', 'digital', 'transactions', 'news', 'te', 'plunge', 'hammers', 'amex', 'payfac', 'volume', 'to', 'hit', 'trillion', 'by', 'plus', 'weekly', 'recap']
Bet prediction on Brighton & Hove – Man City: MATCH DETAILS: Location: Brighton, AMEX…
['bet', 'prediction', 'on', 'brighton', 'hove', '–', 'man', 'city', 'match', 'details', 'location', 'brighton', 'amex…']
Win big at The Charity Texas Hold'em Tournament! Grand prize winner gets a , entry into the  Main Event. 

The Platinum Card from American Express review
['the', 'platinum', 'card', 'from', 'american', 'express', 'review']
I used to work for American Express and spent some time in NY. It was on her bucket list to visit, so I took her there a year and a half ago. We love living around no one, but NYC is a fun place to visit! Well probably not so much at the moment, but we’ll get through this!
['i', 'used', 'to', 'work', 'for', 'american', 'express', 'and', 'spent', 'some', 'time', 'in', 'ny.', 'it', 'was', 'on', 'her', 'bucket', 'list', 'to', 'visit', 'so', 'i', 'took', 'her', 'there', 'a', 'year', 'and', 'a', 'half', 'ago.', 'we', 'love', 'living', 'around', 'no', 'one', 'but', 'nyc', 'is', 'a', 'fun', 'place', 'to', 'visit', 'well', 'probably', 'not', 'so', 'much', 'at', 'the', 'moment', 'but', 'well', 'get', 'through', 'this']
Let’s up the ante! This week’s Dine-Around Bingo prize will include... tickets to a concert ThePavilionTMF at ToyotaMusicFac ,  Amex gift card & a  restaurant gift 

Serve Customer Care P.O. Box  Fortson, GA  You must include the following when notifying us of a Dispute: Your name and American Express Serve Account number. Description of the transfer or transaction you are unsure about,
['serve', 'customer', 'care', 'p.o.', 'box', 'fortson', 'ga', 'you', 'must', 'include', 'the', 'following', 'when', 'notifying', 'us', 'of', 'a', 'dispute', 'your', 'name', 'and', 'american', 'express', 'serve', 'account', 'number.', 'description', 'of', 'the', 'transfer', 'or', 'transaction', 'you', 'are', 'unsure', 'about']
You can now pay driver license reinstatement fees online at using Visa, MasterCard, Discover, American Express debit/credit cards or electronic check.
['you', 'can', 'now', 'pay', 'driver', 'license', 'reinstatement', 'fees', 'online', 'at', 'using', 'visa', 'mastercard', 'discover', 'american', 'express', 'debitcredit', 'cards', 'or', 'electronic', 'check.']
My only connect for business, info, consultations. No extra letters, numbers or spaces

Justin Bieber. Bringing Presented by TMobile AmexPresale New merch drop with pre-sale access
['justin', 'bieber.', 'bringing', 'presented', 'by', 'tmobile', 'amexpresale', 'new', 'merch', 'drop', 'with', 'pre-sale', 'access']
 GoPro Hero  Black cameras for  with Amex Offer, YMMV
['gopro', 'hero', 'black', 'cameras', 'for', 'with', 'amex', 'offer', 'ymmv']
c) Plaintiffs pursuing treble damages & settlement payouts may bring weak cases; dismissal rate is from a non-random sample. Meanwhile, agencies have brought, e.g., FTC v Surescripts, FTC v Qualcomm, US v. Amex, FTC v Intel, etc.
['c', 'plaintiffs', 'pursuing', 'treble', 'damages', 'settlement', 'payouts', 'may', 'bring', 'weak', 'cases', 'dismissal', 'rate', 'is', 'from', 'a', 'non-random', 'sample.', 'meanwhile', 'agencies', 'have', 'brought', 'e.g.', 'ftc', 'v', 'surescripts', 'ftc', 'v', 'qualcomm', 'us', 'v.', 'amex', 'ftc', 'v', 'intel', 'etc.']
 Tips to Virtually Recruit New Employees via
['tips', 'to', 'virtually', 'recruit', 

Earn From Home: Get a Free , Amex Points or  with New User Bonus via
['earn', 'from', 'home', 'get', 'a', 'free', 'amex', 'points', 'or', 'with', 'new', 'user', 'bonus', 'via']
Earnings April  AT&T T Delta Air Lines DAL Alcoa AA Kinder Morgan KMI Visa V Las Vegas Sands LVS April  Amazon AMZN Domino's Pizza DPZ Freeport-McMoran FCX Intel INTC United Airlines UAL on April  American Airlines AAL American Express AXP Verizon VZ
['earnings', 'april', 'att', 't', 'delta', 'air', 'lines', 'dal', 'alcoa', 'aa', 'kinder', 'morgan', 'kmi', 'visa', 'v', 'las', 'vegas', 'sands', 'lvs', 'april', 'amazon', 'amzn', 'dominos', 'pizza', 'dpz', 'freeport-mcmoran', 'fcx', 'intel', 'intc', 'united', 'airlines', 'ual', 'on', 'april', 'american', 'airlines', 'aal', 'american', 'express', 'axp', 'verizon', 'vz']
For the first time in The Recorded History of Mankind, I received an Amex invoice with a monthly balance of recorded charges totaling .. Those of you who know will want to call or send him a nice not

Mine wasn’t a presale it was like a advantage ticket. I think the amex presale was mostly on ticketmaster that’s where I got mine when I bought them for my American tour dates hahaha. Honestly no idea tho haha
['mine', 'wasnt', 'a', 'presale', 'it', 'was', 'like', 'a', 'advantage', 'ticket.', 'i', 'think', 'the', 'amex', 'presale', 'was', 'mostly', 'on', 'ticketmaster', 'thats', 'where', 'i', 'got', 'mine', 'when', 'i', 'bought', 'them', 'for', 'my', 'american', 'tour', 'dates', 'hahaha.', 'honestly', 'no', 'idea', 'tho', 'haha']
Mobile Payment Market and Ecosystem Analysis, Growth Opportunities (Apple, Google, American ...
['mobile', 'payment', 'market', 'and', 'ecosystem', 'analysis', 'growth', 'opportunities', 'apple', 'google', 'american', '...']
More rate drops today on online savings accounts: AMEX falls to .% APY (was .%, peaked at .%) Live Oak Bank falls to .% APY (was .%, peaked at .%)
['more', 'rate', 'drops', 'today', 'on', 'online', 'savings', 'accounts', 'amex', 'falls', '

Not good and really not helpful. I had to physically mail in the dispute paperwork because the website does not allow downloading or emailing or have real humans to talk too. You all are too big and now I’m looking for a new CC company. Amex handles these issues via chat.
['not', 'good', 'and', 'really', 'not', 'helpful.', 'i', 'had', 'to', 'physically', 'mail', 'in', 'the', 'dispute', 'paperwork', 'because', 'the', 'website', 'does', 'not', 'allow', 'downloading', 'or', 'emailing', 'or', 'have', 'real', 'humans', 'to', 'talk', 'too.', 'you', 'all', 'are', 'too', 'big', 'and', 'now', 'im', 'looking', 'for', 'a', 'new', 'cc', 'company.', 'amex', 'handles', 'these', 'issues', 'via', 'chat.']
 year AMEX Platinum member receiving the worst service imaginable. Platinum travel has Incompetent people working. Flight changed, cannot make tour in time, AMEX will not refund points or money
['year', 'amex', 'platinum', 'member', 'receiving', 'the', 'worst', 'service', 'imaginable.', 'platinum', '

major fail - placed order, was cancelled for no reason  minutes later, restaurant had no idea why, and Uber cash including AmEx platinum monthly bonus not immediately returned. Off to seamless I went. First time using and major disappointment.
['major', 'fail', '-', 'placed', 'order', 'was', 'cancelled', 'for', 'no', 'reason', 'minutes', 'later', 'restaurant', 'had', 'no', 'idea', 'why', 'and', 'uber', 'cash', 'including', 'amex', 'platinum', 'monthly', 'bonus', 'not', 'immediately', 'returned.', 'off', 'to', 'seamless', 'i', 'went.', 'first', 'time', 'using', 'and', 'major', 'disappointment.']
i was denied PPP because you still have me noted as deceased. I wrote a complaint to and you admitted the mistake. but you refused to change it. lied
['i', 'was', 'denied', 'ppp', 'because', 'you', 'still', 'have', 'me', 'noted', 'as', 'deceased.', 'i', 'wrote', 'a', 'complaint', 'to', 'and', 'you', 'admitted', 'the', 'mistake.', 'but', 'you', 'refused', 'to', 'change', 'it.', 'lied']
To the Gol

wait, so the only way I can update my User ID for is to "cancel your current online account and re-register with a new User ID"? Really? This seems like a simple feature for your IT team to tackle in ! Update your Web Product Roadmap.
['wait', 'so', 'the', 'only', 'way', 'i', 'can', 'update', 'my', 'user', 'id', 'for', 'is', 'to', 'cancel', 'your', 'current', 'online', 'account', 'and', 're-register', 'with', 'a', 'new', 'user', 'id', 'really', 'this', 'seems', 'like', 'a', 'simple', 'feature', 'for', 'your', 'it', 'team', 'to', 'tackle', 'in', 'update', 'your', 'web', 'product', 'roadmap.']
Hey been waiting for almost a month for a refund on a damaged item you sent. Customer service was useless so did chargeback on . You have just lost a customer...
['hey', 'been', 'waiting', 'for', 'almost', 'a', 'month', 'for', 'a', 'refund', 'on', 'a', 'damaged', 'item', 'you', 'sent.', 'customer', 'service', 'was', 'useless', 'so', 'did', 'chargeback', 'on', '.', 'you', 'have', 'just', 'lost', 'a'

AMEX IS THE WORST- TWO HOURS bounced around to five people three departments -- lot of talk no action -- compared to how helpful CHASE VISA, BofA VISA and Citi - AMEX IS THE WORST!!!!
['amex', 'is', 'the', 'worst-', 'two', 'hours', 'bounced', 'around', 'to', 'five', 'people', 'three', 'departments', '--', 'lot', 'of', 'talk', 'no', 'action', '--', 'compared', 'to', 'how', 'helpful', 'chase', 'visa', 'bofa', 'visa', 'and', 'citi', '-', 'amex', 'is', 'the', 'worst']
PART  after  years you can't remove . during a pandemic. Hey yah maybe I put new acct number in but has the kingdom gotten so big Amex you can't be human so disappointing,
['part', 'after', 'years', 'you', 'cant', 'remove', '.', 'during', 'a', 'pandemic.', 'hey', 'yah', 'maybe', 'i', 'put', 'new', 'acct', 'number', 'in', 'but', 'has', 'the', 'kingdom', 'gotten', 'so', 'big', 'amex', 'you', 'cant', 'be', 'human', 'so', 'disappointing']
Hey -- Why won't you shut down or at least suspend to investigate a money pool that compromi

Market Snapshot: Dow struggles for foothold higher and heads for weekly loss, as investors weigh earnings, coronavirus outlook
['market', 'snapshot', 'dow', 'struggles', 'for', 'foothold', 'higher', 'and', 'heads', 'for', 'weekly', 'loss', 'as', 'investors', 'weigh', 'earnings', 'coronavirus', 'outlook']
Agreed! I've been dealing with this for years. It seems the Amex feels their prepaid customers are bottom of the barrel and not worth good customer service.
['agreed', 'ive', 'been', 'dealing', 'with', 'this', 'for', 'years.', 'it', 'seems', 'the', 'amex', 'feels', 'their', 'prepaid', 'customers', 'are', 'bottom', 'of', 'the', 'barrel', 'and', 'not', 'worth', 'good', 'customer', 'service.']
says & by cardholders fell %
['says', 'by', 'cardholders', 'fell']
Americans are LITERALLY starving here in Florida. What can you do to get people their money who use your SERVE card? They won't answer the phone. Locked out of the website and we can't even get a tweet from you?
['americans', 'are', 

have been with Amex for  years and have been given false information on payment plans for my small business. NO HELP
['have', 'been', 'with', 'amex', 'for', 'years', 'and', 'have', 'been', 'given', 'false', 'information', 'on', 'payment', 'plans', 'for', 'my', 'small', 'business.', 'no', 'help']
Having done all three extensively, Hyatt is pretty amazing, but just not enough location.
['having', 'done', 'all', 'three', 'extensively', 'hyatt', 'is', 'pretty', 'amazing', 'but', 'just', 'not', 'enough', 'location.']
American Express is set to report tomorrow, and options traders are betting on a big bounce from the beaten down financial services company. lays down the trade. AXP
['american', 'express', 'is', 'set', 'to', 'report', 'tomorrow', 'and', 'options', 'traders', 'are', 'betting', 'on', 'a', 'big', 'bounce', 'from', 'the', 'beaten', 'down', 'financial', 'services', 'company.', 'lays', 'down', 'the', 'trade.', 'axp']
reducing customers credit limits during this time of crisis in Ame

Can’t wait to get that black American Express so I can show them white folks how to really pull the race card
['cant', 'wait', 'to', 'get', 'that', 'black', 'american', 'express', 'so', 'i', 'can', 'show', 'them', 'white', 'folks', 'how', 'to', 'really', 'pull', 'the', 'race', 'card']
Kim Carter I am trying to buy a laptop with zero support from the call I made yesterday. So I compared on line today and chose one with all the support etc. The bank thought the charge was fraud on my visa. You didn’t tell me that so I put in my American Express
['kim', 'carter', 'i', 'am', 'trying', 'to', 'buy', 'a', 'laptop', 'with', 'zero', 'support', 'from', 'the', 'call', 'i', 'made', 'yesterday.', 'so', 'i', 'compared', 'on', 'line', 'today', 'and', 'chose', 'one', 'with', 'all', 'the', 'support', 'etc.', 'the', 'bank', 'thought', 'the', 'charge', 'was', 'fraud', 'on', 'my', 'visa.', 'you', 'didnt', 'tell', 'me', 'that', 'so', 'i', 'put', 'in', 'my', 'american', 'express']
I just disputed it with Am

The  Is Delta Air Lines’ Biggest Small Problem. Can It Swap Them For   MAX Aircraft? - Forbes ⁦⁩ If Delta switches to using unsafe Max you should have to refund our Amex cards and give us cash for our miles! This is bullshit!
['the', 'is', 'delta', 'air', 'lines', 'biggest', 'small', 'problem.', 'can', 'it', 'swap', 'them', 'for', 'max', 'aircraft', '-', 'forbes', '\u2066\u2069', 'if', 'delta', 'switches', 'to', 'using', 'unsafe', 'max', 'you', 'should', 'have', 'to', 'refund', 'our', 'amex', 'cards', 'and', 'give', 'us', 'cash', 'for', 'our', 'miles', 'this', 'is', 'bullshit']
The Dow’s Gains Fade Away as More Grim Economic News Lands
['the', 'dows', 'gains', 'fade', 'away', 'as', 'more', 'grim', 'economic', 'news', 'lands']
The led to a three-quarters drop in profit at in the first quarter as its customers stopped spending on travel and entertainment and it piled .bn into reserves to cover expected defaults.
['the', 'led', 'to', 'a', 'three-quarters', 'drop', 'in', 'profit', 'at', 'i

### Step 3: create LSTM model definition - just 1 LSTM layer

In [30]:
class Emojify_V2_LSTM_OneLayer(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim, emb_matrix):
        super(Emojify_V2_LSTM_OneLayer,self).__init__()
        self.embedding = torch.nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix))
        self.embedding.weight.requires_grad = False 
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

        #Initialise weights for equivalent to keras
        for name, p in self.named_parameters():
            if 'lstm' in name:
                if 'weight_ih' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(p.data)
                elif 'bias_ih' in name:
                    p.data.fill_(0)
                      # Set forget-gate bias to 1
                    n = p.size(0)
                    p.data[(n // 4):(n // 2)].fill_(1)
                elif 'bias_hh' in name:
                    p.data.fill_(0)
            elif 'fc' in name:
                if 'weight' in name:
                    nn.init.xavier_uniform_(p.data)
                elif 'bias' in name:
                    p.data.fill_(0)
      
    def forward(self,x):
        embeds = self.embedding(x)
        lstm_out, (hn, cn) = self.lstm(embeds)
        out = self.fc(lstm_out[:, -1, :])
        return out

In [31]:
embedding_dim = emb_dim
output_dim = 2
hidden_dim = 128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 

In [34]:
from torch.utils.data import DataLoader,TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
tensor_x = torch.LongTensor(X_train_indices).to(device)
tensor_y = torch.LongTensor(Y_train).to(device)
my_dataset = TensorDataset(tensor_x,tensor_y)
train_dataloader = DataLoader(my_dataset, batch_size=32, shuffle=True)

In [35]:
model = Emojify_V2_LSTM_OneLayer(embedding_dim, hidden_dim,output_dim, emb_matrix).double()
model.to(device)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr =0.0001)


Emojify_V2_LSTM_OneLayer(
  (embedding): Embedding(1193515, 50)
  (lstm): LSTM(50, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=2, bias=True)
)


In [36]:
import torch.nn.functional as f
epochs = 50
model.train()
for epoch in range(epochs):
    training_loss = 0
    training_accuracy = 0
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        training_loss += loss.item()
        softmax = f.softmax(output, dim=1)
        pred = torch.argmax(softmax, dim=1)
        accuracy = accuracy_score(labels, pred)
        training_accuracy += accuracy
    
    print("Traing Loss: " + str(training_loss/len(train_dataloader)) + 
          ", Traing Accuracy: " + str(training_accuracy/len(train_dataloader))+ " for Epoch: " + str(epoch))

Traing Loss: 0.6933079207030617, Traing Accuracy: 0.490625 for Epoch: 0
Traing Loss: 0.692794653022592, Traing Accuracy: 0.5364583333333333 for Epoch: 1
Traing Loss: 0.6923313030109818, Traing Accuracy: 0.5791666666666667 for Epoch: 2
Traing Loss: 0.690326337957309, Traing Accuracy: 0.5197916666666667 for Epoch: 3
Traing Loss: 0.6845066651106537, Traing Accuracy: 0.5177083333333333 for Epoch: 4
Traing Loss: 0.6603879530985741, Traing Accuracy: 0.5697916666666667 for Epoch: 5
Traing Loss: 0.6340253500275791, Traing Accuracy: 0.615625 for Epoch: 6
Traing Loss: 0.5917787325861941, Traing Accuracy: 0.7729166666666667 for Epoch: 7
Traing Loss: 0.535109033852477, Traing Accuracy: 0.8052083333333334 for Epoch: 8
Traing Loss: 0.5155415385394707, Traing Accuracy: 0.8020833333333334 for Epoch: 9
Traing Loss: 0.4757014465042325, Traing Accuracy: 0.8104166666666666 for Epoch: 10
Traing Loss: 0.45215314885755387, Traing Accuracy: 0.8208333333333334 for Epoch: 11
Traing Loss: 0.4588375294648305, Tra

### Step 5: Evaluate model performance

In [37]:
X_test_indices = sentences_to_indices(X_test, words_to_index, max_len = maxlen)

rumor has it amex might be planning additional benefits to their platinum card... meanwhile the sapphire took a downfall with their recent updates (imo anyway....the benefits are useless to me making the price increase not worth it)
['rumor', 'has', 'it', 'amex', 'might', 'be', 'planning', 'additional', 'benefits', 'to', 'their', 'platinum', 'card...', 'meanwhile', 'the', 'sapphire', 'took', 'a', 'downfall', 'with', 'their', 'recent', 'updates', 'imo', 'anyway....the', 'benefits', 'are', 'useless', 'to', 'me', 'making', 'the', 'price', 'increase', 'not', 'worth', 'it']
[Targeted] AmEx Offer: , Spend + & Receive  Statement Credit +  Off +
['targeted', 'amex', 'offer', 'spend', 'receive', 'statement', 'credit', 'off']
HUSA - Proxy Statement - Notice of Shareholders Meeting (preliminary) (pre a)
['husa', '-', 'proxy', 'statement', '-', 'notice', 'of', 'shareholders', 'meeting', 'preliminary', 'pre', 'a']
SimplyCash Preferred from American Express review
['simplycash', 'preferred', 'from',

Deb- it was such a pleasure working with you and seeing all you buillt during your tenure at Amex. You’re an inspiration, amazing person and leader. Excited for your next chapter.
['deb-', 'it', 'was', 'such', 'a', 'pleasure', 'working', 'with', 'you', 'and', 'seeing', 'all', 'you', 'buillt', 'during', 'your', 'tenure', 'at', 'amex.', 'youre', 'an', 'inspiration', 'amazing', 'person', 'and', 'leader.', 'excited', 'for', 'your', 'next', 'chapter.']
Trump never ceases to find ways to screw tax payers!! Too bad he’s not donating the rooms of his gaudy hotels to health care workers for free...like Hilton & American Express are doing by donating  million rooms!! Support Hilton & AMEX!!!
['trump', 'never', 'ceases', 'to', 'find', 'ways', 'to', 'screw', 'tax', 'payers', 'too', 'bad', 'hes', 'not', 'donating', 'the', 'rooms', 'of', 'his', 'gaudy', 'hotels', 'to', 'health', 'care', 'workers', 'for', 'free...like', 'hilton', 'american', 'express', 'are', 'doing', 'by', 'donating', 'million', 'ro

Credit card issuers like American Express and Capital One are adding new ways to earn and redeem rewards during quarantine
['credit', 'card', 'issuers', 'like', 'american', 'express', 'and', 'capital', 'one', 'are', 'adding', 'new', 'ways', 'to', 'earn', 'and', 'redeem', 'rewards', 'during', 'quarantine']
Yea she was so rude and didn’t understand that I didn’t want a credit for my flight, just my money back 🤦‍️.. thankfully my credit card company came through
['yea', 'she', 'was', 'so', 'rude', 'and', 'didnt', 'understand', 'that', 'i', 'didnt', 'want', 'a', 'credit', 'for', 'my', 'flight', 'just', 'my', 'money', 'back', '🤦\u200d️..', 'thankfully', 'my', 'credit', 'card', 'company', 'came', 'through']
Get an AMEX gift card to buy anything you want! PROMO CODE: AMEX || Buy  frames, get  || Buy  frames, get  || Buy  frames, get  || ||
['get', 'an', 'amex', 'gift', 'card', 'to', 'buy', 'anything', 'you', 'want', 'promo', 'code', 'amex', 'buy', 'frames', 'get', 'buy', 'frames', 'get', 'buy

A lot of QB accounts are cloud based these days. So yeah, they have an app. Probably overkill though. If you are an Amex card holder though the integration is pretty fantastic.
['a', 'lot', 'of', 'qb', 'accounts', 'are', 'cloud', 'based', 'these', 'days.', 'so', 'yeah', 'they', 'have', 'an', 'app.', 'probably', 'overkill', 'though.', 'if', 'you', 'are', 'an', 'amex', 'card', 'holder', 'though', 'the', 'integration', 'is', 'pretty', 'fantastic.']
Also American Express points gets you gift cards to Gap, Old Navy, Banana Republic and Athleta. So over the years I’ve bought a lot of staples (and stocking stuffers and birthday gifts...they do good product placement of tchotchkes) at Gap and Old Navy for  kids for “free.”
['also', 'american', 'express', 'points', 'gets', 'you', 'gift', 'cards', 'to', 'gap', 'old', 'navy', 'banana', 'republic', 'and', 'athleta.', 'so', 'over', 'the', 'years', 'ive', 'bought', 'a', 'lot', 'of', 'staples', 'and', 'stocking', 'stuffers', 'and', 'birthday', 'gifts

They said money doesn’t buy you happiness but I can’t say I’ve ever been sad cashing in those AMEX points
['they', 'said', 'money', 'doesnt', 'buy', 'you', 'happiness', 'but', 'i', 'cant', 'say', 'ive', 'ever', 'been', 'sad', 'cashing', 'in', 'those', 'amex', 'points']
This AMEX Rep Made Sure I Kept My Membership Rewards When I Called To Cancel A Card via
['this', 'amex', 'rep', 'made', 'sure', 'i', 'kept', 'my', 'membership', 'rewards', 'when', 'i', 'called', 'to', 'cancel', 'a', 'card', 'via']
Just wanted to give another big shout out to companies who are giving their unused snacks away. Thank you and for your donation!
['just', 'wanted', 'to', 'give', 'another', 'big', 'shout', 'out', 'to', 'companies', 'who', 'are', 'giving', 'their', 'unused', 'snacks', 'away.', 'thank', 'you', 'and', 'for', 'your', 'donation']
This is a very good switch. The AmEx/Delta combo is the best.
['this', 'is', 'a', 'very', 'good', 'switch.', 'the', 'amexdelta', 'combo', 'is', 'the', 'best.']
American Exp

AmEx quarterly profit plunges %
['amex', 'quarterly', 'profit', 'plunges']
This IS BS. YOU GUYS WON'T EVEN ANSWER THE PHONE AT SERVE! YOUR OWN PEOPLE SRE SCARED and HUNGRY and YOU HIDE BEHIND CLOSED DOORS AND UNANSWERED PHONE CALLS. TALK TO US! WHAT YOU ARE DOING IS CRIMINAL.
['this', 'is', 'bs.', 'you', 'guys', 'wont', 'even', 'answer', 'the', 'phone', 'at', 'serve', 'your', 'own', 'people', 'sre', 'scared', 'and', 'hungry', 'and', 'you', 'hide', 'behind', 'closed', 'doors', 'and', 'unanswered', 'phone', 'calls.', 'talk', 'to', 'us', 'what', 'you', 'are', 'doing', 'is', 'criminal.']
AXP AmEx quarterly profit plunges % on . billion reserve build
['axp', 'amex', 'quarterly', 'profit', 'plunges', 'on', '.', 'billion', 'reserve', 'build']
This is bullshit
['this', 'is', 'bullshit']
This is true! In  pulled m capital facility from me  weeks after Lehman collapse ...caused massive problems for . never used Amex since and take every chance to share how horrible they are. A great brand in ter

There's never a dull moment in Marketing ! Thank you so much to our latest Zoom speaker, Anne Love, Director of Marketing at and beloved member of our Marketing Advisory Board. Learn more about our wonderful board members at:
['theres', 'never', 'a', 'dull', 'moment', 'in', 'marketing', 'thank', 'you', 'so', 'much', 'to', 'our', 'latest', 'zoom', 'speaker', 'anne', 'love', 'director', 'of', 'marketing', 'at', 'and', 'beloved', 'member', 'of', 'our', 'marketing', 'advisory', 'board.', 'learn', 'more', 'about', 'our', 'wonderful', 'board', 'members', 'at']
I decided to stop by today, got to my boo, Also this guy. I ordered my drink, pulled out my Amex and the the barkeep, Victoria Valez, ex cop and all around racist, pulled out a knife and demanded I take off my shoes and fight her. New policy?
['i', 'decided', 'to', 'stop', 'by', 'today', 'got', 'to', 'my', 'boo', 'also', 'this', 'guy.', 'i', 'ordered', 'my', 'drink', 'pulled', 'out', 'my', 'amex', 'and', 'the', 'the', 'barkeep', 'victo

In [38]:
tensor_x_ = torch.LongTensor(X_test_indices).to(device)
tensor_y_ = torch.LongTensor(Y_test).to(device)
my_dataset_ = TensorDataset(tensor_x_,tensor_y_)
test_dataloader = DataLoader(my_dataset_, batch_size=32, shuffle=True)

In [52]:
def evaluate(model, dataloader, criterion):
    #initialize every epoch
    loss_ = 0
    accuracy_ = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in dataloader:
            output = model(inputs)
            print(output)
            softmax = f.softmax(output, dim=1)
            pred = torch.argmax(softmax, dim=1)
            print("pred is: ", pred)
            accuracy_ += accuracy_score(labels, pred) 
#             f1_score += f1_score(labels, pred) 
#             precision += precision_score(labels, pred) 
#             recall += recall_score(labels, pred) 
    return accuracy_ / len(dataloader)

In [43]:
acc = evaluate(model, test_dataloader, criterion)
print()
print("Test accuracy = ", acc)


Test accuracy =  0.5892857142857142


In [44]:
acc = evaluate(model, train_dataloader, criterion)
print()
print("Train accuracy = ", acc)


Train accuracy =  0.921875


In [46]:
X_test

array(['rumor has it amex might be planning additional benefits to their platinum card... meanwhile the sapphire took a downfall with their recent updates (imo anyway....the benefits are useless to me making the price increase not worth it)',
       '[Targeted] AmEx Offer: , Spend $100+ & Receive $30 Statement Credit + $50 Off $150+',
       '$HUSA - Proxy Statement - Notice of Shareholders Meeting (preliminary) (pre 14a)',
       'SimplyCash Preferred from American Express review',
       'Steps To American Express Log In | Log Into Your American Express Online Account',
       'Stocks making the biggest moves in the premarket: American Express, Verizon, Intel, Tesla & more -',
       'Thanks for comment, . We developed a resource center businesses struggling with the changes brought on by COVID-19 – articles, talking points, podcasts and more aim to help us see the light at the end of the tunnel. Check it out at',
       'That’s the catch, you need to use those points at that level b

In [None]:
#code below is to score a new sample sentence

In [50]:
max_test=np.array(['i like amex'])
Y_test_max = np.array([1])

In [51]:
X_test_indices2 = sentences_to_indices(max_test, words_to_index, max_len = maxlen)
tensor_x_ = torch.LongTensor(X_test_indices2).to(device)
tensor_y_ = torch.LongTensor(Y_test_max).to(device)
my_dataset_ = TensorDataset(tensor_x_,tensor_y_)
test_dataloader_max = DataLoader(my_dataset_, batch_size=32, shuffle=True)

i like amex
['i', 'like', 'amex']


In [49]:
Y_test

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [53]:
evaluate(model, test_dataloader_max, criterion)

tensor([[-1.3670,  0.9496]], dtype=torch.float64)
pred is:  tensor([1])


1.0