In [1]:
import pandas as pd
import numpy as np
import re
import scipy

### Creating dataframe from csv

In [2]:
# Load the tweet export; `created_at` is parsed into datetimes while reading.
# NOTE(review): dayfirst=True is kept as-is — confirm it matches the date layout in the CSV.
df = pd.read_csv(
    'tweet_csvs/realDonaldTrump_tweets.csv',
    index_col=None,
    header=0,
    parse_dates=['created_at'],
    infer_datetime_format=True,
    dayfirst=True,
)

In [3]:
# Working frame whose `text` column the cleaning steps below overwrite stage
# by stage, while `df` keeps the original text. It is copied from the frame
# that was just loaded rather than parsing the same CSV a second time, which
# also guarantees that both frames start from identical data.
df_1 = df.copy()

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3567 entries, 0 to 3566
Data columns (total 3 columns):
id            3567 non-null int64
created_at    3567 non-null datetime64[ns]
text          3567 non-null object
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 83.7+ KB


In [5]:
df_1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3567 entries, 0 to 3566
Data columns (total 3 columns):
id            3567 non-null int64
created_at    3567 non-null datetime64[ns]
text          3567 non-null object
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 83.7+ KB


In [6]:
df.head(2)

Unnamed: 0,id,created_at,text
0,820251730407473153,2017-01-14 12:50:26,Congressman John Lewis should spend more time ...
1,820255947956383744,2017-01-14 13:07:12,mention crime infested) rather than falsely co...


In [7]:
df.tail(2)

Unnamed: 0,id,created_at,text
3565,1020287981020729344,2018-07-20 12:43:05,"China, the European Union and others have been..."
3566,1020285014616002560,2018-07-20 12:31:18,My deepest sympathies to the families and frie...


# Data Pre-processing

In [8]:
def remove_by_regex(tweets, regexp):
    """Replace every match of ``regexp`` in the ``text`` column with one space.

    The ``text`` column is overwritten on the frame that was passed in, so
    successive calls on the same frame accumulate their clean-ups.

    Parameters
    ----------
    tweets : pandas.DataFrame
        Frame holding a ``text`` column.
    regexp : compiled regular expression
        Pattern whose matches are blanked out.

    Returns
    -------
    pandas.DataFrame
        The same ``tweets`` object, returned for call chaining.
    """
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the Series returned by .loc: that Series may be a temporary copy, in
    # which case the in-place edit would never reach `tweets`.
    tweets["text"] = tweets["text"].replace(regexp, " ")
    return tweets

Remove URLs

In [9]:
#remove_by_regex(df_1, re.compile(r"http.?://[^\s]+[\s]?"));

In [10]:
# Blank out URLs in df_1['text'] (remove_by_regex edits df_1 itself and returns it)
# and store the resulting column on df as 'text_no_urls'.
df['text_no_urls'] = remove_by_regex(df_1, re.compile(r"http.?://[^\s]+[\s]?"))['text']

In [11]:
df.head()

Unnamed: 0,id,created_at,text,text_no_urls
0,820251730407473153,2017-01-14 12:50:26,Congressman John Lewis should spend more time ...,Congressman John Lewis should spend more time ...
1,820255947956383744,2017-01-14 13:07:12,mention crime infested) rather than falsely co...,mention crime infested) rather than falsely co...
2,820257714362314753,2017-01-14 13:14:13,INTELLIGENCE INSIDERS NOW CLAIM THE TRUMP DOSS...,INTELLIGENCE INSIDERS NOW CLAIM THE TRUMP DOSS...
3,820425770925338624,2017-01-15 00:22:01,Congressman John Lewis should finally focus on...,Congressman John Lewis should finally focus on...
4,820450166331346944,2017-01-15 01:58:57,Inauguration Day is turning out to be even big...,Inauguration Day is turning out to be even big...


In [12]:
df['text'][55]

'TO ALL AMERICANS🇺🇸 https://t.co/D7Es6ie4fY'

In [13]:
df['text_no_urls'][55]

'TO ALL AMERICANS🇺🇸  '

Remove usernames (mentions)

In [14]:
# Blank out @mentions in df_1['text']. df_1 was already modified by the URL step,
# so the column stored here has both URLs and mentions removed.
df['text_no_urls_names'] = remove_by_regex(df_1, re.compile(r"@[^\s]+[\s]?"))['text']

In [15]:
df['text'][55]

'TO ALL AMERICANS🇺🇸 https://t.co/D7Es6ie4fY'

In [16]:
df['text_no_urls_names'][55]

'TO ALL AMERICANS🇺🇸  '

Remove numbers

In [17]:
# Blank out digit runs (optionally with a decimal part) in df_1['text'], on top of
# the earlier URL and mention removal. One optional leading whitespace character is
# consumed by the match and replaced together with the number by a single space.
df['text_no_urls_names_nums'] = remove_by_regex(df_1, re.compile(r"\s?[0-9]+\.?[0-9]*"))['text']

In [18]:
df['text'][4]

'Inauguration Day is turning out to be even bigger than expected. January 20th, Washington D.C. Have fun!'

In [19]:
df['text_no_urls_names_nums'][4]

'Inauguration Day is turning out to be even bigger than expected. January th, Washington D.C. Have fun!'

Remove punctuation and special symbols (this also converts hashtags to "normal" words)

In [20]:
# Replace every punctuation / special character in df_1['text'] with a space
# (this also strips the leading "#" from hashtags).
# A single character class does the work of the former 32 sequential passes:
# each listed character maps to one space, and the multi-character tokens
# ("--", "---", "...") could never match anyway because their characters had
# already been replaced one by one earlier in the loop.
punctuation_re = re.compile(
    "[" + re.escape(",:\"=&;%$@^*(){}[]|/\\><-!?.'#") + "]"
)
# Assign back rather than calling replace(..., inplace=True) on a .loc result,
# which may operate on a temporary copy and leave df_1 untouched.
df_1["text"] = df_1["text"].replace(punctuation_re, " ")

In [21]:
df['text_cleaned'] = df_1['text']

In [22]:
df['text'][40]

'On my way! #Inauguration2017 https://t.co/hOuMbxGnpe'

In [23]:
df['text_cleaned'][40]

'On my way   Inauguration   '

In [24]:
# Drop the intermediate cleaning columns. Rebinding `df` (instead of
# inplace=True) together with errors='ignore' lets this cell be re-run
# without a KeyError once the columns are already gone.
df = df.drop(
    labels=['text_no_urls', 'text_no_urls_names', 'text_no_urls_names_nums'],
    axis=1,
    errors='ignore',
)

In [25]:
df.columns

Index(['id', 'created_at', 'text', 'text_cleaned'], dtype='object')

## Stemming

In [26]:
from nltk import PorterStemmer

In [27]:
stemmer = PorterStemmer()

In [28]:
# Lower-case each cleaned tweet, stem it token by token with `stemmer`,
# and join the stems back together with single spaces.
df['text_stemming'] = df['text_cleaned'].map(
    lambda cleaned: ' '.join(map(stemmer.stem, cleaned.lower().split()))
)

In [29]:
df['text'][25]

'RT @MoskowitzEva: .@BetsyDeVos has the talent, commitment, and leadership capacity to revitalize our public schools and deliver the promise…'

In [30]:
df['text_stemming'][25]

'rt ha the talent commit and leadership capac to revit our public school and deliv the promise…'

***

## Train sets

In [31]:
df_train = df['text'].tolist()

In [32]:
df_cleaned_train = df['text_cleaned'].tolist()

In [33]:
df_stemmed_train = df['text_stemming'].tolist()

### CountVectorizer

In [34]:
from sklearn.feature_extraction.text import CountVectorizer

In [35]:
# Three separate, identically configured unigram count vectorizers
# (English stop words removed): one each for raw, cleaned and stemmed text.
vect_bow, vect_cleaned_bow, vect_stemmed_bow = (
    CountVectorizer(ngram_range=(1, 1), stop_words='english') for _ in range(3)
)

In [36]:
# Fit each count vectorizer on its own corpus and keep the document-term matrices.
trump_bow, trump_cleaned_bow, trump_stemmed_bow = (
    vectorizer.fit_transform(corpus)
    for vectorizer, corpus in (
        (vect_bow, df_train),
        (vect_cleaned_bow, df_cleaned_train),
        (vect_stemmed_bow, df_stemmed_train),
    )
)

In [37]:
trump_bow.shape, trump_cleaned_bow.shape, trump_stemmed_bow.shape

((3567, 8486), (3567, 6582), (3567, 4885))

### TF-IDF

In [38]:
from sklearn.feature_extraction.text import TfidfVectorizer
#from collections import Counter

# TfidfVectorizer extracts the n-grams for us. Three separate, identically
# configured unigram vectorizers (English stop words removed): one each for
# raw, cleaned and stemmed text.
vect_tfidf, vect_cleaned_tfidf, vect_stemmed_tfidf = (
    TfidfVectorizer(ngram_range=(1, 1), stop_words='english') for _ in range(3)
)

# Pulls all of Trump's tweet texts into one giant string
#summaries = "".join(df['text'])
#ngrams_summaries = vect_tfidf.build_analyzer()(summaries)

#Counter(ngrams_summaries).most_common(20)

In [39]:
# Fit each TF-IDF vectorizer on its own corpus and keep the weighted matrices.
trump_tfidf, trump_cleaned_tfidf, trump_stemmed_tfidf = (
    vectorizer.fit_transform(corpus)
    for vectorizer, corpus in (
        (vect_tfidf, df_train),
        (vect_cleaned_tfidf, df_cleaned_train),
        (vect_stemmed_tfidf, df_stemmed_train),
    )
)

In [40]:
trump_tfidf.shape, trump_cleaned_tfidf.shape, trump_stemmed_tfidf.shape

((3567, 8486), (3567, 6582), (3567, 4885))

## Test Strings

In [41]:
#test sentence
test_sentence = 'To all the little girls watching...never doubt that you are valuable and powerful & deserving of every chance & opportunity in the world.'
test = ['Come on and kill Kenny!', 
        'Make America great again!', 
        'Beer... Beeeeer... Beeeeeeeeer... WOO-HOO!']

In [42]:
test_bow = vect_bow.transform(test)
test_tfidf = vect_tfidf.transform(test)

In [43]:
test_bow.shape, test_tfidf.shape

((3, 8486), (3, 8486))

Cleaning test sentences

In [44]:
test = pd.Series(test)

In [45]:
# Blank out URLs, @mentions and numbers in the test sentences, using the same
# patterns as for the training tweets. The replacement value " " must be
# passed explicitly: without it Series.replace performs no regex substitution
# (it takes the legacy fill-from-neighbour path, deprecated in newer pandas).
test_cleaned = test.replace(re.compile(r"http.?://[^\s]+[\s]?"), " ")
test_cleaned = test_cleaned.replace(re.compile(r"@[^\s]+[\s]?"), " ")
test_cleaned = test_cleaned.replace(re.compile(r"\s?[0-9]+\.?[0-9]*"), " ")

In [46]:
test_sentence = pd.Series(test_sentence)
test_sentence[0]

'To all the little girls watching...never doubt that you are valuable and powerful & deserving of every chance & opportunity in the world.'

In [47]:
# Blank out URLs. The replacement value " " is passed explicitly; without it
# Series.replace performs no regex substitution at all.
test_sentence = test_sentence.replace(re.compile(r"http.?://[^\s]+[\s]?"), " ")
test_sentence[0]

'To all the little girls watching...never doubt that you are valuable and powerful & deserving of every chance & opportunity in the world.'

In [48]:
# Blank out @mentions. The replacement value " " is passed explicitly; without
# it Series.replace performs no regex substitution at all.
test_sentence = test_sentence.replace(re.compile(r"@[^\s]+[\s]?"), " ")
test_sentence[0]

'To all the little girls watching...never doubt that you are valuable and powerful & deserving of every chance & opportunity in the world.'

In [49]:
# Blank out numbers. The replacement value " " is passed explicitly; without
# it Series.replace performs no regex substitution at all.
test_sentence = test_sentence.replace(re.compile(r"\s?[0-9]+\.?[0-9]*"), " ")
test_sentence[0]

'To all the little girls watching...never doubt that you are valuable and powerful & deserving of every chance & opportunity in the world.'

In [50]:
# Replace every punctuation / special character in the test sentences with a
# space. A single character class does the work of the former 32 sequential
# passes: each listed character maps to one space, and the multi-character
# tokens ("--", "---", "...") could never match because their characters had
# already been replaced one by one.
# NOTE(review): this edits `test` itself, so the later "no pre-processing"
# comparisons that iterate over `test` see punctuation-free text — confirm
# that this is intended.
punctuation_re = re.compile(
    "[" + re.escape(",:\"=&;%$@^*(){}[]|/\\><-!?.'#") + "]"
)
test.replace(punctuation_re, " ", inplace=True)

In [51]:
# Replace every punctuation / special character in test_sentence with a space.
# A single character class does the work of the former 32 sequential passes:
# each listed character maps to one space, and the multi-character tokens
# ("--", "---", "...") could never match because their characters had already
# been replaced one by one.
punctuation_re = re.compile(
    "[" + re.escape(",:\"=&;%$@^*(){}[]|/\\><-!?.'#") + "]"
)
test_sentence.replace(punctuation_re, " ", inplace=True)

In [52]:
test_sentence[0]

'To all the little girls watching   never doubt that you are valuable and powerful   deserving of every chance   opportunity in the world '

In [53]:
# Rebind test_cleaned to the very same Series object as `test` (an alias, not a copy).
# `test` was punctuation-cleaned in place above; this assignment replaces whatever the
# URL / mention / number pass had previously stored in test_cleaned.
test_cleaned = test

In [54]:
test_cleaned

0                       Come on and kill Kenny 
1                     Make America great again 
2    Beer    Beeeeer    Beeeeeeeeer    WOO HOO 
dtype: object

Stemming test sentences

In [55]:
# Lower-case the cleaned sentence, stem each token with `stemmer`, and re-join with spaces.
test_sentence_stemmed = test_sentence.map(
    lambda cleaned: ' '.join(map(stemmer.stem, cleaned.lower().split()))
)
test_sentence_stemmed[0]

'to all the littl girl watch never doubt that you are valuabl and power deserv of everi chanc opportun in the world'

In [56]:
# Lower-case each cleaned test sentence, stem each token with `stemmer`, and re-join with spaces.
test_stemmed = test_cleaned.map(
    lambda cleaned: ' '.join(map(stemmer.stem, cleaned.lower().split()))
)

In [57]:
test_stemmed

0              come on and kill kenni
1            make america great again
2    beer beeeeer beeeeeeeeer woo hoo
dtype: object

In [58]:
test_cleaned_bow = vect_cleaned_bow.transform(test_cleaned)
test_stemmed_bow = vect_stemmed_bow.transform(test_stemmed)
test_cleaned_tfidf = vect_cleaned_tfidf.transform(test_cleaned)
test_stemmed_tfidf = vect_stemmed_tfidf.transform(test_stemmed)

In [59]:
test_tfidf.shape, test_cleaned_tfidf.shape, test_stemmed_tfidf.shape

((3, 8486), (3, 6582), (3, 4885))

### OneClassSVM

In [60]:
from sklearn import svm

In [61]:
# Six independent, identically configured RBF one-class SVMs:
# one per (bag-of-words | TF-IDF) x (raw | cleaned | stemmed) feature space.
(ocsvm_bow, ocsvm_cleaned_bow, ocsvm_stemmed_bow,
 ocsvm_tfidf, ocsvm_cleaned_tfidf, ocsvm_stemmed_tfidf) = (
    svm.OneClassSVM(nu=0.5, kernel='rbf', gamma=0.1) for _ in range(6)
)

In [62]:
# Label vectors for the fit calls: one `1` per training row of each matrix.
y_true_bow = [1] * trump_bow.shape[0]
y_true_cleaned_bow = [1] * trump_cleaned_bow.shape[0]
y_true_stemmed_bow = [1] * trump_stemmed_bow.shape[0]
y_true_tfidf = [1] * trump_tfidf.shape[0]
y_true_cleaned_tfidf = [1] * trump_cleaned_tfidf.shape[0]
y_true_stemmed_tfidf = [1] * trump_stemmed_tfidf.shape[0]

No pre-processing

In [63]:
# Fit on the raw bag-of-words matrix, then classify the test sentences
# (the output below shows values of 1 and -1).
prediction_bow = ocsvm_bow.fit(trump_bow, y=y_true_bow).predict(test_bow)
prediction_bow

array([1, 1, 1])

In [64]:
# Fit on the raw TF-IDF matrix, then classify the test sentences.
prediction_tfidf = ocsvm_tfidf.fit(trump_tfidf, y=y_true_tfidf).predict(test_tfidf)
prediction_tfidf

array([-1,  1,  1])

Cleaned data

In [65]:
# Fit on the cleaned bag-of-words matrix, then classify the cleaned test sentences.
prediction_cleaned_bow = (
    ocsvm_cleaned_bow
    .fit(trump_cleaned_bow, y=y_true_cleaned_bow)
    .predict(test_cleaned_bow)
)
prediction_cleaned_bow

array([1, 1, 1])

In [66]:
# Fit on the cleaned TF-IDF matrix, then classify the cleaned test sentences.
prediction_cleaned_tfidf = (
    ocsvm_cleaned_tfidf
    .fit(trump_cleaned_tfidf, y=y_true_cleaned_tfidf)
    .predict(test_cleaned_tfidf)
)
prediction_cleaned_tfidf

array([-1,  1,  1])

Stemmed data

In [67]:
# Fit on the stemmed bag-of-words matrix, then classify the stemmed test sentences.
prediction_stemmed_bow = (
    ocsvm_stemmed_bow
    .fit(trump_stemmed_bow, y=y_true_stemmed_bow)
    .predict(test_stemmed_bow)
)
prediction_stemmed_bow

array([1, 1, 1])

In [68]:
# Fit on the stemmed TF-IDF matrix, then classify the stemmed test sentences.
prediction_stemmed_tfidf = (
    ocsvm_stemmed_tfidf
    .fit(trump_stemmed_tfidf, y=y_true_stemmed_tfidf)
    .predict(test_stemmed_tfidf)
)
prediction_stemmed_tfidf

array([-1,  1,  1])

### word2vec

In [69]:
from gensim.models import Word2Vec

In [70]:
from nltk.tokenize import wordpunct_tokenize, TweetTokenizer, RegexpTokenizer

In [71]:
regexp_tok = RegexpTokenizer(r'\w+')

In [72]:
%%time
# Split every raw tweet into word tokens with the \w+ tokenizer.
tokenized_tweets = [regexp_tok.tokenize(raw_tweet) for raw_tweet in df_train]

CPU times: user 46.7 ms, sys: 3.81 ms, total: 50.5 ms
Wall time: 49.6 ms


In [73]:
%%time
w2v_model = Word2Vec(tokenized_tweets, min_count = 1)

CPU times: user 1.91 s, sys: 20.6 ms, total: 1.94 s
Wall time: 944 ms


In [74]:
# Token lists for every raw training tweet, used as the reference side of the
# Word Mover's Distance computation.
df_train_tokenized = [regexp_tok.tokenize(raw_tweet) for raw_tweet in df_train]

In [75]:
%%time
# For each test sentence, compute the Word Mover's Distance to every training
# tweet and record the largest of those distances.
# NOTE(review): np.max keeps the distance to the *farthest* tweet, whereas the
# cosine-similarity cells keep the best match. If the closest tweet is what is
# wanted, this should probably be np.min — confirm intent before changing.
wmd_list = []
for sentence in test:
    # Tokenise once per sentence instead of once per (sentence, tweet) pair.
    sentence_tokens = regexp_tok.tokenize(sentence)
    wmd_list_temp = [
        w2v_model.wv.wmdistance(tweet, sentence_tokens)
        for tweet in df_train_tokenized
    ]
    wmd_list.append(np.max(wmd_list_temp))

CPU times: user 28.6 s, sys: 49.7 ms, total: 28.6 s
Wall time: 28.6 s


In [76]:
# Flag distances that came back as infinity.
for distance in wmd_list:
    if np.isinf(distance):
        print('{} is NOT a valid value'.format(distance))
    else:
        print('{} is a valid value'.format(distance))

6.101997933602571 is a valid value
6.198953959381104 is a valid value
inf is NOT a valid value


### Cosine Similarity & Tests

In [77]:
from sklearn.metrics.pairwise import cosine_similarity

In [78]:
test

0                       Come on and kill Kenny 
1                     Make America great again 
2    Beer    Beeeeer    Beeeeeeeeer    WOO HOO 
dtype: object

In [79]:
test_cleaned

0                       Come on and kill Kenny 
1                     Make America great again 
2    Beer    Beeeeer    Beeeeeeeeer    WOO HOO 
dtype: object

In [80]:
test_stemmed

0              come on and kill kenni
1            make america great again
2    beer beeeeer beeeeeeeeer woo hoo
dtype: object

In [81]:
# For each test sentence, report its highest cosine similarity to any training
# tweet under the raw bag-of-words and TF-IDF models, next to the OneClassSVM
# predictions computed earlier. enumerate() replaces the hand-maintained
# counter, and each maximum is computed once and reused.
for i, sentence in enumerate(test):
    sentence_bow = vect_bow.transform([sentence])
    cos_dists_bow = cosine_similarity(trump_bow, sentence_bow)

    sentence_tfidf = vect_tfidf.transform([sentence])
    cos_dists_tfidf = cosine_similarity(trump_tfidf, sentence_tfidf)

    # Best match over all training tweets for each representation.
    best_bow = np.max(cos_dists_bow)
    best_tfidf = np.max(cos_dists_tfidf)
    mean_cos_dist = np.mean([best_bow, best_tfidf])

    print('>>> {}'.format(sentence))
    print('Cosine similarity Bag-Of-Words: {}'.format(round(best_bow, 3)))
    print('Cosine similarity TF-IDF: {}'.format(round(best_tfidf, 3)))
    print('Mean Cosine Similarity: {}\n'.format(round(mean_cos_dist, 3)))
    print('OneClassSVM BOW prediction: {}'.format(prediction_bow[i]))
    print('OneClassSVM TF-IDF prediction: {}'.format(prediction_tfidf[i]))
    print('===========================================\n')
    #print('Word2Vec Word Mover`s Distance: {}\n'.format(wmd_list[i]))

>>> Come on and kill Kenny 
Cosine similarity Bag-Of-Words: 0.267
Cosine similarity TF-IDF: 0.329
Mean Cosine Similarity: 0.298

OneClassSVM BOW prediction: 1
OneClassSVM TF-IDF prediction: -1

>>> Make America great again 
Cosine similarity Bag-Of-Words: 1.0
Cosine similarity TF-IDF: 1.0
Mean Cosine Similarity: 1.0

OneClassSVM BOW prediction: 1
OneClassSVM TF-IDF prediction: 1

>>> Beer    Beeeeer    Beeeeeeeeer    WOO HOO 
Cosine similarity Bag-Of-Words: 0.0
Cosine similarity TF-IDF: 0.0
Mean Cosine Similarity: 0.0

OneClassSVM BOW prediction: 1
OneClassSVM TF-IDF prediction: 1



In [82]:
# For each cleaned test sentence, report its highest cosine similarity to any
# training tweet under the cleaned bag-of-words and TF-IDF models, next to the
# OneClassSVM predictions computed earlier. enumerate() replaces the
# hand-maintained counter, and each maximum is computed once and reused.
for i, sentence in enumerate(test_cleaned):
    sentence_cleaned_bow = vect_cleaned_bow.transform([sentence])
    cos_dists_cleaned_bow = cosine_similarity(trump_cleaned_bow, sentence_cleaned_bow)

    sentence_cleaned_tfidf = vect_cleaned_tfidf.transform([sentence])
    cos_dists_cleaned_tfidf = cosine_similarity(trump_cleaned_tfidf, sentence_cleaned_tfidf)

    # Best match over all training tweets for each representation.
    best_cleaned_bow = np.max(cos_dists_cleaned_bow)
    best_cleaned_tfidf = np.max(cos_dists_cleaned_tfidf)
    mean_cos_cleaned_dist = np.mean([best_cleaned_bow, best_cleaned_tfidf])

    print('>>> {}'.format(sentence))
    print('Cosine similarity Bag-Of-Words (cleaned): {}'.format(round(best_cleaned_bow, 3)))
    print('Cosine similarity TF-IDF (cleaned): {}'.format(round(best_cleaned_tfidf, 3)))
    print('Mean Cosine Similarity (cleaned): {}\n'.format(round(mean_cos_cleaned_dist, 3)))
    print('OneClassSVM BOW prediction (cleaned): {}'.format(prediction_cleaned_bow[i]))
    print('OneClassSVM TF-IDF prediction (cleaned): {}'.format(prediction_cleaned_tfidf[i]))
    print('===========================================\n')
    #print('Word2Vec Word Mover`s Distance: {}\n'.format(wmd_list[i]))

>>> Come on and kill Kenny 
Cosine similarity Bag-Of-Words (cleaned): 0.267
Cosine similarity TF-IDF (cleaned): 0.329
Mean Cosine Similarity (cleaned): 0.298

OneClassSVM BOW prediction (cleaned): 1
OneClassSVM TF-IDF prediction (cleaned): -1

>>> Make America great again 
Cosine similarity Bag-Of-Words (cleaned): 1.0
Cosine similarity TF-IDF (cleaned): 1.0
Mean Cosine Similarity (cleaned): 1.0

OneClassSVM BOW prediction (cleaned): 1
OneClassSVM TF-IDF prediction (cleaned): 1

>>> Beer    Beeeeer    Beeeeeeeeer    WOO HOO 
Cosine similarity Bag-Of-Words (cleaned): 0.0
Cosine similarity TF-IDF (cleaned): 0.0
Mean Cosine Similarity (cleaned): 0.0

OneClassSVM BOW prediction (cleaned): 1
OneClassSVM TF-IDF prediction (cleaned): 1



Stemmed test text

In [83]:
# For each stemmed test sentence, report its highest cosine similarity to any
# training tweet under the stemmed bag-of-words and TF-IDF models, next to the
# OneClassSVM predictions computed earlier. enumerate() replaces the
# hand-maintained counter, and each maximum is computed once and reused.
for i, sentence in enumerate(test_stemmed):
    sentence_stemmed_bow = vect_stemmed_bow.transform([sentence])
    cos_dists_stemmed_bow = cosine_similarity(trump_stemmed_bow, sentence_stemmed_bow)

    sentence_stemmed_tfidf = vect_stemmed_tfidf.transform([sentence])
    cos_dists_stemmed_tfidf = cosine_similarity(trump_stemmed_tfidf, sentence_stemmed_tfidf)

    # Best match over all training tweets for each representation.
    best_stemmed_bow = np.max(cos_dists_stemmed_bow)
    best_stemmed_tfidf = np.max(cos_dists_stemmed_tfidf)
    mean_cos_stemmed_dist = np.mean([best_stemmed_bow, best_stemmed_tfidf])

    print('>>> {}'.format(sentence))
    print('Cosine similarity Bag-Of-Words (stemmed): {}'.format(round(best_stemmed_bow, 3)))
    print('Cosine similarity TF-IDF (stemmed): {}'.format(round(best_stemmed_tfidf, 3)))
    print('Mean Cosine Similarity (stemmed): {}\n'.format(round(mean_cos_stemmed_dist, 3)))
    print('OneClassSVM BOW prediction (stemmed): {}'.format(prediction_stemmed_bow[i]))
    print('OneClassSVM TF-IDF prediction (stemmed): {}'.format(prediction_stemmed_tfidf[i]))
    print('===========================================\n')
    #print('Word2Vec Word Mover`s Distance: {}\n'.format(wmd_list[i]))

>>> come on and kill kenni
Cosine similarity Bag-Of-Words (stemmed): 0.416
Cosine similarity TF-IDF (stemmed): 0.409
Mean Cosine Similarity (stemmed): 0.412

OneClassSVM BOW prediction (stemmed): 1
OneClassSVM TF-IDF prediction (stemmed): -1

>>> make america great again
Cosine similarity Bag-Of-Words (stemmed): 1.0
Cosine similarity TF-IDF (stemmed): 1.0
Mean Cosine Similarity (stemmed): 1.0

OneClassSVM BOW prediction (stemmed): 1
OneClassSVM TF-IDF prediction (stemmed): 1

>>> beer beeeeer beeeeeeeeer woo hoo
Cosine similarity Bag-Of-Words (stemmed): 0.0
Cosine similarity TF-IDF (stemmed): 0.0
Mean Cosine Similarity (stemmed): 0.0

OneClassSVM BOW prediction (stemmed): 1
OneClassSVM TF-IDF prediction (stemmed): 1



### Saving models

In [84]:
import os

try:
    # scikit-learn 0.23 removed sklearn.externals.joblib; use the standalone package.
    import joblib
except ImportError:
    # Older environments that only have the copy bundled with scikit-learn.
    from sklearn.externals import joblib

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs('models', exist_ok=True)

#joblib.dump(df_train_tokenized, 'models/df_train_tokenized.pkl')

# Every object below is written to 'models/<name>.pkl'.
artifacts = [
    #BOW Models
    ('vect_bow', vect_bow),
    ('vect_cleaned_bow', vect_cleaned_bow),
    ('vect_stemmed_bow', vect_stemmed_bow),
    ('trump_bow', trump_bow),
    ('trump_cleaned_bow', trump_cleaned_bow),
    ('trump_stemmed_bow', trump_stemmed_bow),
    #TF-IDF Models
    ('vect_tfidf', vect_tfidf),
    ('vect_cleaned_tfidf', vect_cleaned_tfidf),
    ('vect_stemmed_tfidf', vect_stemmed_tfidf),
    ('trump_tfidf', trump_tfidf),
    ('trump_cleaned_tfidf', trump_cleaned_tfidf),
    ('trump_stemmed_tfidf', trump_stemmed_tfidf),
    #OneClassSVM Model
    ('ocsvm_bow', ocsvm_bow),
    ('ocsvm_cleaned_bow', ocsvm_cleaned_bow),
    ('ocsvm_stemmed_bow', ocsvm_stemmed_bow),
    ('ocsvm_tfidf', ocsvm_tfidf),
    ('ocsvm_cleaned_tfidf', ocsvm_cleaned_tfidf),
    ('ocsvm_stemmed_tfidf', ocsvm_stemmed_tfidf),
]

for artifact_name, artifact in artifacts:
    joblib.dump(artifact, 'models/{}.pkl'.format(artifact_name))

#joblib.dump(w2v_model, 'models/w2v.pkl')

['models/ocsvm_stemmed_tfidf.pkl']