In [1]:
import pandas as pd
import numpy as np
import ast
import random
import re

from collections import Counter, defaultdict 

import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import wordnet as wn
nltk.download('averaged_perceptron_tagger')


from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

import matplotlib.pyplot as plt

import inflect
p = inflect.engine()

import warnings
warnings.filterwarnings('ignore')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\thrdl\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
Using TensorFlow backend.


In [2]:
def get_words(df):
    sentences = df.text.to_list()    
    flat_list = []
    for sublist in sentences:
        for item in sublist:
            flat_list.append(item)           
    return flat_list

def get_nouns(words):
    nouns = []
    porter = PorterStemmer()
    for word, tag in words:
            if p.singular_noun(word):
                nouns.append(p.singular_noun(word)) #plural nouns to singular nouns
            elif tag.startswith('NN'):
                nouns.append(word)
            #nouns.append(porter.stem(word)) #convert words to their basic form
    return nouns

def get_noun_pairs(words):
    nouns = []
    noun_pairs = []
    porter = PorterStemmer()
    for i, (word, tag) in enumerate(words):
        if i > 0 and tag.startswith('NN') and (words[i-1][1].startswith('NN') or words[i-1][1].startswith('JJ') or words[i-1][1].startswith('PRP')):
            w1 = p.singular_noun(word) if p.singular_noun(word) else word 
            w0 = p.singular_noun(words[i-1][0]) if p.singular_noun(words[i-1][0]) else words[i-1][0]
            noun_pairs.append(w0+" "+w1)
        
        if p.singular_noun(word):
            nouns.append(p.singular_noun(word)) #plural nouns to singular nouns
        elif tag.startswith('NN'):
            nouns.append(word)
            #nouns.append(porter.stem(word)) #convert words to their basic form
    return nouns, noun_pairs

def bow(sentence, corpus, pairs):
    bag_vector = np.zeros(len(corpus)+len(pairs))
    for w in sentence:     
        if p.singular_noun(w):
            w = p.singular_noun(w)
        for i,word in enumerate(corpus):
            if word == w:
                bag_vector[i] += 1
    
    words_together = " ".join(sentence)
    for i, pair in enumerate(pairs):
        ctn = len(re.findall(pair, words_together))
        bag_vector[len(corpus)+i] = ctn
    
    return pd.Series(bag_vector)

def bow_sentiment(sentence, corpus, sentiment, pairs):
    bag_vector = np.zeros(len(corpus)*3 + len(pairs))
    #each sentence (in this case comment) has its own dictionary
    for w, sentiment_dic in zip(sentence, sentiment):
        #each word in a post that happens to be in corpus is also in sentiment_dic
        if p.singular_noun(w):
            w = p.singular_noun(w)
        if w in sentiment_dic:
            #in this case corpus is only needed for indexing - this can be changed by passing a dictionary [word] -> index in bag_vector
            i = corpus.index(w)
            bag_vector[i*3:i*3+3] = sentiment_dic[w]
            
    words_together = " ".join(sentence)
    for i, pair in enumerate(pairs):
        ctn = len(re.findall(pair, words_together))
        bag_vector[len(corpus)*3+i] = ctn
    
    return pd.Series(bag_vector)

def get_used_noun_frequency(dic_1, dic_2, threshold_significance = 10):
    significant_nouns = {}
    for key, val in dic_1.items():
        significant_nouns[key] = [val, 0]
    
    for key, val in dic_2.items():
        if key in significant_nouns:
            significant_nouns[key] = [significant_nouns[key][0], val]
        else:
            significant_nouns[key] = [0, val]
            
    for key, val in significant_nouns.copy().items():
        if (val[0] < threshold_significance) and (val[1] < threshold_significance):
            del significant_nouns[key]
        
    print(f"number of significant nouns: {len(significant_nouns.keys())}")
    return significant_nouns

def get_imbalanced_nouns(dic, coef = 2.2):
    nouns_of_interest = []
    
    for key, val in dic.items():
        if max(val[0], val[1]) - min(val[0], val[1]) * coef > 0:
            nouns_of_interest.append(key)
    print(f"number of nouns of interest: {len(nouns_of_interest)}")
    return nouns_of_interest

def get_nouns_and_their_synonims(nouns_of_interest, dic_all_nouns, threshold = 0.9):
    dic_of_noun_differences = {}
    
    for noun in nouns_of_interest:
            word_meanings = wn.synsets(noun, 'n')
            for word_meaning in word_meanings:
                for key, val in dic_all_nouns.items():
                    if key != noun:

                            word_meanings_target = wn.synsets(key, 'n')
                            for word_meaning_target in word_meanings_target:
                                if word_meaning.wup_similarity(word_meaning_target) > threshold:
                                    if noun not in dic_of_noun_differences:
                                        dic_of_noun_differences[noun] = set()
                                        dic_of_noun_differences[noun].add(key)
                                    else:
                                        dic_of_noun_differences[noun].add(key)
    return dic_of_noun_differences

def nouns_synonyms_set_of_significance(dic_of_noun_differences, dic_noun_frequency):
    for key, val in dic_of_noun_differences.items():
        #key = word, val = set of synonyms
        for noun in val.copy():
            if (dic_noun_frequency[key][0] >= dic_noun_frequency[key][1] and dic_noun_frequency[noun][0] >= dic_noun_frequency[noun][1]) or (dic_noun_frequency[key][0] <= dic_noun_frequency[key][1] and dic_noun_frequency[noun][0] <= dic_noun_frequency[noun][1]):
                val.remove(noun)
    return dic_of_noun_differences

def print_nouns_stats(dic_of_noun_differences, dic_noun_frequency):
    for key, val in dic_of_noun_differences.items():
        for noun in val:
            print(f"{key}: {dic_noun_frequency[key][0]} - {dic_noun_frequency[key][1]}, {noun}: {dic_noun_frequency[noun][0]} - {dic_noun_frequency[noun][1]}")
            
def shuffle_lists_together(a, b):
    return shuffle(a, b)

In [3]:
def filter_dict(freq_dict, thr):
     return {x : freq_dict[x] for x in freq_dict.keys() if freq_dict[x] >= thr}

def optimal_threshold(freq_dict, plot=False):
    
    opt_thr, n_words = 0, len(freq_dict)
    
    if plot:
        fig = plt.figure(figsize=(15, 10))
        fig.subplots_adjust(hspace=0.75, wspace=0.5)
    
    
    for ix, thr in enumerate(range(2, 11)):
        fil_frequency = filter_dict(freq_dict, thr)
        
        if (n_words - len(fil_frequency)) >= 50:
            n_words = len(fil_frequency)
            opt_thr = thr 
        else:
            n_words = len(fil_frequency)
        
        if plot:
            x = np.arange(len(fil_frequency))
            y = np.array(list(fil_frequency.values()))  
            ax = fig.add_subplot(5, 2, ix+1)
            ax.plot(x, y)
            ax.set_title('Threshold = '+ str(thr))
            ax.set_ylabel('frequency')
            ax.set_xlabel('# of words')
    
    if plot:
        plt.show()
    
    return opt_thr

In [4]:
def lr_model(X_train, y_train):
    pipe = Pipeline([('classifier' , LogisticRegression())])

    param_grid = [
        {'classifier' : [LogisticRegression()],
         'classifier__penalty' : ['l1', 'l2'],
        'classifier__C' : np.logspace(-4, 4, 20),
        'classifier__solver' : ['liblinear']}
    ]

    clf = GridSearchCV(pipe, param_grid = param_grid, cv = 5, n_jobs=-1)

    best_clf = clf.fit(X_train, y_train)
    return best_clf

In [5]:
def threshold_similarity(word_meanings_1, word_meanings_2, threshold):
    for meaning_1 in word_meanings_1:
        for meaning_2 in word_meanings_2:
            sim = meaning_1.wup_similarity(meaning_2)
            if sim > threshold:
                return True
    return False

In [6]:
def noun_disparity(noun_1, noun_2, freq_dict_1, freq_dict_2, threshold):
        
    freq_1_1 = freq_dict_1[noun_1]/(freq_dict_1[noun_1] + freq_dict_2[noun_1])
    freq_1_2 = freq_dict_2[noun_1]/(freq_dict_1[noun_1] + freq_dict_2[noun_1])
    
    freq_2_1 = freq_dict_1[noun_2]/(freq_dict_1[noun_2] + freq_dict_2[noun_2])
    freq_2_2 = freq_dict_2[noun_2]/(freq_dict_1[noun_2] + freq_dict_2[noun_2])
    
    if ((freq_1_1 - freq_1_2) > threshold and (freq_2_2 - freq_2_1) > threshold) or ((freq_1_2 - freq_1_1) > threshold and (freq_2_1 - freq_2_2) > threshold):
            return True
    return False
        

In [7]:
def get_similar_pairs(freq_dict_1, freq_dict_2):
    pairs = []

    for noun_1 in freq_dict_1.keys():
        word_meanings_1 = wn.synsets(noun_1, 'n')
        for noun_2 in freq_dict_2.keys():
            if noun_1 != noun_2:
                word_meanings_2 = wn.synsets(noun_2, 'n')
                # hyperparams in next line (threshold_similarity, noun_disparity)
                if threshold_similarity(word_meanings_1, word_meanings_2, 0.75) and noun_disparity(noun_1, noun_2, fil_frequency_nouns_1, fil_frequency_nouns_2, 0.15):
                    if (noun_1, noun_2) not in pairs and (noun_2, noun_1) not in pairs:
                        pairs.append((noun_1, noun_2))
    
    return pairs

In [8]:
def one_hot(y_train):
    y_train_one_hot = np.zeros((y_train.shape[0], 1))
    for i,y in enumerate(y_train):
        if int(y) == 0:
            y_train_one_hot[i, 0] = 1        
    return y_train_one_hot

In [9]:
def nn_model(X_train, y_train_one_hot): 
   

    input_shape = (X_train.shape[1],)
    model = Sequential()
    model.add(Dense(25, activation='sigmoid', input_shape=input_shape))
    model.add(Dense(25, activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X_train, y_train_one_hot, batch_size=100, epochs=70, verbose = 0)

    return model

In [10]:
analyser = SentimentIntensityAnalyzer()

def extract_sentiment(sentence, sentiment_threshold = 0.23):
    score = analyser.polarity_scores(sentence)
    sentimentValues = [score["pos"], score["neg"], score["neu"]]

    if max(score["pos"], score["neg"]) < sentiment_threshold:
        return sentimentValues.index(max(sentimentValues))
    else:
        #only battle between positive and negative sentiment, becase most of the words are just neutral
        return sentimentValues.index(max(sentimentValues[0:2]))

In [11]:
def get_noun_sentiment_per_post(tr, common):
    nouns_sentiments = [] # array of dictionaries
    for index, row in tr.iterrows():
        nouns_sentiments.append({})
        post = " ".join(row.text)
        sentences = re.split('[;.?]', post)
        sentences = post.split(',')
        for sentence in sentences:
            maxSentiment = extract_sentiment(sentence)
            for word in sentence.split(' '):
                #if word already occured in current post
                if p.singular_noun(word):
                    word = p.singular_noun(word)
                if word in nouns_sentiments[-1]:
                    nouns_sentiments[-1][word][maxSentiment] += 1
                elif word in common:
                    nouns_sentiments[-1][word] = [0, 0, 0]
                    nouns_sentiments[-1][word][maxSentiment] += 1
    return nouns_sentiments

In [12]:
def word_atributes(t_1, t_2, common, common_3x, sentiment = True, pairs = []):
    if sentiment:
        sentiment_per_post_1 = get_noun_sentiment_per_post(t_1, common)
        sentiment_per_post_2 = get_noun_sentiment_per_post(t_2, common)
        t_1['bow_tokens_common'] = t_1.apply(lambda t: bow_sentiment(t.text, common, sentiment_per_post_1, pairs), axis=1).values.tolist()    
        t_1[common_3x] = pd.DataFrame(t_1.bow_tokens_common.values.tolist(), index= t_1.index)

        t_2['bow_tokens_common'] = t_2.apply(lambda t: bow_sentiment(t.text, common, sentiment_per_post_2, pairs), axis=1).values.tolist()
        t_2[common_3x] = pd.DataFrame(t_2.bow_tokens_common.values.tolist(), index= t_2.index)
    else:
        t_1['bow_tokens_common'] = t_1.apply(lambda t: bow(t.text, common, pairs), axis=1).values.tolist()    
        t_1[common_3x] = pd.DataFrame(t_1.bow_tokens_common.values.tolist(), index= t_1.index)

        t_2['bow_tokens_common'] = t_2.apply(lambda t: bow(t.text, common, pairs), axis=1).values.tolist()
        t_2[common_3x] = pd.DataFrame(t_2.bow_tokens_common.values.tolist(), index= t_2.index)

    X_train_1 = np.append(t_1[common_3x], t_2[common_3x], axis = 0)
    y_train_1 = np.append(np.zeros(len(t_1)), np.ones(len(t_2)))
    X_train_1, y_train_1 = shuffle_lists_together(X_train_1, y_train_1)
    
    y_train_1_one_hot = one_hot(y_train_1)
    return X_train_1, y_train_1, y_train_1_one_hot

In [None]:
topics = { "abortion": ["abortion_pro_choice.csv", "abortion_pro_life.csv"], 
           "gay_marriage":["gay_marriage_for.csv", "gay_marriage_against.csv"],
           "darwin_theory_of_evolution" :["darwin_theory_of_evolution_for.csv", "darwin_theory_of_evolution_against.csv"],
          "marijuana_legalization" :["marijuana_legalization_against.csv", "marijuana_legalization_for.csv"],
         }

for key, pair in topics.items():

    df_1 = pd.read_csv(f"./dataset_processed/{pair[0]}", converters={2:ast.literal_eval})
    df_2 = pd.read_csv(f"./dataset_processed/{pair[1]}", converters={2:ast.literal_eval})

      
    tr_1, ts_1 = train_test_split(df_1, test_size=0.2, random_state=42)
    tr_2, ts_2 = train_test_split(df_2, test_size=0.2, random_state=42)
    
    #X_train, X_val = train_test_split(X_train, test_size=0.25, random_state=34) # 0.25 x 0.8 = 0.2

    words_1 = get_words(tr_1)
    words_2 = get_words(tr_2)

    tags_1 = nltk.pos_tag(words_1)
    tags_2 = nltk.pos_tag(words_2)


    nouns_1, pairs_1 = get_noun_pairs(tags_1)
    nouns_2, pairs_2 = get_noun_pairs(tags_2)

    frequency_noun_pairs_1 = dict(Counter(pairs_1).most_common())
    frequency_noun_pairs_2 = dict(Counter(pairs_2).most_common())
    
    frequency_nouns_1 = dict(Counter(nouns_1).most_common())
    frequency_nouns_2 = dict(Counter(nouns_2).most_common()) 
    
    common_1 = list(frequency_nouns_1.keys())[:22]
    common_2 = list(frequency_nouns_2.keys())[:22]
    
    common_pair_1 = list(frequency_noun_pairs_1.keys())[:10]
    common_pair_2 = list(frequency_noun_pairs_2.keys())[:10]

    common = list(set([w for w in common_1] + [w for w in common_2]))
    common_pair = list(set([w for w in common_pair_1] + [w for w in common_pair_2]))
    print(common_pair)

    common_3x = list(range(len(common)*3))
    common_with_pair = list(range(len(common) + len(common_pair)))
    common_3x_with_pair = list(range(len(common)*3 + len(common_pair)))
    
    #tr_1['bow_tokens_common'] = tr_1.apply(lambda t: bow(t.text, common), axis=1).values.tolist()
    #tr_1[common] = pd.DataFrame(tr_1.bow_tokens_common.values.tolist(), index= tr_1.index)

    #tr_2['bow_tokens_common'] = tr_2.apply(lambda t: bow(t.text, common), axis=1).values.tolist()
    #tr_2[common] = pd.DataFrame(tr_2.bow_tokens_common.values.tolist(), index= tr_2.index)

    nouns_sentiments_1 = {}
    nouns_sentiments_2 = {}
    
    X_train_1, y_train_1, y_train_1_one_hot = word_atributes(tr_1, tr_2, common, common_3x_with_pair, True, common_pair)
    X_test_1, y_test_1, y_test_1_one_hot = word_atributes(ts_1, ts_2, common, common_3x_with_pair, True, common_pair)
    
    model_1_1 = lr_model(X_train_1, y_train_1)
    model_1_2 = nn_model(X_train_1, y_train_1_one_hot)
    
    print(f"Training Accuracy LR {key} - Common Words + Sentiment: {model_1_1.score(X_train_1, y_train_1):.2f}")
    print(f"Training Accuracy NN {key} - Common Words + Sentiment: {model_1_2.evaluate(X_train_1, y_train_1_one_hot, verbose=0)[1]:.2f}")    
    print(f"Testing Accuracy LR {key} - Common Words + Sentiment: {model_1_1.score(X_test_1, y_test_1):.2f}")
    print(f"Testing Accuracy NN {key} - Common Words + Sentiment: {model_1_2.evaluate(X_test_1, y_test_1_one_hot, verbose=0)[1]:.2f}")
             
    X_train_1, y_train_1, y_train_1_one_hot = word_atributes(tr_1, tr_2, common, common_with_pair, False, common_pair)
    X_test_1, y_test_1, y_test_1_one_hot = word_atributes(ts_1, ts_2, common, common_with_pair, False, common_pair)
    
    model_1_1 = lr_model(X_train_1, y_train_1)
    model_1_2 = nn_model(X_train_1, y_train_1_one_hot)
    
    print(f"Training Accuracy LR {key} - Common Words: {model_1_1.score(X_train_1, y_train_1):.2f}")
    print(f"Training Accuracy NN {key} - Common Words: {model_1_2.evaluate(X_train_1, y_train_1_one_hot, verbose=0)[1]:.2f}")    
    print(f"Testing Accuracy LR {key} - Common Words: {model_1_1.score(X_test_1, y_test_1):.2f}")
    print(f"Testing Accuracy NN {key} - Common Words: {model_1_2.evaluate(X_test_1, y_test_1_one_hot, verbose=0)[1]:.2f}")
    
    thr_1 = optimal_threshold(frequency_nouns_1)
    thr_2 = optimal_threshold(frequency_nouns_2)
    thr = max(thr_1, thr_2)

    fil_frequency_nouns_1 = filter_dict(frequency_nouns_1, thr)
    fil_frequency_nouns_2 = filter_dict(frequency_nouns_2, thr)
    
    for word in fil_frequency_nouns_1.keys():
        if word not in fil_frequency_nouns_2:
            fil_frequency_nouns_2[word] = 0
        
    for word in fil_frequency_nouns_2.keys():
        if word not in fil_frequency_nouns_1:
            fil_frequency_nouns_1[word] = 0
            
    pairs = get_similar_pairs(fil_frequency_nouns_1, fil_frequency_nouns_2)
    sim_words = list(set([ f1 for f1,f2 in pairs] + [ f2 for f1,f2 in pairs]))
    
    
    sim_3x = list(range(len(sim_words)*3))
    sim_3x_pair = list(range(len(sim_words)*3 + len(common_pair)))
    sim_words_pair = list(range(len(sim_words) + len(common_pair)))
    
    X_train_2, y_train_2, y_train_2_one_hot = word_atributes(tr_1, tr_2, sim_words, sim_3x_pair, True, common_pair)
    X_test_2, y_test_2, y_test_2_one_hot = word_atributes(ts_1, ts_2, sim_words, sim_3x_pair, True, common_pair)
    model_2_1 = lr_model(X_train_2, y_train_2)
    model_2_2 = nn_model(X_train_2, y_train_2_one_hot)
    
    print(f"Training Accuracy LR {key} - Similar Words + Sentiment: {model_2_1.score(X_train_2, y_train_2):.2f}")
    print(f"Training Accuracy NN {key} - Similar Words + Sentiment: {model_2_2.evaluate(X_train_2, y_train_2_one_hot, verbose=0)[1]:.2f}")
    print(f"Testing Accuracy LR {key} - Similar Words + Sentiment: {model_2_1.score(X_test_2, y_test_2):.2f}")
    print(f"Testing Accuracy NN {key} - Similar Words + Sentiment: {model_2_2.evaluate(X_test_2, y_test_2_one_hot, verbose=0)[1]:.2f}")
    
    
        
    X_train_2, y_train_2, y_train_2_one_hot = word_atributes(tr_1, tr_2, sim_words, sim_words_pair, False, common_pair)
    X_test_2, y_test_2, y_test_2_one_hot = word_atributes(ts_1, ts_2, sim_words, sim_words_pair, False, common_pair)
    
#     print(X_train_2[:3])
#     print(y_train_2_one_hot[:3])
    model_2_1 = lr_model(X_train_2, y_train_2)
    model_2_2 = nn_model(X_train_2, y_train_2_one_hot)
    print(f"Training Accuracy LR {key} - Similar Words: {model_2_1.score(X_train_2, y_train_2):.2f}")
    print(f"Training Accuracy NN {key} - Similar Words: {model_2_2.evaluate(X_train_2, y_train_2_one_hot, verbose=0)[1]:.2f}")
    print(f"Testing Accuracy LR {key} - Similar Words: {model_2_1.score(X_test_2, y_test_2):.2f}")
    print(f"Testing Accuracy NN {key} - Similar Words: {model_2_2.evaluate(X_test_2, y_test_2_one_hot, verbose=0)[1]:.2f}")
    
    print("----------------------------------------")

['birth control', 'pregnant woman', 'their body', 'her choice', 'unwanted child', 'my opinion', 'first place', 'unborn child', 'unwanted pregnancy', 'other person', 'her body', 'human being', 'unborn baby', 'your argument', 'human life']
Training Accuracy LR abortion - Common Words + Sentiment: 0.68
Training Accuracy NN abortion - Common Words + Sentiment: 0.72
Testing Accuracy LR abortion - Common Words + Sentiment: 0.56
Testing Accuracy NN abortion - Common Words + Sentiment: 0.54
Training Accuracy LR abortion - Common Words: 0.62
Training Accuracy NN abortion - Common Words: 0.65
Testing Accuracy LR abortion - Common Words: 0.56
Testing Accuracy NN abortion - Common Words: 0.59


In [None]:
22, 10, 0.2, 0.75, 0.15
['her body', 'other person', 'unborn baby', 'first place', 'birth control', 'their body', 'your argument', 'pregnant woman', 'unwanted child', 'unborn child', 'human being', 'her choice', 'unwanted pregnancy', 'human life', 'my opinion']
Training Accuracy LR abortion - Common Words + Sentiment: 0.67
Training Accuracy NN abortion - Common Words + Sentiment: 0.70
Testing Accuracy LR abortion - Common Words + Sentiment: 0.56
Testing Accuracy NN abortion - Common Words + Sentiment: 0.55
Training Accuracy LR abortion - Common Words: 0.62
Training Accuracy NN abortion - Common Words: 0.63
Testing Accuracy LR abortion - Common Words: 0.58
Testing Accuracy NN abortion - Common Words: 0.58
Training Accuracy LR abortion - Similar Words + Sentiment: 0.62
Training Accuracy NN abortion - Similar Words + Sentiment: 0.63
Testing Accuracy LR abortion - Similar Words + Sentiment: 0.53
Testing Accuracy NN abortion - Similar Words + Sentiment: 0.54
Training Accuracy LR abortion - Similar Words: 0.68
Training Accuracy NN abortion - Similar Words: 0.73
Testing Accuracy LR abortion - Similar Words: 0.56
Testing Accuracy NN abortion - Similar Words: 0.54
----------------------------------------
['other person', 'civil right', 'your argument', 'civil union', 'gay person', 'gay marriage', 'gay right', 'homosexual couple', 'marriage law', 'gay couple', 'common sense', 'same sex']
Training Accuracy LR gay_marriage - Common Words + Sentiment: 0.71
Training Accuracy NN gay_marriage - Common Words + Sentiment: 0.69
Testing Accuracy LR gay_marriage - Common Words + Sentiment: 0.59
Testing Accuracy NN gay_marriage - Common Words + Sentiment: 0.57
Training Accuracy LR gay_marriage - Common Words: 0.69
Training Accuracy NN gay_marriage - Common Words: 0.70
Testing Accuracy LR gay_marriage - Common Words: 0.60
Testing Accuracy NN gay_marriage - Common Words: 0.60
Training Accuracy LR gay_marriage - Similar Words + Sentiment: 0.66
Training Accuracy NN gay_marriage - Similar Words + Sentiment: 0.70
Testing Accuracy LR gay_marriage - Similar Words + Sentiment: 0.65
Testing Accuracy NN gay_marriage - Similar Words + Sentiment: 0.64
Training Accuracy LR gay_marriage - Similar Words: 0.77
Training Accuracy NN gay_marriage - Similar Words: 0.77
Testing Accuracy LR gay_marriage - Similar Words: 0.66
Testing Accuracy NN gay_marriage - Similar Words: 0.67
----------------------------------------
['scientific theory', 'single cell', 'bang theory', 'intelligent design', 'more cell', 'big bang', 'scientific method', 'micro evolution', 'fossil record', 'other hand', 'evolutionary theory', 'natural selection', 'other word', 'common ancestor', 'sheer coincidence']
Training Accuracy LR darwin_theory_of_evolution - Common Words + Sentiment: 0.66
Training Accuracy NN darwin_theory_of_evolution - Common Words + Sentiment: 0.67
Testing Accuracy LR darwin_theory_of_evolution - Common Words + Sentiment: 0.60
Testing Accuracy NN darwin_theory_of_evolution - Common Words + Sentiment: 0.58
Training Accuracy LR darwin_theory_of_evolution - Common Words: 0.62
Training Accuracy NN darwin_theory_of_evolution - Common Words: 0.61
Testing Accuracy LR darwin_theory_of_evolution - Common Words: 0.61
Testing Accuracy NN darwin_theory_of_evolution - Common Words: 0.59
Training Accuracy LR darwin_theory_of_evolution - Similar Words + Sentiment: 0.61
Training Accuracy NN darwin_theory_of_evolution - Similar Words + Sentiment: 0.63
Testing Accuracy LR darwin_theory_of_evolution - Similar Words + Sentiment: 0.60
Testing Accuracy NN darwin_theory_of_evolution - Similar Words + Sentiment: 0.59
Training Accuracy LR darwin_theory_of_evolution - Similar Words: 0.84
Training Accuracy NN darwin_theory_of_evolution - Similar Words: 0.74
Testing Accuracy LR darwin_theory_of_evolution - Similar Words: 0.47
Testing Accuracy NN darwin_theory_of_evolution - Similar Words: 0.60
----------------------------------------
['drug dealer', 'brain cell', 'personal relationship', 'your argument', 'your job', 'illegal drug', 'my family', 'marijuana smoke', 'gateway drug', 'dangerou substance', 'many person', 'side effect', 'marijuana use', 'other drug', 'black market', 'drug war']
Training Accuracy LR marijuana_legalization - Common Words + Sentiment: 0.79
Training Accuracy NN marijuana_legalization - Common Words + Sentiment: 0.72
Testing Accuracy LR marijuana_legalization - Common Words + Sentiment: 0.66
Testing Accuracy NN marijuana_legalization - Common Words + Sentiment: 0.68
Training Accuracy LR marijuana_legalization - Common Words: 0.70
Training Accuracy NN marijuana_legalization - Common Words: 0.68
Testing Accuracy LR marijuana_legalization - Common Words: 0.67
Testing Accuracy NN marijuana_legalization - Common Words: 0.68
Training Accuracy LR marijuana_legalization - Similar Words + Sentiment: 0.70
Training Accuracy NN marijuana_legalization - Similar Words + Sentiment: 0.68
Testing Accuracy LR marijuana_legalization - Similar Words + Sentiment: 0.69
Testing Accuracy NN marijuana_legalization - Similar Words + Sentiment: 0.68
Training Accuracy LR marijuana_legalization - Similar Words: 0.74
Training Accuracy NN marijuana_legalization - Similar Words: 0.71
Testing Accuracy LR marijuana_legalization - Similar Words: 0.68
Testing Accuracy NN marijuana_legalization - Similar Words: 0.68
----------------------------------------