In [1]:
!gdown --id 10Yv6xUd1ufDilPcLdhR8L-zQZX5yNwSZ

Downloading...
From: https://drive.google.com/uc?id=10Yv6xUd1ufDilPcLdhR8L-zQZX5yNwSZ
To: /content/dem-vs-rep.zip
100% 9.29M/9.29M [00:00<00:00, 48.7MB/s]


In [2]:

import os
import zipfile

local_zip = './dem-vs-rep.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall()
zip_ref.close()

In [3]:
train_tweets = os.path.join('./dem-vs-rep/train.csv')
test_tweets = os.path.join('./dem-vs-rep/test.csv')

In [4]:
import pandas
train_df = pandas.read_csv(train_tweets)
test_df = pandas.read_csv(test_tweets)
print(train_df[:1])
print(test_df[:1])

      Party         Handle                                              Tweet
0  Democrat  RepDarrenSoto  Today, Senate Dems vote to #SaveTheInternet. P...
      Party        Handle                                              Tweet
0  Democrat  RepAdamSmith  Today the House passed an omnibus spending bil...


In [5]:
import string

stopwords = ["a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at",
             "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "could", "did", "do",
             "does", "doing", "down", "during", "each", "few", "for", "from", "further", "had", "has", "have", "having",
             "he", "hed", "hes", "her", "here", "heres", "hers", "herself", "him", "himself", "his", "how",
             "hows", "i", "id", "ill", "im", "ive", "if", "in", "into", "is", "it", "its", "itself",
             "lets", "me", "more", "most", "my", "myself", "nor", "of", "on", "once", "only", "or", "other", "ought",
             "our", "ours", "ourselves", "out", "over", "own", "same", "she", "shed", "shell", "shes", "should",
             "so", "some", "such", "than", "that", "thats", "the", "their", "theirs", "them", "themselves", "then",
             "there", "theres", "these", "they", "theyd", "theyll", "theyre", "theyve", "this", "those", "through",
             "to", "too", "under", "until", "up", "very", "was", "we", "wed", "well", "were", "weve", "were",
             "what", "whats", "when", "whens", "where", "wheres", "which", "while", "who", "whos", "whom", "why",
             "whys", "with", "would", "you", "youd", "youll", "youre", "youve", "your", "yours", "yourself",
             "yourselves"]

table = str.maketrans('', '', string.punctuation)

In [6]:
import numpy as np

train_tweets = []
train_labels = []

for i in range(len(train_df.index)):
  pre_tweet = train_df.iloc[i]['Tweet']
  post_tweet = ""
  for word in pre_tweet.split():
    if word not in stopwords:
      post_tweet += word + " "
  train_tweets.append(post_tweet)
  party = (train_df.iloc[i]['Party'])
  if party == 'Democrat':
    train_labels.append(0)
  else:
    train_labels.append(1)

test_tweets = []
test_labels = []

for i in range(len(test_df.index)):
  pre_tweet = train_df.iloc[i]['Tweet']
  post_tweet = ""
  for word in pre_tweet.split():
    if word not in stopwords:
      post_tweet += word + " "
  test_tweets.append(post_tweet)
  party = (test_df.iloc[i]['Party'])
  if party == 'Democrat':
    test_labels.append(0)
  else:
    test_labels.append(1)

train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

In [7]:
print(len(train_tweets))

69107


In [8]:
print(train_tweets[:2])
print(train_labels[:2])

print(test_tweets[:2])
print(test_labels[:2])

['Today, Senate Dems vote #SaveTheInternet. Proud support similar #NetNeutrality legislation House… https://t.co/n3tggDLU1L ', 'RT @WinterHavenSun: Winter Haven resident / Alta Vista teacher one several recognized @RepDarrenSoto National Teacher Apprecia… ']
[0 0]
['Today, Senate Dems vote #SaveTheInternet. Proud support similar #NetNeutrality legislation House… https://t.co/n3tggDLU1L ', 'RT @WinterHavenSun: Winter Haven resident / Alta Vista teacher one several recognized @RepDarrenSoto National Teacher Apprecia… ']
[0 0]


In [9]:
vocab_size = 69701
embedding_dim = 16
max_length = 200
padding_type='post'
trunc_type='post'
OOV_token = "<OOV>"

In [10]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words = vocab_size, oov_token=OOV_token)
tokenizer.fit_on_texts(train_tweets)

word_index = tokenizer.word_index

train_sequences = tokenizer.texts_to_sequences(train_tweets)
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(test_tweets)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [11]:
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

print(decode_review(train_padded[0]))
print(train_tweets[0])

today senate dems vote savetheinternet proud support similar netneutrality legislation house… https t co n3tggdlu1l ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?
Today, Senate Dems vote #SaveTheInternet. Proud support similar #NetNeutrality legislation House… https://t.co/n3tggDLU1L 


In [15]:
import tensorflow as tf
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
adam = tf.keras.optimizers.Adam(learning_rate = 0.0001, beta_1=0.9, beta_2=0.999, amsgrad=False)

model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 200, 16)           1115216   
                                                                 
 bidirectional_2 (Bidirectio  (None, 32)               4224      
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 24)                792       
                                                                 
 dense_5 (Dense)             (None, 1)                 25        
                                                                 
Total params: 1,120,257
Trainable params: 1,120,257
Non-trainable params: 0
_________________________________________________________________


In [16]:
num_epochs = 10
model.fit(train_padded, train_labels, epochs=num_epochs,
          validation_data=(testing_padded, test_labels))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
 211/2160 [=>............................] - ETA: 5:11 - loss: 0.0627 - accuracy: 0.9819

KeyboardInterrupt: ignored

In [17]:
e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape)

(69701, 16)


<h1> Input statement below </h1>
<p> Insert a sentence down below, then run the remaining cells to clasify your statement </p>


In [76]:
sentence = ["We need to bring back Obamacare. We need to change our system's healthcare system."]

<p> Run the remaining cells to determine the model's interpretation of a democratic or repulican statement <p>

In [77]:
sequence = tokenizer.texts_to_sequences(sentence)
print(sequence)

[[13, 58, 176, 412, 128, 1465, 13, 58, 176, 520, 91, 1, 528, 446]]


In [78]:
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
print("Result:", model.predict(padded)[0][0])
print()
result = ""
if model.predict(padded)[0][0] < 0.37:
  result = "Republican"
elif model.predict(padded)[0][0] > 0.63:
  result = "Democratic"
else:
  result = "Neutral"
print("Output:", result)

Result: 0.9941735

Output: Democratic


<h1> Interpretting the results <h1>
<p> Look at the value in your cell. The value is a number between 0 and 1.


*   If your number is closer to 0, your statement aligns closer to a ***Republican*** statement
*   If your number is closer to 1, your statement aligns closer to a ***Democratic*** statement