# TweetNMeet Bot

Hackathon 2017
Christine Chung & Anne Lai

## Set-up

- Install nltk, gensim, twython
- Get OAuth credentials from Twitter
- mkdir ~/twitter-files
- Create a credentials.txt file in twitter-files/
- Add your oauth info in this format: 
```
app_key=YOUR CONSUMER KEY  
app_secret=YOUR CONSUMER SECRET  
oauth_token=YOUR ACCESS TOKEN  
oauth_token_secret=YOUR ACCESS TOKEN SECRET
```
- Export to environment: `export TWITTER="/path/to/your/twitter-files"`

In [6]:
from nltk.corpus import stopwords 
from nltk.stem.wordnet import WordNetLemmatizer
import string
import gensim
from gensim import corpora
from twython import Twython
from nltk.twitter import Twitter, Query, Streamer, credsfromfile
import json
from nltk.corpus import twitter_samples

## Cleaning Data

In [7]:
stop = set(stopwords.words('english'))
exclude = set(string.punctuation) 
lemma = WordNetLemmatizer()

def clean(doc):
    stop_free = " ".join([i for i in doc.lower().split() if i not in stop])
    punc_free = ''.join(ch for ch in stop_free if ch not in exclude)
    normalized = " ".join(lemma.lemmatize(word) for word in punc_free.split())
    return normalized

In [8]:
meetup = twitter_samples.strings('../../../twitter-files/tweets.20170803-160530.json')

In [9]:
doc_clean = [clean(tweet).split() for tweet in meetup]        

In [10]:
dictionary = corpora.Dictionary(doc_clean)

In [11]:
doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]

## Training LDA Model

In [13]:
lda = gensim.models.ldamodel.LdaModel

In [14]:
ldamodel = lda(doc_term_matrix, num_topics=10, id2word = dictionary, passes=50)

KeyboardInterrupt: 

In [52]:
ldamodel.print_topics()

[(0,
  '0.053*"rt" + 0.033*"ukip" + 0.020*"cameron" + 0.016*"farage" + 0.016*"bbcqt" + 0.015*"david" + 0.014*"asknigelfarage" + 0.009*"tory" + 0.009*"nigel" + 0.009*"tonight"'),
 (1,
  '0.047*"rt" + 0.029*"miliband" + 0.019*"tory" + 0.014*"support" + 0.014*"ed" + 0.013*"time" + 0.013*"http…" + 0.012*"come" + 0.012*"man" + 0.011*"claiming"'),
 (2,
  '0.044*"rt" + 0.042*"snp" + 0.030*"tory" + 0.020*"miliband" + 0.019*"labour" + 0.014*"vote" + 0.012*"rather" + 0.012*"deal" + 0.011*"would" + 0.011*"ed"')]

# Playing with Twitter

In [62]:
tw = Twitter()

In [63]:
#Writing meetup tweets out
tw.tweets(keywords='meetup', to_screen=False, stream=False, limit=20000) #sample from the public stream

Writing to /Users/christine/twitter-files/tweets.20170803-160530.json
No more Tweets available through rest api
Written 6432 Tweets


In [13]:
oauth = credsfromfile()
client = Query(**oauth)

In [26]:
client = Twython(**oauth)

In [85]:
myTweets = [tweet['text'] for tweet in client.get_user_timeline(
                                                    screen_name="curmudgeon", 
                                                    include_rts=True,
                                                    count=200)]

In [86]:
myTweets = [clean(tweet).split() for tweet in myTweets]        

In [87]:
myTweets

[['rt',
  'alexcengler',
  'always',
  'proud',
  'see',
  'urbaninstitute',
  'prominent',
  'meaningful',
  'role',
  'policy',
  'debate',
  'httpstcoirksh4kmml'],
 ['lols', 'httpstcosczou6rbm5'],
 ['rt',
  'scalalang',
  'scala',
  'center',
  'tiny',
  'bit',
  'donatable',
  'would',
  'people',
  'ok',
  'backed',
  'development',
  'scalaquest',
  'game',
  'https…'],
 ['rt',
  'fommil',
  'heathercmiller',
  'itrvd',
  'scalalang',
  'would',
  'appreciated',
  'fund',
  'running',
  'dry',
  'let',
  'sponsored',
  'd…'],
 ['pretty', 'darn', 'cool', 'application', 'data', 'httpstcoiot7p0rpfr'],
 ['rt',
  'lenalovesstuff',
  'hey',
  'everybody',
  'were',
  'looking',
  'gt',
  'senior',
  'core',
  'engineer',
  'see',
  'httpstco0voz4jhi8v'],
 ['httpstconoyua1lufd'],
 ['rt',
  'eryno',
  'best',
  'way',
  'get',
  'woman',
  'conference',
  'get',
  'woman',
  'social',
  'circle',
  'well',
  'need',
  'them'],
 ['rt',
  'lenalovesstuff',
  'thanks',
  'carolinemarcks',
 