/
save_stock_tweets.py
99 lines (87 loc) · 3.44 KB
/
save_stock_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#Import the necessary methods from tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
import os
import pandas as pd
import json
import numpy as np
import time
from top_companies import get_companies, hand_made_list
#Variables that contains the user credentials to access Twitter API
access_token = os.environ['TWITTER_API_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_API_ACCESS_TOKEN_SECRET']
consumer_key = os.environ['TWITTER_API_KEY']
consumer_secret = os.environ['TWITTER_SECRET_API_KEY']
#This is a basic listener that just prints received tweets to stdout.
class StdOutListener(StreamListener):
def __init__(self, coll=''):
self.coll = coll
def on_data(self, data):
print data
self.coll.insert(json.loads(data)) # add instance to mongo db get_tweets
return True
def on_error(self, status):
print status
# def get_companies():
# df = pd.read_csv('../data/companylist.csv')
# company_list = list(df['Symbol'])
# company_list = [ "#" + company for company in company_list ]
# return company_list
#
def run_twitter_query():
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
#names = list(np.array(get_companies())[:,1])
#print names[num1:num2]
d = hand_made_list()
search_list = []
for key, value in d.items():
if key == 'SPY':
search_list.append(value[0]) # append the full anme of the symbol
search_list.append('#SP500') # Don't append #SPY because it's not helpful
search_list.append('$SP500')
elif key == 'F':
# search_list.append(value[0]) # append the full name of the symbol
search_list.append('Ford') # append the name of the symbol
elif key == 'GE':
search_list.append('General Electric') # append the full anme of the symbol
elif key == 'S':
search_list.append('Sprint') # append the full anme of the symbol
elif key == 'T':
search_list.append('AT&T') # append the full anme of the symbol
elif key == 'MU':
search_list.append('Micron Tech') # append the full anme of the symbol
elif key == 'TRI':
search_list.append('Thomson Reuters') # append the full anme of the symbol
else:
for cell in value:
search_list.append(cell)
stream.filter(track=search_list)
if __name__ == '__main__':
from pymongo import MongoClient
mongo_client = MongoClient()
db = mongo_client.get_tweets_3
coll = db.tweets
#This handles Twitter authetification and the connection to Twitter Streaming API
l = StdOutListener(coll)
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)
#This line filter Twitter Streams to capture data by the keywords: 'python', 'javascript', 'ruby'
# names = list(np.array(get_companies())[:,0])
# symbols = list(np.array(get_companies())[:,1])
# print symbols[:50]
d = hand_made_list()
search_list = []
for key, value in d.items():
for cell in value:
search_list.append(cell)
while True:
try:
stream.filter(track=search_list)
except:
print "error... who knows..."
time.sleep(5)