# 18.5 Case Study: A MongoDB JSON Document Database
### Installing the Python Libraries Required for Interacting with MongoDB
### keys.py 
## 18.5.1 Creating the MongoDB Atlas Cluster
### Creating Your First Database User
### Whitelist Your IP Address
### Connect to Your Cluster
## 18.5.2 Streaming Tweets into MongoDB
### Use Tweepy to Authenticate with Twitter

In [13]:
!pip install tweepy pymongo

Collecting pymongo
  Downloading pymongo-3.11.3-cp38-cp38-win_amd64.whl (383 kB)
Installing collected packages: pymongo
Successfully installed pymongo-3.11.3


In [4]:
import tweepy, keys

In [5]:
auth = tweepy.OAuthHandler(
    keys.consumer_key, keys.consumer_secret)
auth.set_access_token(keys.access_token, 
    keys.access_token_secret)

In [6]:
api = tweepy.API(auth, wait_on_rate_limit=True, 
                 wait_on_rate_limit_notify=True)               

### Loading the Senators’ Data

In [7]:
import pandas as pd

In [8]:
senators_df = pd.read_csv('senators.csv')

In [9]:
senators_df['TwitterID'] = senators_df['TwitterID'].astype(str)

In [10]:
pd.options.display.max_columns = 6

In [11]:
senators_df.head()

Unnamed: 0,State,Name,Party,TwitterHandle,TwitterID
0,AL,Richard Shelby,R,SenShelby,21111098
1,AL,Doug Jomes,D,SenDougJones,941080085121175552
2,AK,Lisa Murkowski,R,lisamurkowski,18061669
3,AK,Dan Sullivan,R,SenDanSullivan,2891210047
4,AZ,Jon Kyl,R,SenJonKyl,24905240


### Configuring the MongoClient 

In [14]:
from pymongo import MongoClient

In [15]:
atlas_client = MongoClient(keys.mongo_connection_string)

In [16]:
db = atlas_client.senators 

### Setting up Tweet Stream

In [17]:
from tweetlistener import TweetListener

In [18]:
tweet_limit = 10000

In [19]:
twitter_stream = tweepy.Stream(api.auth, 
    TweetListener(api, db, tweet_limit))

### Starting the Tweet Stream

In [20]:
twitter_stream.filter(track=senators_df.TwitterHandle.tolist(),
    follow=senators_df.TwitterID.tolist())

Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401
Error: 401


KeyboardInterrupt: 

### Class TweetListener

```python
# tweetlistener.py
"""TweetListener downloads tweets and stores them in MongoDB."""
import json
import tweepy

class TweetListener(tweepy.StreamListener):
    """Handles incoming Tweet stream."""

    def __init__(self, api, database, limit=10000):
        """Create instance variables for tracking number of tweets."""
        self.db = database
        self.tweet_count = 0
        self.TWEET_LIMIT = limit  # 10,000 by default
        super().__init__(api)  # call superclass's init

    def on_connect(self):
        """Called when your connection attempt is successful, enabling 
        you to perform appropriate application tasks at that point."""
        print('Successfully connected to Twitter\n')

    def on_data(self, data):
        """Called when Twitter pushes a new tweet to you."""
        self.tweet_count += 1  # track number of tweets processed
        json_data = json.loads(data)  # convert string to JSON
        self.db.tweets.insert_one(json_data)  # store in tweets collection
        print(f'    Screen name: {json_data["user"]["name"]}') 
        print(f'     Created at: {json_data["created_at"]}')         
        print(f'Tweets received: {self.tweet_count}')         

        # if TWEET_LIMIT is reached, return False to terminate streaming
        return self.tweet_count != self.TWEET_LIMIT
    
    def on_error(self, status):
        print(status)
        return True
```

### Counting Tweets for Each Senator

In [None]:
db.tweets.create_index([('$**', 'text')])

In [None]:
tweet_counts = []

In [None]:
for senator in senators_df.TwitterHandle:
    tweet_counts.append(db.tweets.count_documents(
        {"$text": {"$search": senator}}))

### Show Tweet Counts for Each Senator 

In [None]:
tweet_counts_df = senators_df.assign(Tweets=tweet_counts)

In [None]:
tweet_counts_df.sort_values(by='Tweets', 
    ascending=False).head(10)

### Get the State Locations for Plotting Markers 

In [None]:
from geopy import OpenMapQuest

In [None]:
import time

In [None]:
from state_codes import state_codes

In [None]:
geo = OpenMapQuest(api_key=keys.mapquest_key) 

In [None]:
states = tweet_counts_df.State.unique()

In [None]:
states.sort()

In [None]:
locations = []

In [None]:
for state in states:
    processed = False
    delay = .1 
    while not processed:
        try:
            locations.append(
                geo.geocode(state_codes[state] + ', USA'))
            print(locations[-1])  
            processed = True
        except:  # timed out, so wait before trying again
            print('OpenMapQuest service timed out. Waiting.')
            time.sleep(delay)
            delay += .1

### Grouping the Tweet Counts by State 

In [None]:
tweets_counts_by_state = tweet_counts_df.groupby(
    'State', as_index=False).sum()

In [None]:
tweets_counts_by_state.head()

### Creating the Map 

In [None]:
import folium

In [None]:
usmap = folium.Map(location=[39.8283, -98.5795], 
                   zoom_start=4, detect_retina=True,
                   tiles='Stamen Toner')

### Creating a Choropleth to Color the Map 

In [None]:
 choropleth = folium.Choropleth(
    geo_data='us-states.json',
    name='choropleth',
    data=tweets_counts_by_state,
    columns=['State', 'Tweets'],
    key_on='feature.id',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Tweets by State'
).add_to(usmap)

In [None]:
layer = folium.LayerControl().add_to(usmap)

### Creating the Map Markers for Each State 

In [None]:
sorted_df = tweet_counts_df.sort_values(
    by='Tweets', ascending=False)

In [None]:
for index, (name, group) in enumerate(sorted_df.groupby('State')):
    strings = [state_codes[name]]  # used to assemble popup text

    for s in group.itertuples():
        strings.append(
            f'{s.Name} ({s.Party}); Tweets: {s.Tweets}')
        
    text = '<br>'.join(strings)  
    marker = folium.Marker(
        (locations[index].latitude, locations[index].longitude), 
        popup=text)
    marker.add_to(usmap) 

### Displaying the Map 

In [None]:
usmap # in a notebook, this will display the map without the need to save it to a file 

If the preceding snippet does not display the map for you, uncomment the following snippet and execute it to save the HTML file to disk as we do in the chapter. You can then open that HTML file in your browser.

In [None]:
# usmap.save('SenatorsTweets.html')

In [None]:
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #
# Pearson Education, Inc. All Rights Reserved.                           #
#                                                                        #
# DISCLAIMER: The authors and publisher of this book have used their     #
# best efforts in preparing the book. These efforts include the          #
# development, research, and testing of the theories and programs        #
# to determine their effectiveness. The authors and publisher make       #
# no warranty of any kind, expressed or implied, with regard to these    #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or       #
# consequential damages in connection with, or arising out of, the       #
# furnishing, performance, or use of these programs.                     #
##########################################################################