# Upload news article collections to MongoDB on the local machine and on AWS

In [1]:
import json
import pymongo
from pymongo import MongoClient
from pprint import pprint

### Check pymongo version 

In [2]:
# Version string of the pymongo package imported by this kernel.
pymongo.version

'3.8.0'

In [3]:
# Cross-check the pymongo version that pip reports as installed.
!pip freeze | grep pymongo

pymongo==3.8.0


In [4]:
# List the working directory; the *_news.json files are the exports loaded below.
!ls

FoxNewsScraper.py.ipynb          p4_data.ipynb
cnn_news.json                    p4_data_extract.ipynb
[34mdata[m[m                             p4_mongdb.ipynb
fox_news.json                    ph.txt
nlp_pipeline_pair_solution.ipynb top-headlines.json
nytimes_news.json


# Local MongoDB

In [5]:
# Connect to the MongoDB server running on this machine.
# Host and port are spelled out here; MongoClient() with no arguments and
# MongoClient("mongodb://localhost:27017") reach the same default endpoint.
# A remote deployment (e.g. on AWS) needs its own host / connection string.
client = MongoClient('localhost', 27017)

In [6]:
# List the names of the databases that exist on the connected server.
client.list_database_names()

['admin', 'books', 'config', 'local', 'outings', 'test']

## Create new database called 'nlp'

In [7]:
# Handle to the 'nlp' database; the news collections below are stored in it.
db = client.get_database('nlp')

## Import json file to MongoDB

In [8]:
# Load the scraped CNN articles. The encoding is pinned to UTF-8 because article
# text may contain non-ASCII characters and open() otherwise falls back to the
# platform default encoding.
with open('cnn_news.json', encoding='utf-8') as f:
    file_data = json.load(f)

collection_cnn = db['cnn_news']

# The file holds a list of article documents, so insert them in one batch.
# NOTE: re-running this cell inserts the same articles a second time.
collection_cnn.insert_many(file_data)

<pymongo.results.InsertManyResult at 0x10d160488>

In [9]:
# Load the scraped Fox News articles. The encoding is pinned to UTF-8 because the
# article text contains non-ASCII characters (curly quotes, non-breaking spaces)
# and open() otherwise falls back to the platform default encoding.
with open('fox_news.json', encoding='utf-8') as f:
    file_data = json.load(f)

collection_fox = db['fox_news']

# The file holds a list of article documents, so insert them in one batch.
# NOTE: re-running this cell inserts the same articles a second time.
collection_fox.insert_many(file_data)

<pymongo.results.InsertManyResult at 0x10cbb7988>

In [10]:
# Load the scraped New York Times articles. The encoding is pinned to UTF-8
# because article text may contain non-ASCII characters and open() otherwise
# falls back to the platform default encoding.
with open('nytimes_news.json', encoding='utf-8') as f:
    file_data = json.load(f)

collection_nytimes = db['nytimes_news']

# The file holds a list of article documents, so insert them in one batch.
# NOTE: re-running this cell inserts the same articles a second time.
collection_nytimes.insert_many(file_data)

<pymongo.results.InsertManyResult at 0x10cb4fb08>

## Show the collections that are in the database

In [11]:
# Names of the collections now present in the 'nlp' database.
db.list_collection_names()

['nytimes_news', 'cnn_news', 'fox_news']

## How many documents are in there?

In [12]:
# Count every document in the CNN collection.
# Collection.count() is deprecated since PyMongo 3.7; count_documents({}) is the
# supported replacement (the empty filter matches all documents).
cnn = db.cnn_news
print(cnn.count_documents({}))

329


  


In [13]:
# Count every document in the Fox News collection.
# Collection.count() is deprecated since PyMongo 3.7; count_documents({}) is the
# supported replacement (the empty filter matches all documents).
fox = db.fox_news
print(fox.count_documents({}))

357


  


In [14]:
# Count every document in the New York Times collection.
# Collection.count() is deprecated since PyMongo 3.7; count_documents({}) is the
# supported replacement (the empty filter matches all documents).
nytimes = db.nytimes_news
print(nytimes.count_documents({}))

315


  


In [15]:
# For every Fox News document, return only the 'query' and 'title' fields (_id hidden).
cursor = db.fox_news.find({}, {'_id':0, 'query': 1, 'title':1})

list(cursor)

[{'query': 'gun',
  'title': 'Trump rallies support for gun control, background checks'},
 {'query': 'gun',
  'title': 'Trump condemns violence in wake of mass shootings'},
 {'query': 'gun', 'title': 'Trump negotiating with Senate Dems on guns'},
 {'query': 'gun',
  'title': "McConnell willing to consider ‘bipartisan’ gun violence solutions, opposes 'infringing' on constitutional rights"},
 {'query': 'gun',
  'title': '2020 Dems embrace federal gun buyback program in wake of mass shootings'},
 {'query': 'gun',
  'title': "Rebecca Grant: What 'Top Gun: Maverick' trailer gets right (and wrong) about the Navy, advanced air combat now"},
 {'query': 'gun',
  'title': 'Gun purchases in New York will now take longer, after gov signs new law'},
 {'query': 'gun', 'title': 'Man uses gun to cut birthday cake by shooting it'},
 {'query': 'gun',
  'title': 'Chicago weekend shootings leave 8 dead, 40 wounded'},
 {'query': 'gun',
  'title': "Security scare shuts down Universal Studios parking garage:

In [16]:
# Peek at the first four Fox News documents, leaving out the _id field.
cursor = db['fox_news'].find(filter={}, projection={'_id': 0}, limit=4)

list(cursor)

[{'query': 'gun',
  'media_domain': 'foxnews.com',
  'title': 'Trump rallies support for gun control, background checks',
  'url': 'http://video.foxnews.com/v/6070874085001/',
  'article': []},
 {'query': 'gun',
  'media_domain': 'foxnews.com',
  'title': 'Trump condemns violence in wake of mass shootings',
  'url': 'http://video.foxnews.com/v/6068370715001/',
  'article': []},
 {'query': 'gun',
  'media_domain': 'foxnews.com',
  'title': 'Trump negotiating with Senate Dems on guns',
  'url': 'https://www.foxnews.com/politics/trump-negotiating-with-senate-dems-on-guns',
  'article': ['President says he has support in Congress; Rich Edson reports.',
 {'query': 'gun',
  'media_domain': 'foxnews.com',
  'title': "McConnell willing to consider ‘bipartisan’ gun violence solutions, opposes 'infringing' on constitutional rights",
  'url': 'https://www.foxnews.com/politics/mcconnell-willing-to-consider-bipartisan-gun-violence-solutions-opposes-infringing-on-constitutional-rights',
  'article':

In [17]:
# Fetch the Fox News documents whose 'query' field is 'gay', leaving out _id.
cursor = db.fox_news.find({'query': 'gay'}, {'_id':0})

list(cursor)

[{'query': 'gay',
  'media_domain': 'foxnews.com',
  'title': "Lance Bass says Britney Spears 'chuckled' after he made this revelation on her wedding night",
  'url': 'https://www.foxnews.com/entertainment/lance-bass-britney-spears-wedding-revelation',
  'article': ['Did you know these 5 tidbits about Britney Spears?',
   'Former &aposNSYNC member Lance Bass came out to Britney Spears the night she married Jason Allen Alexander in 2004, he revealed this week.',
   'Speaking on “Watch What Happens Live with Andy Cohen” on Monday, the former boyband member explained why he came out to Spears two years before his memorable People magazine cover in 2006, when he revealed his sexual identity to the world.',
   'MTV VMAS 2019: ARIANA GRANDE AND TAYLOR SWIFT SCORE 10 NOMINATIONS EACH',
   '"It was the night that she got married the first time, in Vegas ... to Jason," Bass began, referring to when Spears\xa0wed\xa0Alexander. The two were famously husband and wife\xa0for 55 hours before their m

In [18]:
# Fetch the CNN documents whose 'query' field is 'gay', leaving out _id.
cursor = db.cnn_news.find({'query': 'gay'}, {'_id':0})

list(cursor)

[{'query': 'gay',
  'media_domain': 'cnn.com',
  'title': 'A pastor who wrote a bestselling relationship book says his marriage is over and he is no longer Christian',
  'url': 'https://www.cnn.com/2019/07/29/us/joshua-harris-divorce-apology-scli-intl/index.html',
  'article': [' (CNN)A  former pastor who wrote a bestselling book on traditional relationships has confirmed the end of his marriage, apologized for opposing LGBTQ rights and announced he is no longer a Christian.',
   'Joshua Harris\' book "I Kissed Dating Goodbye," which railed against sex before marriage and homosexuality, sold over 1 million copies and became a fixture in Christian youth groups after coming out 22 years ago.',
   'But Harris now says the 1997 work "contributed to a culture of exclusion and bigotry," and that he has "undergone a massive shift in regard to my faith in Jesus." ',
   'Writing on Instagram, he added: "By all the measurements that I have for defining a Christian, I am not a Christian."',
   '"

In [19]:
# database_names() is deprecated since PyMongo 3.7 (it emits a DeprecationWarning);
# list_database_names() returns the same list of names.
client.list_database_names()

  """Entry point for launching an IPython kernel.


['admin', 'books', 'config', 'local', 'nlp', 'outings', 'test']

## Drop the MongoDB collections 'fox_news' and 'cnn_news'

In [16]:
# Remove the 'fox_news' collection from the database referenced by `db`.
db.drop_collection('fox_news')

{'ns': 'nlp.fox_news', 'nIndexesWas': 1, 'ok': 1.0}

In [17]:
# Remove the 'cnn_news' collection from the database referenced by `db`.
db.drop_collection('cnn_news')

{'ns': 'nlp.cnn_news', 'nIndexesWas': 1, 'ok': 1.0}

In [18]:
# collection_names() is deprecated since PyMongo 3.7 (it emits a DeprecationWarning);
# list_collection_names() returns the same list of names.
db.list_collection_names()

  """Entry point for launching an IPython kernel.


['nytimes_news']

## Drop a MongoDB database called 'nlp'

In [19]:
# Remove the whole 'nlp' database (and any collections left in it) from the connected server.
client.drop_database('nlp')

In [20]:
# database_names() is deprecated since PyMongo 3.7 (it emits a DeprecationWarning);
# list_database_names() returns the same list of names.
client.list_database_names()

  """Entry point for launching an IPython kernel.


['admin', 'books', 'config', 'local', 'outings', 'test']

# MongoDB on AWS 

In [39]:
import os
from getpass import getpass

from pymongo import MongoClient

# Connection settings for the MongoDB instance hosted on AWS.
# Credentials are taken from environment variables (with an interactive prompt
# as the fallback for the password) so that no secret is saved in the notebook.
config = {
  'host': os.environ.get('MONGO_HOST', '18.219.234.233:27017'),
  'username': os.environ.get('MONGO_USERNAME', 'admin'),
  'password': os.environ.get('MONGO_PASSWORD') or getpass('MongoDB password: '),
  'authSource': 'admin'
}

client = MongoClient(**config)

# NOTE(review): the news collections below are written into the `admin` database,
# next to system.users / system.version. Kept as-is so the following cells behave
# the same; consider a dedicated database such as client['nlp'] instead.
db = client.admin

In [40]:
# client.database_names() is deprecated since PyMongo 3.7; client.list_database_names() is used instead.

In [41]:
# List the names of the databases that exist on the connected server.
client.list_database_names()

['admin', 'config', 'local']

In [38]:
# List the working directory to confirm the *_news.json exports are present.
!ls

FoxNewsScraper.py.ipynb          p4_data.ipynb
cnn_news.json                    p4_data_extract.ipynb
[34mdata[m[m                             p4_mongdb.ipynb
fox_news.json                    ph.txt
nlp_pipeline_pair_solution.ipynb top-headlines.json
nytimes_news.json


### Create new collection 

In [42]:
# create_collection() raises CollectionInvalid when the collection already exists,
# so guard the call to keep this cell re-runnable. The last line still displays
# the collection handle.
if 'fox_news' not in db.list_collection_names():
    db.create_collection('fox_news')
db['fox_news']

Collection(Database(MongoClient(host=['18.219.234.233:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin'), 'admin'), 'fox_news')

In [43]:
# Load the scraped Fox News articles. The encoding is pinned to UTF-8 because the
# article text contains non-ASCII characters (curly quotes, non-breaking spaces)
# and open() otherwise falls back to the platform default encoding.
with open('fox_news.json', encoding='utf-8') as f:
    file_data = json.load(f)

collection_fox = db['fox_news']

# The file holds a list of article documents, so insert them in one batch.
# NOTE: re-running this cell inserts the same articles a second time.
collection_fox.insert_many(file_data)

<pymongo.results.InsertManyResult at 0x10cbb7388>

### Create new collection 

In [44]:
# create_collection() raises CollectionInvalid when the collection already exists,
# so guard the call to keep this cell re-runnable. The last line still displays
# the collection handle.
if 'cnn_news' not in db.list_collection_names():
    db.create_collection('cnn_news')
db['cnn_news']

Collection(Database(MongoClient(host=['18.219.234.233:27017'], document_class=dict, tz_aware=False, connect=True, authsource='admin'), 'admin'), 'cnn_news')

In [45]:
# Load the scraped CNN articles. The encoding is pinned to UTF-8 because article
# text may contain non-ASCII characters and open() otherwise falls back to the
# platform default encoding.
with open('cnn_news.json', encoding='utf-8') as f:
    file_data = json.load(f)

collection_cnn = db['cnn_news']

# The file holds a list of article documents, so insert them in one batch.
# NOTE: re-running this cell inserts the same articles a second time.
collection_cnn.insert_many(file_data)

<pymongo.results.InsertManyResult at 0x10cdcd988>

In [46]:
# Names of the collections now present in the database referenced by `db`.
db.list_collection_names()

['cnn_news', 'fox_news', 'system.users', 'system.version']

In [48]:
# Verify the upload by counting the documents in both collections.
# Collection.count() is deprecated since PyMongo 3.7; count_documents({}) is the
# supported replacement (the empty filter matches all documents).
fox = db.fox_news
cnn = db.cnn_news
print(fox.count_documents({}), cnn.count_documents({}))

357 329


  This is separate from the ipykernel package so we can avoid doing imports until
