# MongoDB with Python

In [1]:
%matplotlib inline
import pymongo
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
from bson import json_util, ObjectId
import pandas as pd
from pandas import DataFrame
from pandas.io.json import json_normalize
import numpy as np
import requests
import json, os
import configparser
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import seaborn as sns
import warnings
import random
import pprint
from datetime import datetime
# Seed the stdlib RNG from the wall clock. random.seed() only accepts
# None/int/float/str/bytes/bytearray (a datetime object raises TypeError on
# Python 3.11+), so pass the POSIX timestamp of "now" instead.
random.seed(datetime.now().timestamp())
# Silence every warning for the rest of the notebook (deprecation notices included).
warnings.filterwarnings('ignore')

# Make plots larger: default figure size of 10 x 6 (width, height; matplotlib measures figsize in inches)
plt.rcParams['figure.figsize'] = (10, 6)

### Install PyMongo

Linux/OS X   
You can use pip to install:  

```bash  
pip install pymongo
```

###  Create a Connection  


Use MongoClient to create a connection:

```python  
client = MongoClient()
```
If you do not specify any arguments to MongoClient, then MongoClient defaults to the MongoDB instance that runs on the localhost interface on port 27017.   

You can also specify a complete MongoDB URI to define the connection, including explicitly specifying the host and port number. For example, the following creates a connection to a MongoDB instance that runs on mongodb0.example.net and the port of 27017:  


```python
client = MongoClient("mongodb://mongodb0.example.net:27017")
```

### Access Database Objects  

The first fundamental class of objects you will interact with using pymongo is Database which represents the database construct in MongoDB. Databases hold groups of logically related collections. MongoDB creates new databases implicitly upon their first use.   

To assign the database named primer to the local variable db, you can use attribute access, as in the following:  

```python
db = client.primer
```
You can also access databases using dictionary-style access, which removes Python-specific naming restrictions, as in the following:  

```python
db = client['primer']
```

### Access Collection Objects   

The second fundamental class of objects you will interact with using pymongo is Collection, which represents the collection construct in MongoDB. Collections hold groups of related documents.   

You can access collection objects directly using dictionary-style or attribute access from a Database object, as in the following examples:  


```python
db.dataset
db['dataset']
```
You may also assign the collection object to a variable for use elsewhere, as in the following examples:  

```python
coll = db.dataset
coll = db['dataset']
```

In [2]:
# Load the connection settings from the ini file. ConfigParser.read() skips
# files it cannot open and returns the list of files it actually parsed, so
# the displayed value doubles as a check that mongo.ini was found.
config = configparser.ConfigParser()
config.read('./data/mongo.ini')

['./data/mongo.ini']

In [3]:
# Port from the [Mongo] section; config.get returns it as a string, so it is
# converted with int() when the client is created.
port = config.get('Mongo', 'port')
print (port)

27017


In [4]:
# Connect with the host from the [Mongo] section and the port read earlier.
mongo_host = config.get('Mongo', 'host')
client = MongoClient(mongo_host, int(port))

In [5]:
# Display the handle for the "analytics" database (dictionary-style access).
client['analytics']

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'analytics')

In [6]:
# List the non-system collections. Database.collection_names() was removed in
# PyMongo 4; list_collection_names() with a name filter is the documented
# equivalent of include_system_collections=False.
client.analytics.list_collection_names(filter={"name": {"$regex": r"^(?!system\.)"}})

['tweets', 'blog', 'users']

In [7]:
# Start from a clean slate so re-running the notebook does not duplicate data.
# drop() does not fail when the collection is missing, so the except branch
# only sees server/connection problems; report them instead of hiding them.
try:
    result = client.analytics.blog.drop()
    print ("analytics blog dropped")
except pymongo.errors.PyMongoError as exc:
    print ("could not drop analytics blog:", exc)

analytics blog dropped


In [8]:
# Remove the users collection left over from earlier runs.
# drop() does not fail when the collection is missing, so the except branch
# only sees server/connection problems; report them instead of hiding them.
try:
    result = client.analytics.users.drop()
    print ("analytics users dropped")
except pymongo.errors.PyMongoError as exc:
    print ("could not drop analytics users:", exc)

analytics users dropped


In [9]:
# Remove the tweets collection left over from earlier runs.
# drop() does not fail when the collection is missing, so the except branch
# only sees server/connection problems; report them instead of hiding them.
try:
    result = client.analytics.tweets.drop()
    print ("analytics tweets dropped")
except pymongo.errors.PyMongoError as exc:
    print ("could not drop analytics tweets:", exc)

analytics tweets dropped


In [10]:
# Display the "analytics" database handle again (dictionary-style access).
client['analytics']

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'analytics')

In [11]:
# After the drops the database should list no user collections.
# Database.collection_names() was removed in PyMongo 4; list_collection_names()
# with a name filter is the documented equivalent of
# include_system_collections=False.
client.analytics.list_collection_names(filter={"name": {"$regex": r"^(?!system\.)"}})

[]

In [12]:
# Insert a single user document; the users collection is created implicitly
# by this first insert. inserted_id is the ObjectId MongoDB assigned to it.
result = client.analytics.users.insert_one({
    "username" : "Goku",
    "firstname" : "Goku",
    "surname" : "Brown",
    "dateofbirth" : datetime(1973, 7, 8),
    "email" : "Goku@GokuBrown.com",
    "score" : 5
})
result.inserted_id

ObjectId('5ac19b70a313fc02d6006fda')

In [13]:
# Insert the first blog post and show the ObjectId assigned to it.
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12; consider
# datetime.now(timezone.utc) when this notebook is modernised.
result = client.analytics.blog.insert_one({
    "post_id": 1,
    "author": "Goku",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.utcnow()
})
result.inserted_id

ObjectId('5ac19b70a313fc02d6006fdb')

## You need to be careful; running a script twice may duplicate your data

In [14]:
# Deliberate repeat of the previous insert: the same post is stored a second
# time under a new _id, which is how re-running a script duplicates data.
result = client.analytics.blog.insert_one({
    "post_id": 1,
    "author": "Goku",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.utcnow()
})
result.inserted_id

ObjectId('5ac19b71a313fc02d6006fdc')

In [15]:
# Short alias for the analytics database, used by the remaining cells.
db = client.analytics

In [16]:
# The inserts above created the collections implicitly; list them.
# Database.collection_names() was removed in PyMongo 4; list_collection_names()
# with a name filter is the documented equivalent of
# include_system_collections=False.
db.list_collection_names(filter={"name": {"$regex": r"^(?!system\.)"}})

['blog', 'users']

In [17]:
# Print every blog post written by Goku.
for post in db.blog.find({"author": "Goku"}):
    pprint.pprint(post)
# Two identical posts come back (different _id only) because the same insert
# was executed twice: running a script twice may duplicate your data.

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdc'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 4000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


### Getting a Single Document With find_one()   

In [18]:
# With no filter, find_one() returns a single document from the collection.
pprint.pprint(db.blog.find_one())

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


In [19]:
# find_one() with a filter returns one matching document (None if nothing matches).
pprint.pprint(db.blog.find_one({"author": "Goku"}))

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


###  Querying By ObjectId   

In [20]:
# Keep the _id of one of Goku's posts so it can be used as a query key.
# NOTE(review): find_one() returns None when nothing matches, in which case
# the ['_id'] subscript would raise TypeError.
mid=db.blog.find_one({"author": "Goku"})['_id']
mid

ObjectId('5ac19b70a313fc02d6006fdb')

In [21]:
# Look the post up again by its ObjectId (an ObjectId instance, not its string form).
pprint.pprint(db.blog.find_one({"_id": mid}))

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}


### Bulk Inserts   

In [22]:
# Two posts with different fields (one has "tags", the other "title"),
# inserted with a single insert_many() call.
# NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
new_posts = [{"author": "Goku",
    "text": "Another post on bulk inserts!",
    "tags": ["bulk", "insert"],
    "date": datetime(2017, 3, 12, 11, 14)},
    {"author": "Vegeta",
    "title": "MongoDB is fun",
    "text": "and pretty easy too!",
    "date": datetime.utcnow()}]
result = db.blog.insert_many(new_posts)
# One ObjectId per inserted document, in the order of new_posts.
result.inserted_ids

[ObjectId('5ac19b71a313fc02d6006fdd'), ObjectId('5ac19b71a313fc02d6006fde')]

### Querying for More Than One Document  

In [23]:
# find() without a filter iterates over every document in the collection.
for post in db.blog.find():
    pprint.pprint(post)

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdc'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 4000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdd'),
 'author': 'Goku',
 'date': datetime.datetime(2017, 3, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post on bulk inserts!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fde'),
 'author': 'Vegeta',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 69000),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


In [24]:
# Restrict the cursor to Goku's posts (now three, including the bulk-inserted one).
for post in db.blog.find({"author": "Goku"}):
    pprint.pprint(post)

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdc'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 4000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdd'),
 'author': 'Goku',
 'date': datetime.datetime(2017, 3, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post on bulk inserts!'}


### Counting  

In [25]:
# Count every document in the blog collection. Collection.count() was
# deprecated in PyMongo 3.7 and removed in 4.0; count_documents({}) is the
# supported replacement.
db.blog.count_documents({})

4

In [26]:
# Count only Goku's posts. Cursor.count() was deprecated in PyMongo 3.7 and
# removed in 4.0; count_documents(filter) is the supported replacement.
db.blog.count_documents({"author": "Goku"})

3

### Range Queries

In [27]:
# Posts dated after noon on 5 May 2017 ($gt = strictly greater than),
# sorted by author.
d = datetime(2017, 5, 5, 12)
for post in db.blog.find({"date": {"$gt": d}}).sort("author"):
    pprint.pprint(post)

{'_id': ObjectId('5ac19b70a313fc02d6006fdb'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 40, 945000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdc'),
 'author': 'Goku',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 4000),
 'post_id': 1,
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('5ac19b71a313fc02d6006fde'),
 'author': 'Vegeta',
 'date': datetime.datetime(2018, 4, 2, 2, 54, 41, 69000),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


### Indexing  

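Note: with a unique index, duplicate values for the indexed field (whether they already exist when the index is created or are inserted afterwards) raise this error: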

```python
DuplicateKeyError: E11000 duplicate key error collection: analytics.users index: username_1 dup key: { : "Goku" }
```

In [28]:
# Add a second user; this runs before the unique index on username is created.
result = db.users.insert_one({
    "username" : "Vegeta",
    "firstname" : "Vegeta",
    "surname" : "Saiyan",
    "dateofbirth" : datetime(1903, 7, 7),
    "email" : "Vegeta@Saiyan.com",
    "score" : 3
})
result.inserted_id

ObjectId('5ac19b71a313fc02d6006fdf')

In [29]:
# Show the users stored so far.
for user in db.users.find():
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': 'Goku@GokuBrown.com',
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}


In [30]:
# Add two more users in one round trip with insert_many().
result = db.users.insert_many([{
    "username" : "Bulma",
    "firstname" : "Bulma",
    "surname" : "Saiyan",
    "dateofbirth" : datetime(1919, 7, 7),
    "email" : "Bulma@Saiyan.com",
    "score" : 2},
{
    "username" : "Trunks",
    "firstname" : "Trunks",
    "surname" : "Saiyan",
    "dateofbirth" : datetime(1955, 7, 7),
    "email" : "Trunks@Saiyan.com",
    "score" : 5
}])

In [31]:
# Show all users after the bulk insert.
for user in db.users.find():
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': 'Goku@GokuBrown.com',
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


In [32]:
# Enforce unique usernames with an ascending unique index, then show the
# names of all indexes on the collection in sorted order.
index_spec = [('username', pymongo.ASCENDING)]
result = db.users.create_index(index_spec, unique=True)
sorted(db.users.index_information())

['_id_', 'username_1']

In [33]:
# A one-element batch whose username ("Vegeta") is already in the collection.
user_profiles = [
    dict(
        username="Vegeta",
        firstname="Vegeta",
        surname="Saiyan",
        dateofbirth=datetime(1903, 7, 7),
        email="Vegeta@Saiyan.com",
        score=3,
    )
]

In [34]:
# Show the batch that is about to be inserted.
pprint.pprint(user_profiles)

[{'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
  'email': 'Vegeta@Saiyan.com',
  'firstname': 'Vegeta',
  'score': 3,
  'surname': 'Saiyan',
  'username': 'Vegeta'}]


In [35]:
# Inserting a username that already exists violates the unique index;
# insert_many() reports that as BulkWriteError. Catch only that error (a bare
# except would also hide typos and connection problems) and show the reason.
try:
  result = db.users.insert_many(user_profiles)
except pymongo.errors.BulkWriteError as exc:
  print ("insert_many(user_profiles) failed")
  for write_error in exc.details.get("writeErrors", []):
    print ("  ", write_error.get("errmsg"))

insert_many(user_profiles) failed


_The index prevents us from inserting a document whose username is already in the collection_

### Update  

In [36]:
# NOTE(review): this import would normally live in the imports cell at the top.
import copy
# Fetch Goku's document and work on a deep copy so orig_user stays untouched.
orig_user = db.users.find_one({"username":"Goku"})
new_user = copy.deepcopy(orig_user)
pprint.pprint(new_user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': 'Goku@GokuBrown.com',
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}


In [37]:
# Build the new e-mail value: the current address followed by a second one.
l = [new_user["email"], "Goku@NikBearBrown.com"]
l

['Goku@GokuBrown.com', 'Goku@NikBearBrown.com']

In [38]:
# Replace the single e-mail string on the copy with the list built above.
new_user["email"] = l
pprint.pprint(new_user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}


In [39]:
# replace the matched document with the contents of new_user
db.users.replace_one({"username":"Goku"}, new_user)

<pymongo.results.UpdateResult at 0x114514d88>

In [40]:
# Read the document back to confirm the e-mail field is now a list.
goku = db.users.find_one({"username":"Goku"})
pprint.pprint(goku)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}


### Delete  

In [41]:
# Users before the delete.
for user in db.users.find():
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


In [42]:
# delete_one() removes at most one document matching the filter. No user has
# score 0, so nothing is removed here (the listing that follows is unchanged).
db.users.delete_one({"score":0})

<pymongo.results.DeleteResult at 0x11451db88>

In [43]:
# Users after the delete.
for user in db.users.find():
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


### Query Operators

Queries use a document-style (or python dict) syntax.

In [44]:
# Equality match: users whose score is exactly 5.
for user in db.users.find({'score': 5}):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


In [45]:
# Number of users whose score is exactly 5. Cursor.count() was deprecated in
# PyMongo 3.7 and removed in 4.0; count_documents(filter) replaces it.
db.users.count_documents({'score': 5})

2

In [46]:
# $lt: users whose score is strictly less than 5.
for user in db.users.find({'score': {'$lt': 5}}):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}


In [47]:
# Number of users whose score is strictly less than 5. Cursor.count() was
# deprecated in PyMongo 3.7 and removed in 4.0; count_documents(filter)
# replaces it.
db.users.count_documents({'score': {'$lt': 5}})

2

In [48]:
# $lte: one user whose score is less than or equal to 4.
db.users.find_one({'score': {'$lte': 4}})

{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}

In [49]:
import re
# Match values that end in the literal domain "Saiyan.com". The dot is escaped
# so that it cannot match an arbitrary character (e.g. "SaiyanXcom").
rgx = re.compile(r'Saiyan\.com$')

In [50]:
# A compiled Python pattern can be used directly as a query value:
# users whose e-mail matches rgx.
for user in db.users.find({'email': rgx}):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


### Sort results

You can sort the query results on the server side using the sort
method.

In [51]:
# Users with score <= 4, highest score first.
for user in db.users.find({'score': {'$lte': 4}}).sort([('score', pymongo.DESCENDING)]):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}


In [52]:
# Users with score <= 5, highest score first.
for user in db.users.find({'score': {'$lte': 5}}).sort([('score', pymongo.DESCENDING)]):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}
{'_id': ObjectId('5ac19b71a313fc02d6006fdf'),
 'dateofbirth': datetime.datetime(1903, 7, 7, 0, 0),
 'email': 'Vegeta@Saiyan.com',
 'firstname': 'Vegeta',
 'score': 3,
 'surname': 'Saiyan',
 'username': 'Vegeta'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe0'),
 'dateofbirth': datetime.datetime(1919, 7, 7, 0, 0),
 'email': 'Bulma@Saiyan.com',
 'firstname': 'Bulma',
 'score': 2,
 'surname': 'Saiyan',
 'username': 'Bulma'}


### Limit result size  

In [53]:
# Same query and ordering, but limit(2) caps the result at the first two documents.
for user in db.users.find({'score': {'$lte': 5}}).sort([('score', pymongo.DESCENDING)]).limit(2):
    pprint.pprint(user)

{'_id': ObjectId('5ac19b70a313fc02d6006fda'),
 'dateofbirth': datetime.datetime(1973, 7, 8, 0, 0),
 'email': ['Goku@GokuBrown.com', 'Goku@NikBearBrown.com'],
 'firstname': 'Goku',
 'score': 5,
 'surname': 'Brown',
 'username': 'Goku'}
{'_id': ObjectId('5ac19b71a313fc02d6006fe1'),
 'dateofbirth': datetime.datetime(1955, 7, 7, 0, 0),
 'email': 'Trunks@Saiyan.com',
 'firstname': 'Trunks',
 'score': 5,
 'surname': 'Saiyan',
 'username': 'Trunks'}


In [54]:
def load_json(j, data_dir="data/"):
    """Load a JSON Lines file (one JSON document per line) into a list.

    Args:
        j: File name, resolved relative to ``data_dir``.
        data_dir: Directory that holds the file; defaults to ``"data/"``.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    p=os.path.join(data_dir, j)
    print (p)
    # The legacy 'rU' mode raises ValueError on Python 3.11+; universal
    # newlines are already the default in text mode. UTF-8 is requested
    # explicitly so decoding does not depend on the platform locale.
    with open(p, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing empty line), which json.loads
        # would reject.
        data = [json.loads(row) for row in f if row.strip()]
    return data

In [55]:
# Read the sample tweets (one JSON document per line) and show the first one.
tweets_j=load_json('db_tweets.json')
tweets_j[0]

data/db_tweets.json


{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:41:22 +0000 2015',
 'entities': {'hashtags': [{'indices': [128, 136], 'text': 'Bigdata'},
   {'indices': [137, 140], 'text': 'IoT'},
   {'indices': [139, 140], 'text': 'CIO'}],
  'symbols': [],
  'trends': [],
  'urls': [{'display_url': 'bit.ly/1jvQFcK',
    'expanded_url': 'http://bit.ly/1jvQFcK',
    'indices': [105, 127],
    'url': 'http://t.co/gLf50KtZKx'}],
  'user_mentions': [{'id': 474651213,
    'id_str': '474651213',
    'indices': [3, 15],
    'name': 'Marc Wilczek',
    'screen_name': 'MarcWilczek'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577223085054558208,
 'id_str': '577223085054558208',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': Fa

In [56]:
# Text of the first tweet.
print(tweets_j[0]['text'])

RT @MarcWilczek: In 2014, the digital universe equaled 1.7 megabytes a minute for every person on Earth: http://t.co/gLf50KtZKx #Bigdata #I…


In [57]:
# One row per tweet; nested objects such as "entities" and "user" stay as
# dict-valued columns rather than being flattened.
tweets = pd.DataFrame(tweets_j)
tweets.head()

Unnamed: 0,contributors,coordinates,created_at,entities,extended_entities,favorite_count,favorited,filter_level,geo,id,...,place,possibly_sensitive,retweet_count,retweeted,retweeted_status,source,text,timestamp_ms,truncated,user
0,,,Sun Mar 15 21:41:22 +0000 2015,"{'hashtags': [{'text': 'Bigdata', 'indices': [...",,0,False,low,,577223085054558208,...,,False,0,False,{'created_at': 'Sun Mar 15 21:20:26 +0000 2015...,"<a href=""http://www.simbasystems.com"" rel=""nof...","RT @MarcWilczek: In 2014, the digital universe...",1426455682420,False,"{'id': 2704548373, 'id_str': '2704548373', 'na..."
1,,,Sun Mar 15 21:41:30 +0000 2015,"{'hashtags': [], 'trends': [], 'urls': [], 'us...",,0,False,low,,577223118327926785,...,,False,0,False,,"<a href=""http://twitter.com"" rel=""nofollow"">Tw...",Tanginang database to,1426455690353,False,"{'id': 405481463, 'id_str': '405481463', 'name..."
2,,,Sun Mar 15 21:41:31 +0000 2015,"{'hashtags': [{'text': 'AgTech', 'indices': [9...",,0,False,low,,577223124862697472,...,,False,0,False,{'created_at': 'Sun Mar 15 21:20:02 +0000 2015...,"<a href=""http://www.simbasystems.com"" rel=""nof...",RT @chrisco: The future of farming: robots + b...,1426455691911,False,"{'id': 2704548373, 'id_str': '2704548373', 'na..."
3,,,Sun Mar 15 21:41:40 +0000 2015,"{'hashtags': [{'text': 'cloud', 'indices': [92...","{'media': [{'id': 577217491841257473, 'id_str'...",0,False,low,,577223160195502080,...,,False,0,False,{'created_at': 'Sun Mar 15 21:19:08 +0000 2015...,"<a href=""http://www.simbasystems.com"" rel=""nof...",RT @Brian_Singer_: The EMC Federation Joins th...,1426455700335,False,"{'id': 2704548373, 'id_str': '2704548373', 'na..."
4,,,Sun Mar 15 21:41:46 +0000 2015,"{'hashtags': [{'text': 'BigData', 'indices': [...",,0,False,low,,577223187110330368,...,,False,0,False,{'created_at': 'Sun Mar 15 21:18:55 +0000 2015...,"<a href=""http://www.simbasystems.com"" rel=""nof...",RT @execedia: Learn @execedia: The Business Be...,1426455706752,False,"{'id': 2704548373, 'id_str': '2704548373', 'na..."


In [58]:
# Collection handle for "tweets" in the analytics database.
db.tweets

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'analytics'), 'tweets')

In [59]:
# Pretty-print every loaded tweet, preceded by its running index.
# NOTE(review): this dumps the full content of all tweets into the cell
# output; consider slicing tweets_j (e.g. the first few items) to keep the
# notebook readable.
cnt=0
for tweet in tweets_j:
    print("tweet ", cnt)
    cnt+=1
    pprint.pprint(tweet)
    print()

tweet  0
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:41:22 +0000 2015',
 'entities': {'hashtags': [{'indices': [128, 136], 'text': 'Bigdata'},
                           {'indices': [137, 140], 'text': 'IoT'},
                           {'indices': [139, 140], 'text': 'CIO'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'bit.ly/1jvQFcK',
                        'expanded_url': 'http://bit.ly/1jvQFcK',
                        'indices': [105, 127],
                        'url': 'http://t.co/gLf50KtZKx'}],
              'user_mentions': [{'id': 474651213,
                                 'id_str': '474651213',
                                 'indices': [3, 15],
                                 'name': 'Marc Wilczek',
                                 'screen_name': 'MarcWilczek'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577223085054558208,
 'id_str': '5772

tweet  1
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:41:30 +0000 2015',
 'entities': {'hashtags': [],
              'symbols': [],
              'trends': [],
              'urls': [],
              'user_mentions': []},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577223118327926785,
 'id_str': '577223118327926785',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'tl',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
 'text': 'Tanginang database to',
 'timestamp_ms': '1426455690353',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Sat Nov 05 11:21:24 +0000 2011',
          'default_profile': False,
          'default_profile_image'

                               'statuses_count': 2395,
                               'time_zone': 'Mountain Time (US & Canada)',
                               'url': 'http://about.me/chrisco',
                               'utc_offset': -21600,
                               'verified': False}},
 'source': '<a href="http://www.simbasystems.com" '
           'rel="nofollow">NoSQLDigest</a>',
 'text': 'RT @chrisco: The future of farming: robots + big data: '
         'http://t.co/MiEIZmKzEb via @KurzweilAINews #AgTech #AgBots #Robotics '
         '#Automation #Io…',
 'timestamp_ms': '1426455691911',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Sun Aug 03 17:07:24 +0000 2014',
          'default_profile': True,
          'default_profile_image': False,
          'description': 'NoSQL Digest of tweets.',
          'favourites_count': 72,
          'follow_request_sent': None,
          'followers_count': 5418,
          'following': None,
      

                               'url': 'http://Orbitera.com',
                               'utc_offset': -25200,
                               'verified': False}},
 'source': '<a href="http://www.simbasystems.com" '
           'rel="nofollow">NoSQLDigest</a>',
 'text': 'RT @Brian_Singer_: The EMC Federation Joins the OpenStack Foundation '
         'http://t.co/i37gbFthQw #cloud #bigdata #aws #ec2 '
         'http://t.co/uYQIXOSMHs',
 'timestamp_ms': '1426455700335',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Sun Aug 03 17:07:24 +0000 2014',
          'default_profile': True,
          'default_profile_image': False,
          'description': 'NoSQL Digest of tweets.',
          'favourites_count': 72,
          'follow_request_sent': None,
          'followers_count': 5418,
          'following': None,
          'friends_count': 12,
          'geo_enabled': False,
          'id': 2704548373,
          'id_str': '2704548373',
          'is_t

          'name': 'NoSQL',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_link_color': '0084B4',
          'profile_sidebar_border_color': 'C0DEED',
          'profile_sidebar_fill_color': 'DDEEF6',
          'profile_text_color': '333333',
          'profile_use_background_image': True,
          'protected': False,
          'screen_name': 'NoSQLDigest',
          'statuses_count': 362923,
          'time_zone': None,
          'url': None,
          'utc_offset': None,
    

                      'coordinates': None,
                      'created_at': 'Sun Mar 15 21:18:37 +0000 2015',
                      'entities': {'hashtags': [{'indices': [74, 82],
                                                 'text': 'bigdata'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'tek.io/1dQ7avz',
                                             'expanded_url': 'http://tek.io/1dQ7avz',
                                             'indices': [83, 105],
                                             'url': 'http://t.co/6HWjCv3BL5'}],
                                   'user_mentions': [{'id': 6486602,
                                                      'id_str': '6486602',
                                                      'indices': [4, 17],
                                                      'name': 'TechRepublic',
                                         

          'follow_request_sent': None,
          'followers_count': 40,
          'following': None,
          'friends_count': 58,
          'geo_enabled': False,
          'id': 2671926047,
          'id_str': '2671926047',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 0,
          'location': '',
          'name': 'emily',
          'notifications': None,
          'profile_background_color': 'FFEC9E',
          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/544051928376688640/ej3l6k2O.jpeg',
          'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/544051928376688640/ej3l6k2O.jpeg',
          'profile_background_tile': True,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/2671926047/1415781336',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/532450514248937472/HZ1fR3S9_normal.jpeg',
          'profile_image_url_https': 'https://pb

                                              'url': 'http://t.co/jRLp7xtLDQ'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'vytm.in/DDsJLg',
                                             'expanded_url': 'http://vytm.in/DDsJLg',
                                             'indices': [48, 70],
                                             'url': 'http://t.co/fi0MuG9QUH'}],
                                   'user_mentions': []},
                      'extended_entities': {'media': [{'display_url': 'pic.twitter.com/jRLp7xtLDQ',
                                                       'expanded_url': 'http://twitter.com/MatthewB_SEA/status/577217018371420161/photo/1',
                                                       'id': 577217018325307392,
                                                       'id_str': '577217018325307392',
                                             

                                                 'text': 'BigData'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'onforb.es/1Ez1as3',
                                             'expanded_url': 'http://onforb.es/1Ez1as3',
                                             'indices': [51, 73],
                                             'url': 'http://t.co/w4xw93Tw19'}],
                                   'user_mentions': [{'id': 91478624,
                                                      'id_str': '91478624',
                                                      'indices': [78, 85],
                                                      'name': 'Forbes',
                                                      'screen_name': 'Forbes'}]},
                      'favorite_count': 5,
                      'favorited': False,
                      'filter_level': 'low',
          

 'entities': {'hashtags': [{'indices': [103, 108], 'text': 'data'},
                           {'indices': [109, 112], 'text': 'IT'},
                           {'indices': [113, 119], 'text': 'cloud'},
                           {'indices': [120, 128], 'text': 'bigdata'},
                           {'indices': [129, 133], 'text': 'aws'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'vytm.in/03rZDw',
                        'expanded_url': 'http://vytm.in/03rZDw',
                        'indices': [80, 102],
                        'url': 'http://t.co/tkSfjPpods'}],
              'user_mentions': [{'id': 1961001632,
                                 'id_str': '1961001632',
                                 'indices': [3, 13],
                                 'name': 'Carv Moore',
                                 'screen_name': 'CarvMoore'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577223

                        'url': 'https://t.co/BGmnXAOlIY'}],
              'user_mentions': [{'id': 534563976,
                                 'id_str': '534563976',
                                 'indices': [3, 14],
                                 'name': 'Kirk Borne',
                                 'screen_name': 'KirkDBorne'},
                                {'id': 768197780,
                                 'id_str': '768197780',
                                 'indices': [125, 133],
                                 'name': 'Jake Vanderplas',
                                 'screen_name': 'jakevdp'}]},
 'extended_entities': {'media': [{'display_url': 'pic.twitter.com/pVuNqE9ErO',
                                  'expanded_url': 'http://twitter.com/KirkDBorne/status/561667158632325120/photo/1',
                                  'id': 561667158137401346,
                                  'id_str': '561667158137401346',
                                  'indices': [139, 140],


                               'profile_image_url_https': 'https://pbs.twimg.com/profile_images/549687506484985856/3NPweeB1_normal.jpeg',
                               'profile_link_color': '0084B4',
                               'profile_sidebar_border_color': 'FFFFFF',
                               'profile_sidebar_fill_color': 'DDEEF6',
                               'profile_text_color': '333333',
                               'profile_use_background_image': True,
                               'protected': False,
                               'screen_name': 'KirkDBorne',
                               'statuses_count': 33686,
                               'time_zone': 'Eastern Time (US & Canada)',
                               'url': 'http://www.linkedin.com/in/kirkdborne',
                               'utc_offset': -14400,
                               'verified': False}},
 'source': '<a href="http://twitter.com/download/android" '
           'rel="nofollow">Twitter for

                                             'indices': [95, 117],
                                             'url': 'http://t.co/SgcfxbdQvZ'}],
                                   'user_mentions': []},
                      'extended_entities': {'media': [{'display_url': 'pic.twitter.com/QCNvrNx6AN',
                                                       'expanded_url': 'http://twitter.com/caitlinrgreen/status/542323046179405824/photo/1',
                                                       'id': 542323043687600128,
                                                       'id_str': '542323043687600128',
                                                       'indices': [118, 140],
                                                       'media_url': 'http://pbs.twimg.com/media/B4a371SCEAANG7F.jpg',
                                                       'media_url_https': 'https://pbs.twimg.com/media/B4a371SCEAANG7F.jpg',
                                                       'sizes': {'

          'url': None,
          'utc_offset': None,
          'verified': False}}

tweet  19
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:42:56 +0000 2015',
 'entities': {'hashtags': [{'indices': [31, 39], 'text': 'BigData'},
                           {'indices': [40, 50], 'text': 'Hackathon'},
                           {'indices': [124, 128], 'text': 'BDH'}],
              'media': [{'display_url': 'pic.twitter.com/jJ7eG35B4x',
                         'expanded_url': 'http://twitter.com/CANSQL/status/577092065839996928/photo/1',
                         'id': 577092064967413760,
                         'id_str': '577092064967413760',
                         'indices': [139, 140],
                         'media_url': 'http://pbs.twimg.com/media/CAI-LO8UUAAZqiw.jpg',
                         'media_url_https': 'https://pbs.twimg.com/media/CAI-LO8UUAAZqiw.jpg',
                         'sizes': {'large': {'h': 576,
                                    

          'description': 'NoSQL Digest of tweets.',
          'favourites_count': 72,
          'follow_request_sent': None,
          'followers_count': 5418,
          'following': None,
          'friends_count': 12,
          'geo_enabled': False,
          'id': 2704548373,
          'id_str': '2704548373',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 1801,
          'location': '',
          'name': 'NoSQL',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpe

          'favourites_count': 72,
          'follow_request_sent': None,
          'followers_count': 5418,
          'following': None,
          'friends_count': 12,
          'geo_enabled': False,
          'id': 2704548373,
          'id_str': '2704548373',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 1802,
          'location': '',
          'name': 'NoSQL',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_link_color': '0084B4',
      

          'url': 'http://www.microsoft.com/en-us/sqlserver',
          'utc_offset': -21600,
          'verified': False}}

tweet  23
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:43:15 +0000 2015',
 'entities': {'hashtags': [{'indices': [64, 68], 'text': 'php'},
                           {'indices': [69, 78], 'text': 'database'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'dlvr.it/8zHRjx',
                        'expanded_url': 'http://dlvr.it/8zHRjx',
                        'indices': [40, 62],
                        'url': 'http://t.co/OGcvtI1663'}],
              'user_mentions': [{'id': 124013422,
                                 'id_str': '124013422',
                                 'indices': [3, 12],
                                 'name': 'php users',
                                 'screen_name': 'phpusers'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': No

          'follow_request_sent': None,
          'followers_count': 75,
          'following': None,
          'friends_count': 27,
          'geo_enabled': False,
          'id': 2704088773,
          'id_str': '2704088773',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 44,
          'location': '',
          'name': 'Michael Taylor',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/495915526917328897/7fq4kjkJ_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/495915526917328897/7fq4kjkJ_normal.jpeg',
          'profile_link_color': '0084B4',
          'profile_sidebar_border_c

 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://ifttt.com" rel="nofollow">IFTTT</a>',
 'text': 'Tinder swipes right for database-as-a-service from Rackspace From '
         'the start, Tinder has used the MongoDB NoSQL ope '
         'http://t.co/Qh5Lkvcyyz',
 'timestamp_ms': '1426455869495',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Wed Aug 08 13:31:48 +0000 2012',
          'default_profile': True,
          'default_profile_image': False,
          'description': 'Experimental bot .. maintained by @mendicot ..',
          'favourites_count': 0,
          'follow_request_sent': None,
          'followers_count': 41,
          'following': None,
          'friends_count': 1,
          'geo_enabled': False,
          'id': 745204837,
          'id_str': '745204837',
          'is_translator': False,
          'lang': 'en',
   

                                   'small': {'h': 177,
                                             'resize': 'fit',
                                             'w': 340},
                                   'thumb': {'h': 150,
                                             'resize': 'crop',
                                             'w': 150}},
                         'source_status_id': 577175083527073793,
                         'source_status_id_str': '577175083527073793',
                         'type': 'photo',
                         'url': 'http://t.co/qTnymohfkC'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'ibm.biz/AnalyticsAntics',
                        'expanded_url': 'http://ibm.biz/AnalyticsAntics',
                        'indices': [107, 129],
                        'url': 'http://t.co/Oq7Dkb4TRy'}],
              'user_mentions': [{'id': 267283568,
                                 'id_str': '267283568',
     

              'user_mentions': [{'id': 19365210,
                                 'id_str': '19365210',
                                 'indices': [3, 15],
                                 'name': 'Craig Milroy',
                                 'screen_name': 'CraigMilroy'},
                                {'id': 18949452,
                                 'id_str': '18949452',
                                 'indices': [135, 138],
                                 'name': 'Financial Times',
                                 'screen_name': 'FT'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577224030501117953,
 'id_str': '577224030501117953',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'retweeted_status': {'contributors

                                {'id': 91478624,
                                 'id_str': '91478624',
                                 'indices': [101, 108],
                                 'name': 'Forbes',
                                 'screen_name': 'Forbes'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577224031532916737,
 'id_str': '577224031532916737',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
                      'coordinates': None,
                      'created_at': 'Sun Mar 15 21:44:15 +0000 2015',
                      'entities': {'hashtags': [{'indices': [37, 47],
                                                 'text': 'Analytics'},
                    

                                              'type': 'photo',
                                              'url': 'http://t.co/ruaoqLTi7K'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [],
                                   'user_mentions': []},
                      'extended_entities': {'media': [{'display_url': 'pic.twitter.com/ruaoqLTi7K',
                                                       'expanded_url': 'http://twitter.com/CANSQL/status/577090478081982464/photo/1',
                                                       'id': 577090477515673601,
                                                       'id_str': '577090477515673601',
                                                       'indices': [115, 137],
                                                       'media_url': 'http://pbs.twimg.com/media/CAI8u1OVEAEBsiM.jpg',
                                                       

 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577224064390983680,
 'id_str': '577224064390983680',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://dlvr.it" rel="nofollow">dlvr.it</a>',
 'text': '☆★☆ JOB ALERT ☆★☆ #ITJob #Job #Sioux Falls - Database Analyst 4 '
         '(SQL) http://t.co/5VsZsiRecp ☜ view full details',
 'timestamp_ms': '1426455915912',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Sat Sep 27 12:01:45 +0000 2014',
          'default_profile': False,
          'default_profile_image': False,
          'description': 'Hiring, posting job opportunities. Open networker '
                         '#Recruiter.  #Recruitment\u202c |#Recruiters\u202c | '
                  

 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
                      'coordinates': None,
                      'created_at': 'Sun Mar 15 18:47:15 +0000 2015',
                      'entities': {'hashtags': [{'indices': [41, 47],
                                                 'text': 'Cloud'},
                                                {'indices': [48, 58],
                                                 'text': 'Wearables'},
                                                {'indices': [59, 67],
                                                 'text': 'BigData'},
                                                {'indices': [68, 72],
                                                 'text': 'IOT'},
                                                {'indices': [73, 84],
                                                 'text': '3DPrinting'},
                                              

 'text': 'RT CraigMilroy: Important shift in thinking... The BBVA bank vaults '
         'that hoard #bigdata instead of bullion http://t.co/k0sIrpiELj; via '
         'FT …',
 'timestamp_ms': '1426455921053',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Sat Dec 20 11:02:28 +0000 2014',
          'default_profile': False,
          'default_profile_image': False,
          'description': 'FinTech retweets source, everyday. Discussion at '
                         'http://www.linkd.in/1xoCnBH. Curated Newsletter at '
                         'http://bit.ly/fintech_newsletter',
          'favourites_count': 17563,
          'follow_request_sent': None,
          'followers_count': 2312,
          'following': None,
          'friends_count': 2170,
          'geo_enabled': False,
          'id': 2933447393,
          'id_str': '2933447393',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 780,
          'location

          'friends_count': 284,
          'geo_enabled': True,
          'id': 1076030426,
          'id_str': '1076030426',
          'is_translator': False,
          'lang': 'ja',
          'listed_count': 2,
          'location': '',
          'name': 'めぐ',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/1076030426/1422528498',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/575964029925003265/zV_GKcHb_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/575964029925003265/zV_GKcHb_normal.jpeg',
          'profile_link_color': '0084B4',
          'profile_sidebar_border_color': 'C0DEED',
  

          'favourites_count': 100,
          'follow_request_sent': None,
          'followers_count': 168,
          'following': None,
          'friends_count': 254,
          'geo_enabled': False,
          'id': 59443497,
          'id_str': '59443497',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 5,
          'location': '',
          'name': '1lio',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/515209998/big-star-off.png',
          'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/515209998/big-star-off.png',
          'profile_background_tile': True,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/59443497/1356192679',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/516012703526449152/QgMua0Pc_normal.jpeg',
          'profile_image_url_h

          'is_translator': False,
          'lang': 'en',
          'listed_count': 64,
          'location': '',
          'name': 'Army Vet',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000607695829/60797668007f027d7bd06f30bacfc9f4_normal.png',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/378800000607695829/60797668007f027d7bd06f30bacfc9f4_normal.png',
          'profile_link_color': '0084B4',
          'profile_sidebar_border_color': 'C0DEED',
          'profile_sidebar_fill_color': 'DDEEF6',
          'profile_text_color': '333333',
          'profile_use_background_image': True,
          'protected': False,

                               'following': None,
                               'friends_count': 453,
                               'geo_enabled': False,
                               'id': 873764377,
                               'id_str': '873764377',
                               'is_translator': False,
                               'lang': 'es',
                               'listed_count': 8,
                               'location': 'Lima, Perú',
                               'name': 'Enzo Angeles',
                               'notifications': None,
                               'profile_background_color': 'C0DEED',
                               'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_tile': False,
                               'profile_banner_url': 'h

          'location': 'Minneapolis',
          'name': 'Bill Corbett',
          'notifications': None,
          'profile_background_color': '5B6EDC',
          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/2633080/robots.jpg',
          'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/2633080/robots.jpg',
          'profile_background_tile': False,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/15120464/1419711236',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/548931961792647168/FBRXATKc_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/548931961792647168/FBRXATKc_normal.jpeg',
          'profile_link_color': '0000FF',
          'profile_sidebar_border_color': '87BC44',
          'profile_sidebar_fill_color': 'E0FF92',
          'profile_text_color': '000000',
          'profile_use_background_image': True,
          'protected'

                                             'indices': [38, 60],
                                             'url': 'http://t.co/yzXXgd9bOO'}],
                                   'user_mentions': []},
                      'favorite_count': 0,
                      'favorited': False,
                      'filter_level': 'low',
                      'geo': None,
                      'id': 577213950762168320,
                      'id_str': '577213950762168320',
                      'in_reply_to_screen_name': None,
                      'in_reply_to_status_id': None,
                      'in_reply_to_status_id_str': None,
                      'in_reply_to_user_id': None,
                      'in_reply_to_user_id_str': None,
                      'lang': 'en',
                      'place': None,
                      'possibly_sensitive': False,
                      'retweet_count': 2,
                      'retweeted': False,
                      'source': '<a href="http://wi

 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
                      'coordinates': None,
                      'created_at': 'Sun Mar 15 21:20:12 +0000 2015',
                      'entities': {'hashtags': [],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'ow.ly/Kl3qd',
                                             'expanded_url': 'http://ow.ly/Kl3qd',
                                             'indices': [114, 136],
                                             'url': 'http://t.co/US7372ur8P'}],
                                   'user_mentions': []},
                      'favorite_count': 9,
                      'favorited': False,
                      'filter_level': 'low',
                      'geo': None

              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'bit.ly/1ERx7we',
                        'expanded_url': 'http://bit.ly/1ERx7we',
                        'indices': [62, 84],
                        'url': 'http://t.co/8zMhaSEg1j'}],
              'user_mentions': []},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577224819164364800,
 'id_str': '577224819164364800',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://dlvr.it" rel="nofollow">dlvr.it</a>',
 'text': '★ JOB ALERT ★ #ITJob #Job #San Diego - SQL Database Developer '
         'http://t.co/8zMhaSEg1j ☜ view full details #job',
 'timestamp_ms': '1426456095864',
 'truncated': False,
 'user': {'contr

                               'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme9/bg.gif',
                               'profile_background_tile': False,
                               'profile_banner_url': 'https://pbs.twimg.com/profile_banners/15161984/1419291070',
                               'profile_image_url': 'http://pbs.twimg.com/profile_images/552173476941803520/HQrWYZHX_normal.jpeg',
                               'profile_image_url_https': 'https://pbs.twimg.com/profile_images/552173476941803520/HQrWYZHX_normal.jpeg',
                               'profile_link_color': '5AB5CC',
                               'profile_sidebar_border_color': 'FFFFFF',
                               'profile_sidebar_fill_color': '252429',
                               'profile_text_color': '666666',
                               'profile_use_background_image': True,
                               'protected': False,
                               'screen_n

          'profile_sidebar_border_color': 'C0DEED',
          'profile_sidebar_fill_color': 'DDEEF6',
          'profile_text_color': '333333',
          'profile_use_background_image': True,
          'protected': False,
          'screen_name': 'psennhauser',
          'statuses_count': 2090,
          'time_zone': 'Bern',
          'url': 'http://swissreporter.ch',
          'utc_offset': 3600,
          'verified': False}}

tweet  81
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:48:55 +0000 2015',
 'entities': {'hashtags': [],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'twitter.com/washingtonpost…',
                        'expanded_url': 'https://twitter.com/washingtonpost/status/577221464937627648',
                        'indices': [119, 140],
                        'url': 'https://t.co/pBXDMGRzkf'}],
              'user_mentions': [{'id': 15161984,
                                 'id_str': '151

                                 'screen_name': 'MattBors'}]},
 'favorite_count': 0,
 'favorited': False,
 'filter_level': 'low',
 'geo': None,
 'id': 577225009678196737,
 'id_str': '577225009678196737',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
                      'coordinates': None,
                      'created_at': 'Sun Mar 15 21:46:09 +0000 2015',
                      'entities': {'hashtags': [],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'twitter.com/washingtonpost…',
                                             'expanded_url': 'https://twitter.com/washingtonpost/status/577221464937627648',
    

          'profile_use_background_image': True,
          'protected': False,
          'screen_name': 'Toussaint215',
          'statuses_count': 22311,
          'time_zone': 'Eastern Time (US & Canada)',
          'url': 'http://www.iSocialitemedia.com',
          'utc_offset': -14400,
          'verified': False}}

tweet  84
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:49:11 +0000 2015',
 'entities': {'hashtags': [{'indices': [70, 78], 'text': 'BigData'},
                           {'indices': [79, 89], 'text': 'Analytics'},
                           {'indices': [113, 127], 'text': 'CyberSecurity'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'sas.com/en_us/software…',
                        'expanded_url': 'http://www.sas.com/en_us/software/fraud-security-intelligence/cybersecurity-solutions.html#.VQTAO2lM9bE.twitter',
                        'indices': [90, 112],
                        'url': 'h

          'notifications': None,
          'profile_background_color': '642D8B',
          'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/378800000115239295/a94218196fb617f811f2c9951a901936.jpeg',
          'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/378800000115239295/a94218196fb617f811f2c9951a901936.jpeg',
          'profile_background_tile': False,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/15792665/1414554310',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000623303116/d45e15d6f3feeebb7fb0d364c73b8668_normal.png',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/378800000623303116/d45e15d6f3feeebb7fb0d364c73b8668_normal.png',
          'profile_link_color': '9266CC',
          'profile_sidebar_border_color': 'FFFFFF',
          'profile_sidebar_fill_color': '7AC3EE',
          'profile_text_color': '3D1957',
          'profile

          'favourites_count': 0,
          'follow_request_sent': None,
          'followers_count': 68,
          'following': None,
          'friends_count': 79,
          'geo_enabled': False,
          'id': 529010508,
          'id_str': '529010508',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 2,
          'location': 'Milwaukee, Wisconsin',
          'name': 'Jerry Broitzman ',
          'notifications': None,
          'profile_background_color': '000000',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/505724180982018048/PuRbQVby_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/505724180982018048/PuRbQVby_normal.jpeg',
          'profile_link_c

 'user': {'contributors_enabled': False,
          'created_at': 'Mon May 11 23:10:42 +0000 2009',
          'default_profile': True,
          'default_profile_image': False,
          'description': "I'm a conservative Christian mom who believes in the "
                         'Constitution.  No Obama, No Hillary, Respect the '
                         'Bill of Rights. Respect Human rights.',
          'favourites_count': 2263,
          'follow_request_sent': None,
          'followers_count': 1083,
          'following': None,
          'friends_count': 1663,
          'geo_enabled': False,
          'id': 39369267,
          'id_str': '39369267',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 20,
          'location': 'Southern California',
          'name': 'Katie G',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
 

 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Tue Jul 23 07:30:28 +0000 2013',
          'default_profile': True,
          'default_profile_image': False,
          'description': 'A keen forensic document and forensic handwriting '
                         'expert.  Follow for updates of recent research, new '
                         'stories and developments in both areas.',
          'favourites_count': 0,
          'follow_request_sent': None,
          'followers_count': 415,
          'following': None,
          'friends_count': 371,
          'geo_enabled': False,
          'id': 1614672469,
          'id_str': '1614672469',
          'is_translator': False,
          'lang': 'en-gb',
          'listed_count': 7,
          'location': '',
          'name': 'Forensic Doc & HW',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/th

                                                      'id_str': '38723477',
                                                      'indices': [64, 78],
                                                      'name': 'Eddie Beard',
                                                      'screen_name': 'EddieBeardDNP'},
                                                     {'id': 3036904339,
                                                      'id_str': '3036904339',
                                                      'indices': [79, 93],
                                                      'name': 'Mary Jane Neri',
                                                      'screen_name': 'MaryJaneNeri1'}]},
                      'extended_entities': {'media': [{'display_url': 'pic.twitter.com/7dwgtphJ3B',
                                                       'expanded_url': 'http://twitter.com/DByresPHC/status/576879818282663937/photo/1',
                                                      

                               'created_at': 'Sat May 23 19:09:46 +0000 2009',
                               'default_profile': False,
                               'default_profile_image': False,
                               'description': 'Product Marketing Manager. '
                                              'Master Data Management, Data '
                                              'Quality & Data Governance '
                                              'Blogger. Data Science, Hadoop, '
                                              'Big Data & Cloud. Porsche fan. '
                                              'My Opinions!',
                               'favourites_count': 2104,
                               'follow_request_sent': None,
                               'followers_count': 6269,
                               'following': None,
                               'friends_count': 2858,
                               'geo_enabled': True,
                     

                               'listed_count': 20,
                               'location': 'Pisa, Italy',
                               'name': 'Calogero Ravenna',
                               'notifications': None,
                               'profile_background_color': 'C0DEED',
                               'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_tile': False,
                               'profile_banner_url': 'https://pbs.twimg.com/profile_banners/405569213/1426445966',
                               'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000378422060/d92eb44a28969231e3ddec769bd7c891_normal.jpeg',
                               'profile_image_url_https': 'https://pbs.twimg.com/profile_images/378800000378422060/d92eb44a28969231e3ddec

                                                        'medium': {'h': 401,
                                                                   'resize': 'fit',
                                                                   'w': 600},
                                                        'small': {'h': 227,
                                                                  'resize': 'fit',
                                                                  'w': 340},
                                                        'thumb': {'h': 150,
                                                                  'resize': 'crop',
                                                                  'w': 150}},
                                              'type': 'photo',
                                              'url': 'http://t.co/0bFvCrdb4R'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [

 'text': 'RT @takuma10feet: '
         'モブスタイルス15周年広島来てくれたみんなありがとう！写真は今日のマンウィズとのオープニング一曲目"database '
         'feat.takuma"の1分前。楽しかったなぁ。そして今日のthe trust… https://t.co/BTDfzG2…',
 'timestamp_ms': '1426456304433',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Fri Mar 14 12:43:36 +0000 2014',
          'default_profile': True,
          'default_profile_image': False,
          'description': None,
          'favourites_count': 135,
          'follow_request_sent': None,
          'followers_count': 25,
          'following': None,
          'friends_count': 77,
          'geo_enabled': False,
          'id': 2389017302,
          'id_str': '2389017302',
          'is_translator': False,
          'lang': 'ja',
          'listed_count': 0,
          'location': '',
          'name': 'jeanken3104',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/th

          'listed_count': 50,
          'location': 'Madrid - Spain',
          'name': 'Tania Rincón',
          'notifications': None,
          'profile_background_color': 'EDECE9',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme3/bg.gif',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme3/bg.gif',
          'profile_background_tile': False,
          'profile_banner_url': 'https://pbs.twimg.com/profile_banners/314614593/1416055084',
          'profile_image_url': 'http://pbs.twimg.com/profile_images/550487043969933312/SuK4JSR2_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/550487043969933312/SuK4JSR2_normal.jpeg',
          'profile_link_color': '088253',
          'profile_sidebar_border_color': 'D3D2CF',
          'profile_sidebar_fill_color': 'E3E2DE',
          'profile_text_color': '634047',
          'profile_use_background_image': True,
          'protected'

 'text': 'Everilion:the evolution of #Commerce: http://t.co/pIKJ0ZphGK '
         '#bigdata #ecommerce #gifts #shopping #WanasConceptStore\n'
         '#Zamalek #Cairo',
 'timestamp_ms': '1426456368759',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Tue Mar 16 14:04:54 +0000 2010',
          'default_profile': True,
          'default_profile_image': False,
          'description': 'Economista. . Internet',
          'favourites_count': 295,
          'follow_request_sent': None,
          'followers_count': 2579,
          'following': None,
          'friends_count': 2206,
          'geo_enabled': True,
          'id': 123568642,
          'id_str': '123568642',
          'is_translator': False,
          'lang': 'es',
          'listed_count': 202,
          'location': 'España . Madrid',
          'name': 'sara',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http:/

 'timestamp_ms': '1426456371080',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Fri Apr 23 21:21:50 +0000 2010',
          'default_profile': True,
          'default_profile_image': False,
          'description': "MD, Epicurean, Children's health and nutrition. "
                         'Energy/environment, renewable energy projects, '
                         'energy saving. Weekendcook.',
          'favourites_count': 67,
          'follow_request_sent': None,
          'followers_count': 368,
          'following': None,
          'friends_count': 1230,
          'geo_enabled': True,
          'id': 136402927,
          'id_str': '136402927',
          'is_translator': False,
          'lang': 'en',
          'listed_count': 14,
          'location': 'Netherlands',
          'name': 'Joyce G',
          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/ima

          'notifications': None,
          'profile_background_color': 'C0DEED',
          'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
          'profile_background_tile': False,
          'profile_image_url': 'http://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_image_url_https': 'https://pbs.twimg.com/profile_images/499257180009529344/CSWhr7LZ_normal.jpeg',
          'profile_link_color': '0084B4',
          'profile_sidebar_border_color': 'C0DEED',
          'profile_sidebar_fill_color': 'DDEEF6',
          'profile_text_color': '333333',
          'profile_use_background_image': True,
          'protected': False,
          'screen_name': 'NoSQLDigest',
          'statuses_count': 362937,
          'time_zone': None,
          'url': None,
          'utc_offset': None,
          'verified': False}}



 'id_str': '577226119805448193',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'lang': 'en',
 'place': None,
 'possibly_sensitive': False,
 'retweet_count': 0,
 'retweeted': False,
 'source': '<a href="http://www.nyndesigns.com/social-tweeter" '
           'rel="nofollow">Social Tweeter</a>',
 'text': 'Quickly &amp; easily manage your website &amp; database. No more '
         'tedious website builders which make you do all of the work\n'
         'http://t.co/km6NDdcwoI',
 'timestamp_ms': '1426456405961',
 'truncated': False,
 'user': {'contributors_enabled': False,
          'created_at': 'Tue Feb 26 18:19:13 +0000 2013',
          'default_profile': True,
          'default_profile_image': False,
          'description': None,
          'favourites_count': 0,
          'follow_request_sent': None,
          'followers_count': 659,
          'following': None,
    

          'utc_offset': None,
          'verified': False}}

tweet  144
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:54:05 +0000 2015',
 'entities': {'hashtags': [{'indices': [53, 59], 'text': 'Cloud'},
                           {'indices': [60, 70], 'text': 'Wearables'},
                           {'indices': [71, 79], 'text': 'BigData'},
                           {'indices': [80, 84], 'text': 'IOT'},
                           {'indices': [85, 96], 'text': '3DPrinting'},
                           {'indices': [97, 104], 'text': 'Selfie'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'wp.me/p3tudK-9p',
                        'expanded_url': 'http://wp.me/p3tudK-9p',
                        'indices': [106, 128],
                        'url': 'http://t.co/cTAQxjQ4Oq'}],
              'user_mentions': [{'id': 45871087,
                                 'id_str': '45871087',
                             

 'created_at': 'Sun Mar 15 21:54:05 +0000 2015',
 'entities': {'hashtags': [{'indices': [23, 31], 'text': 'BigData'},
                           {'indices': [46, 56], 'text': 'Marketers'}],
              'media': [{'display_url': 'pic.twitter.com/ohK7dUk9LB',
                         'expanded_url': 'http://twitter.com/DataMentors/status/577220989567782912/photo/1',
                         'id': 577220989412573184,
                         'id_str': '577220989412573184',
                         'indices': [80, 102],
                         'media_url': 'http://pbs.twimg.com/media/CAKzbn7WAAA9Kos.jpg',
                         'media_url_https': 'https://pbs.twimg.com/media/CAKzbn7WAAA9Kos.jpg',
                         'sizes': {'large': {'h': 412,
                                             'resize': 'fit',
                                             'w': 864},
                                   'medium': {'h': 286,
                                              'resize': 'fit',
 

                      'place': None,
                      'possibly_sensitive': False,
                      'retweet_count': 1,
                      'retweeted': False,
                      'source': '<a href="http://twitter.com/download/iphone" '
                                'rel="nofollow">Twitter for iPhone</a>',
                      'text': 'The third #technological era is coming #IoT '
                              'http://t.co/KbRBtWeCyJ #wearables #SmartCities '
                              '#cloud #BigData http://t.co/Ia1tcTLVuP',
                      'truncated': False,
                      'user': {'contributors_enabled': False,
                               'created_at': 'Wed Apr 23 09:06:32 +0000 2014',
                               'default_profile': False,
                               'default_profile_image': False,
                               'description': 'Queremos ser su SmartForce. '
                                              'Software y Apps; Io

                                                 'text': 'ads'}],
                                   'symbols': [],
                                   'trends': [],
                                   'urls': [{'display_url': 'bprofile.com',
                                             'expanded_url': 'http://www.bprofile.com',
                                             'indices': [100, 122],
                                             'url': 'http://t.co/oLgTS7Ewnv'}],
                                   'user_mentions': []},
                      'favorite_count': 6,
                      'favorited': False,
                      'filter_level': 'low',
                      'geo': None,
                      'id': 576488403446804480,
                      'id_str': '576488403446804480',
                      'in_reply_to_screen_name': None,
                      'in_reply_to_status_id': None,
                      'in_reply_to_status_id_str': None,
                      'in_reply_to

                               'profile_background_color': 'C0DEED',
                               'profile_background_image_url': 'http://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_image_url_https': 'https://abs.twimg.com/images/themes/theme1/bg.png',
                               'profile_background_tile': False,
                               'profile_image_url': 'http://pbs.twimg.com/profile_images/378800000451520338/068f7c4311d00c869319d4f78aeca15e_normal.png',
                               'profile_image_url_https': 'https://pbs.twimg.com/profile_images/378800000451520338/068f7c4311d00c869319d4f78aeca15e_normal.png',
                               'profile_link_color': '0084B4',
                               'profile_sidebar_border_color': 'C0DEED',
                               'profile_sidebar_fill_color': 'DDEEF6',
                               'profile_text_color': '333333',
                               'profile_use_

          'profile_sidebar_fill_color': 'DDEEF6',
          'profile_text_color': '333333',
          'profile_use_background_image': True,
          'protected': False,
          'screen_name': 'TungstenBigData',
          'statuses_count': 24402,
          'time_zone': 'New Delhi',
          'url': 'http://www.tungstenbigdata.com',
          'utc_offset': 19800,
          'verified': False}}

tweet  161
{'contributors': None,
 'coordinates': None,
 'created_at': 'Sun Mar 15 21:54:48 +0000 2015',
 'entities': {'hashtags': [{'indices': [15, 27], 'text': 'DataScience'},
                           {'indices': [37, 48], 'text': 'statistics'},
                           {'indices': [90, 98], 'text': 'bigdata'}],
              'symbols': [],
              'trends': [],
              'urls': [{'display_url': 'bit.ly/1x6oLOY',
                        'expanded_url': 'http://bit.ly/1x6oLOY',
                        'indices': [143, 144],
                        'url': 'http://t.co/cVnlTPxQfr'}

In [60]:
cnt = 0
# Show the id, the text and the creation timestamp of every tweet,
# one value per output line, in that order.
for tweet in tweets_j:
    for field in ('id', 'text', 'created_at'):
        print(tweet[field])

577223085054558208
RT @MarcWilczek: In 2014, the digital universe equaled 1.7 megabytes a minute for every person on Earth: http://t.co/gLf50KtZKx #Bigdata #I…
Sun Mar 15 21:41:22 +0000 2015
577223118327926785
Tanginang database to
Sun Mar 15 21:41:30 +0000 2015
577223124862697472
RT @chrisco: The future of farming: robots + big data: http://t.co/MiEIZmKzEb via @KurzweilAINews #AgTech #AgBots #Robotics #Automation #Io…
Sun Mar 15 21:41:31 +0000 2015
577223160195502080
RT @Brian_Singer_: The EMC Federation Joins the OpenStack Foundation http://t.co/i37gbFthQw #cloud #bigdata #aws #ec2 http://t.co/uYQIXOSMHs
Sun Mar 15 21:41:40 +0000 2015
577223187110330368
RT @execedia: Learn @execedia: The Business Benefits of Big Data http://t.co/uQSGCKEiOn
 #BigData
Sun Mar 15 21:41:46 +0000 2015
577223217942691840
RT @RiverCityGal: The goal is not to keep #bigdata scary but to turn it into something beautiful @kalpenn #natgeosxsw @NatGeoChannel
Sun Mar 15 21:41:54 +0000 2015
577223226444509185
RT @T

Sun Mar 15 21:54:36 +0000 2015
577226426086256640
RT @Startuppodden: Ute nu: #StartuppoddengoesSUP46   @JohanAttby vd Fishbrain, om hur man bygger community med #bigdata #sthlmtech http://t…
Sun Mar 15 21:54:38 +0000 2015
577226453487616000
Opening up data unlocks citizenship http://t.co/cahubKsL2U #BigData
Sun Mar 15 21:54:45 +0000 2015
577226454901137410
German industry is poised to exploit rural broadband http://t.co/IPMr0CEziy #BigData
Sun Mar 15 21:54:45 +0000 2015
577226466422710272
RT @bobehayes: #DataScience team has #statistics, business &amp; comp engine skills 》 How #bigdata can make tracks in the rail industry http://…
Sun Mar 15 21:54:48 +0000 2015
577226470428377088
RT @robustus: Level 1: Bitcoin is scarce -&gt; valuable.
L. 2: Wait, BTC is just a "friggin database technology"
L. 3: Aha, *all* money is jus…
Sun Mar 15 21:54:49 +0000 2015
577226472819150848
RT @DataScientistFr: Leading Marketing Excellence With Analytics http://t.co/92XV9CUUpO #BigData #Analytics
Sun Mar 1

In [61]:
def tweet_json(tid, text, created):
    """Build the document stored for one tweet.

    Args:
        tid: value stored under the "tweet_id" key.
        text: value stored under the "text" key.
        created: value stored under the "created_at" key.

    Returns:
        A new dict with exactly the keys "tweet_id", "text" and "created_at".
    """
    return {"tweet_id": tid, "text": text, "created_at": created}

In [62]:
# Capture the current local time, then render it as "YYYY-MM-DD HH:MM".
# format() on a datetime applies the same directives as strftime().
ts = datetime.now()
format(ts, "%Y-%m-%d %H:%M")

'2018-04-01 22:54'

In [63]:
# Parse a 12-hour-clock timestamp, then show it in 24-hour
# "YYYY-MM-DD HH:MM" form.
ts = datetime.strptime('Jun 1 2005  1:33PM', '%b %d %Y %I:%M%p')
format(ts, "%Y-%m-%d %H:%M")

'2005-06-01 13:33'

In [64]:
# Twitter timestamps look like "Sun Mar 15 21:41:54 +0000 2015":
# weekday, month, day, time, UTC offset, then year.
datestrings = [
    'Sun Mar 15 21:41:54 +0000 2015',
    'Tue Mar 29 08:11:25 +0000 2011',
]
from datetime import timedelta
from email.utils import parsedate_tz

def to_datetime(datestring):
    """Convert a Twitter-style date string to a naive datetime in UTC.

    Args:
        datestring: text such as 'Sun Mar 15 21:41:54 +0000 2015'.
            Leading and trailing whitespace is ignored.

    Returns:
        A naive ``datetime`` holding the parsed wall-clock time with the
        string's UTC offset subtracted.

    Raises:
        ValueError: if ``parsedate_tz`` cannot parse ``datestring``.
    """
    time_tuple = parsedate_tz(datestring.strip())
    if time_tuple is None:
        # parsedate_tz signals failure by returning None; raise a clear
        # error instead of failing later on None[:6].
        raise ValueError("unparseable date string: %r" % (datestring,))
    dt = datetime(*time_tuple[:6])
    # The last element is the offset from UTC in seconds.
    return dt - timedelta(seconds=time_tuple[-1])

# Convert the first two sample strings and show each as "YYYY-MM-DD HH:MM".
for idx in (0, 1):
    ts = to_datetime(datestrings[idx])
    print(ts.strftime("%Y-%m-%d %H:%M"))

2015-03-15 21:41
2011-03-29 08:11


In [65]:
# Store a trimmed-down document (id, text, creation time) for every tweet.
# The load is best effort: a tweet that cannot be converted or inserted is
# skipped, but the failure is recorded and reported instead of being
# silently discarded.
cnt=0
skipped=[]
for tweet in tweets_j:
    try:
        ts=to_datetime(tweet['created_at'])
    except Exception:
        # Missing or unparseable 'created_at': fall back to the current time
        # so the tweet can still be stored.
        ts=datetime.now()
    try:
        j=tweet_json(tweet['id'],tweet['text'],ts)
        result = db.tweets.insert_one(j)
        cnt+=1
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still stop the cell.
        skipped.append(repr(err))
print ("%d tweets inserted."%cnt)
if skipped:
    print ("%d tweets skipped; first error: %s"%(len(skipped),skipped[0]))

167 tweets inserted.


In [66]:
# Pretty-print every document currently stored in the tweets collection.
stored_tweets = db.tweets.find()
for tweet in stored_tweets:
    pprint.pprint(tweet)

{'_id': ObjectId('5ac19b77a313fc02d6006fe3'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 41, 22),
 'text': 'RT @MarcWilczek: In 2014, the digital universe equaled 1.7 megabytes '
         'a minute for every person on Earth: http://t.co/gLf50KtZKx #Bigdata '
         '#I…',
 'tweet_id': 577223085054558208}
{'_id': ObjectId('5ac19b77a313fc02d6006fe4'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 41, 30),
 'text': 'Tanginang database to',
 'tweet_id': 577223118327926785}
{'_id': ObjectId('5ac19b77a313fc02d6006fe5'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 41, 31),
 'text': 'RT @chrisco: The future of farming: robots + big data: '
         'http://t.co/MiEIZmKzEb via @KurzweilAINews #AgTech #AgBots #Robotics '
         '#Automation #Io…',
 'tweet_id': 577223124862697472}
{'_id': ObjectId('5ac19b77a313fc02d6006fe6'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 41, 40),
 'text': 'RT @Brian_Singer_: The EMC Federation Joins the OpenStack Foundation '
         'ht

 'text': '@gaab75 @trufae (2/2) i think he is asking for a tradtnl database, '
         'with drivers for userfs to access (sometimes) the db as a '
         'filesystem.',
 'tweet_id': 577224297787338752}
{'_id': ObjectId('5ac19b77a313fc02d600701e'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 46, 19),
 'text': 'RT @WSCP1: IRS illegally sent 1.1 million page database of legally '
         'protected taxpayer information to FBI http://t.co/uBXDEMJPQN '
         '#TeaParty #…',
 'tweet_id': 577224330158981120}
{'_id': ObjectId('5ac19b77a313fc02d600701f'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 46, 21),
 'text': 'RT @robustus: Level 1: Bitcoin is scarce -&gt; valuable.\n'
         'L. 2: Wait, BTC is just a "friggin database technology"\n'
         'L. 3: Aha, *all* money is jus…',
 'tweet_id': 577224340539863041}
{'_id': ObjectId('5ac19b77a313fc02d6007020'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 46, 28),
 'text': 'RT @bbvaopentalent: Financial Times about @b

 'text': 'Tus #tickets d compra en tu #smartphone http://t.co/Ush5fCgQ3n …  '
         '#bigdata #gifts #shopping #WanasConceptStore\n'
         '#Zamalek #Cairo',
 'tweet_id': 577225792364670976}
{'_id': ObjectId('5ac19b77a313fc02d6007059'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 52, 15),
 'text': 'No she didn\'t just write "highjean..." Adding it to the database. '
         '#MinusWell',
 'tweet_id': 577225822399942656}
{'_id': ObjectId('5ac19b77a313fc02d600705a'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 52, 19),
 'text': "What's in Your Movie Database? http://t.co/7l1SH1zJcP "
         '#BestMovieStories',
 'tweet_id': 577225838950637568}
{'_id': ObjectId('5ac19b77a313fc02d600705b'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 52, 30),
 'text': 'RT @jalopezg70: 23andMe to Use Genetic Database for Drug Discovery '
         'http://t.co/8s6OpKTEOx via @rightrelevance thanks @wsjhealth',
 'tweet_id': 577225887713771521}
{'_id': ObjectId('5ac19b77a313fc02d6007

 'created_at': datetime.datetime(2015, 3, 15, 21, 54, 11),
 'text': 'RT @takuma10feet: '
         'モブスタイルス15周年広島来てくれたみんなありがとう！写真は今日のマンウィズとのオープニング一曲目"database '
         'feat.takuma"の1分前。楽しかったなぁ。そして今日のthe trust… https://t.co/BTDfzG2…',
 'tweet_id': 577226312667914240}
{'_id': ObjectId('5ac19b77a313fc02d6007079'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 54, 15),
 'text': 'Important shift in thinking: The #BBVA #bank vaults that hoard '
         '#bigdata instead of bullion http://t.co/YbKPTZQuyb @fintechna '
         '@CraigMilroy',
 'tweet_id': 577226327297798145}
{'_id': ObjectId('5ac19b77a313fc02d600707a'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 54, 15),
 'text': 'RT @cobusinessSol: The third #technological era is coming #IoT '
         'http://t.co/KbRBtWeCyJ #wearables #SmartCities #cloud #BigData '
         'http://t.co/I…',
 'tweet_id': 577226327343771648}
{'_id': ObjectId('5ac19b77a313fc02d600707b'),
 'created_at': datetime.datetime(2015, 3, 15, 21, 54, 17),

Updated April 1, 2018