In [2]:
from pymongo import MongoClient

## 连接数据库

In [3]:
# Connect to MongoDB. With no arguments the host defaults to localhost and
# the port to 27017.
# The three calls below are equivalent ways to create the client; each
# assignment rebinds `client`, so only the last one is kept.
client = MongoClient()
client = MongoClient('localhost',27017)
# Connect using the MongoDB URI format.
client = MongoClient('mongodb://localhost:27017/')

In [4]:
# Select a database, first with attribute-style access and then with
# dictionary-style access (required when the name, e.g. one containing '-',
# is not a valid Python attribute name).
# NOTE(review): the two names differ ('test_database' vs 'test-database');
# `db` is rebound, so the following cells use 'test-database'.
db = client.test_database
db = client['test-database']

In [5]:
# Select a collection ("table"), with attribute-style and then
# dictionary-style access.
# NOTE(review): the two names differ ('test_collection' vs 'test-collection'),
# and `collection` is not referenced again in the cells shown here.
collection = db.test_collection
collection = db['test-collection']

## 插入文档

In [6]:
import datetime

In [7]:
# Sample blog-post document; "date" holds the current UTC time.
post = dict(
    author="Mike",
    text="My first blog post!",
    tags=["mongodb", "python", "pymongo"],
    date=datetime.datetime.utcnow(),
)

In [8]:
# Get a handle to the test1 collection.
test1 = db.test1

In [9]:
# Insert a single document.
test1.insert_one(post)

<pymongo.results.InsertOneResult at 0x5524990>

In [10]:
# Fetch a single document (no filter).
test1.find_one()

{u'_id': ObjectId('58a5841ee38a921b6c0490dd'),
 u'author': u'Mike',
 u'date': datetime.datetime(2017, 2, 16, 10, 51, 8, 638000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

In [11]:
# Fetch a single document matching a filter.
test1.find_one({'author':'Mike'})

{u'_id': ObjectId('58a5841ee38a921b6c0490dd'),
 u'author': u'Mike',
 u'date': datetime.datetime(2017, 2, 16, 10, 51, 8, 638000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

In [12]:
# Query with a filter; when nothing matches, the result is empty.
# Here 'mike' is lowercase while the stored author is 'Mike', so nothing matches.
test1.find_one({'author':'mike'})

In [15]:
# Query by _id.
# The id is copied from the result of the earlier query, but passed as a plain
# string the lookup returns nothing: the stored _id is an ObjectId, not a str.
test1.find_one({'_id':'58a5841ee38a921b6c0490dd'})

In [16]:
# The id string has to be wrapped in ObjectId for the _id lookup to match.
from bson.objectid import ObjectId
test1.find_one({'_id':ObjectId('58a5841ee38a921b6c0490dd')})

{u'_id': ObjectId('58a5841ee38a921b6c0490dd'),
 u'author': u'Mike',
 u'date': datetime.datetime(2017, 2, 16, 10, 51, 8, 638000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

## 插入多条数据

In [17]:
# Two more sample posts for the bulk insert. They do not share the same set of
# fields: the first has "tags", the second has "title".
new_posts = [
    dict(
        author="Mike",
        text="Another post!",
        tags=["bulk", "insert"],
        date=datetime.datetime(2009, 11, 12, 11, 14),
    ),
    dict(
        author="Eliot",
        title="MongoDB is fun",
        text="and pretty easy too!",
        date=datetime.datetime(2009, 11, 10, 10, 45),
    ),
]

In [18]:
# Number of documents prepared for the bulk insert.
len(new_posts)

2

In [19]:
# Insert several documents in one call.
result = test1.insert_many(new_posts)

In [20]:
# The ids of the documents that were just inserted.
result.inserted_ids

[ObjectId('58a58440e38a921b6c0490de'), ObjectId('58a58440e38a921b6c0490df')]

In [21]:
# Query every document in test1 and print each one.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test1.find():
    print(i)

{u'date': datetime.datetime(2017, 2, 16, 10, 51, 8, 638000), u'text': u'My first blog post!', u'_id': ObjectId('58a5841ee38a921b6c0490dd'), u'author': u'Mike', u'tags': [u'mongodb', u'python', u'pymongo']}
{u'date': datetime.datetime(2009, 11, 12, 11, 14), u'text': u'Another post!', u'_id': ObjectId('58a58440e38a921b6c0490de'), u'author': u'Mike', u'tags': [u'bulk', u'insert']}
{u'date': datetime.datetime(2009, 11, 10, 10, 45), u'text': u'and pretty easy too!', u'_id': ObjectId('58a58440e38a921b6c0490df'), u'author': u'Eliot', u'title': u'MongoDB is fun'}


In [22]:
# Query the documents matching a filter and print each one.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test1.find({'author':'Mike'}):
    print(i)

{u'date': datetime.datetime(2017, 2, 16, 10, 51, 8, 638000), u'text': u'My first blog post!', u'_id': ObjectId('58a5841ee38a921b6c0490dd'), u'author': u'Mike', u'tags': [u'mongodb', u'python', u'pymongo']}
{u'date': datetime.datetime(2009, 11, 12, 11, 14), u'text': u'Another post!', u'_id': ObjectId('58a58440e38a921b6c0490de'), u'author': u'Mike', u'tags': [u'bulk', u'insert']}


In [23]:
# Total number of documents in the collection.
# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# count_documents({}) is the supported replacement.
test1.count_documents({})

3

In [24]:
# Number of documents matching a filter.
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0, so the
# count is taken on the collection with the same filter instead.
test1.count_documents({'author':'Mike'})

2

In [25]:
# Get a handle to a new collection, test2.
test2 = db.test2
insert = [{'x':1},{'x':1},{'x':1}]
# Insert the three documents.
test2.insert_many(insert)

<pymongo.results.InsertManyResult at 0x5524f78>

In [26]:
# Print the three documents that were just inserted.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test2.find():
    print(i)

{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e0')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e1')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e2')}


## 插入或者更新

In [27]:
# Upsert a single document: if a document with x == 1 is found, set its x to 3;
# otherwise insert {'x': 3}. Only one document is affected.
result = test2.update_one({'x':1},{'$set':{'x':3}},upsert=True)

In [28]:
# Print the documents in test2 again.
# One of the documents now has x == 3.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test2.find():
    print(i)

{u'x': 3, u'_id': ObjectId('58a58454e38a921b6c0490e0')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e1')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e2')}


In [29]:
# If a document with x == 4 is found, set its x to 333; otherwise insert
# {'x': 333} (single document).
# No document with x == 4 exists at this point, so this call inserts.
result = test2.update_one({'x':4},{'$set':{'x':333}},upsert=True)

In [30]:
# Print the documents in test2 again.
# A new document {'x': 333} has been added.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test2.find():
    print(i)

{u'x': 3, u'_id': ObjectId('58a58454e38a921b6c0490e0')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e1')}
{u'x': 1, u'_id': ObjectId('58a58454e38a921b6c0490e2')}
{u'x': 333, u'_id': ObjectId('58a58477dea1b7eb2f881b9b')}


### 多条 update_many

In [31]:
# update_many: set x to 'mangmang' on every document with x == 1; if none
# matches, upsert=True inserts {'x': 'mangmang'} instead.
result = test2.update_many({'x':1},{'$set':{'x':'mangmang'}},upsert=True)

In [32]:
# Print the documents in test2 again.
# Every document that had x == 1 now has x == 'mangmang'.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test2.find():
    print(i)

{u'x': 3, u'_id': ObjectId('58a58454e38a921b6c0490e0')}
{u'x': u'mangmang', u'_id': ObjectId('58a58454e38a921b6c0490e1')}
{u'x': u'mangmang', u'_id': ObjectId('58a58454e38a921b6c0490e2')}
{u'x': 333, u'_id': ObjectId('58a58477dea1b7eb2f881b9b')}


In [33]:
# If a document with x == 4 is found, set x=1 and y=2 on it; otherwise insert
# {'x': 1, 'y': 2}.
# NOTE(review): this call is update_one (single document) although the cell
# sits in the update_many section -- confirm which one was intended.
# No document with x == 4 exists at this point, so this call inserts.
result = test2.update_one({'x':4},{'$set':{'x':1,'y':2}},upsert=True)

In [34]:
# Print the documents in test2 again.
# A new document {'y': 2, 'x': 1} has been added.
# print() is written as a call so the cell runs on both Python 2 and Python 3.
for i in test2.find():
    print(i)

{u'x': 3, u'_id': ObjectId('58a58454e38a921b6c0490e0')}
{u'x': u'mangmang', u'_id': ObjectId('58a58454e38a921b6c0490e1')}
{u'x': u'mangmang', u'_id': ObjectId('58a58454e38a921b6c0490e2')}
{u'x': 333, u'_id': ObjectId('58a58477dea1b7eb2f881b9b')}
{u'y': 2, u'x': 1, u'_id': ObjectId('58a58499dea1b7eb2f881b9c')}


## 总结
### 爬虫经常用到的三条插入语句
下面示例中的 **test2 为表（集合）名**：  
1. ```test2.insert_one(xx)``` 插入一条数据
2. ```test2.insert_many(xx)``` 插入list
3. **最常用>>>**```test2.update_one({'x':1},{'$set':{'x':3}},upsert=True)``` 更新一条数据，找不到匹配时则插入
   - 第三条一般用于防止重复的数据被存到数据库内

## 参考
- [mongodb中文文档](http://www.runoob.com/mongodb/mongodb-tutorial.html)
- [PyMongo官方文档（Collection API）](https://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.find_one_and_update)