# 连接MongoDB

In [1]:
import pymongo

# Open a client for the MongoDB server on this machine (host, then port).
client = pymongo.MongoClient("localhost", 27017)

# 指定数据库

In [9]:
# Select the "test" database; dictionary-style access is equivalent to client.test.
db = client["test"]

# 指定集合

In [10]:
# Select the "students" collection; dictionary-style access is equivalent to db.students.
collection = db["students"]

# 插入数据

## 插入单条数据

In [12]:
# Build one student record and store it with a single-document insert.
student = dict(id='20170101', name='Jordan', age=20, gender='male')
result = collection.insert_one(student)

# The _id assigned to the newly inserted document.
result.inserted_id

ObjectId('5bd542b48cfead2940af4e8f')

## 插入多条数据

In [13]:
# Two student records that are written together in one insert_many call.
students = [
    {'id': '20170101', 'name': 'Jordan', 'age': 20, 'gender': 'male'},
    {'id': '20170202', 'name': 'Mike', 'age': 21, 'gender': 'male'},
]
# Keep the individual names bound as well, pointing at the same dict objects.
student1, student2 = students

result = collection.insert_many(students)
result

<pymongo.results.InsertManyResult at 0x48fde88>

In [14]:
# The _id values of the documents written by the insert_many call above, as a list.
result.inserted_ids

[ObjectId('5bd5433d8cfead2940af4e90'), ObjectId('5bd5433d8cfead2940af4e91')]

# 查询

## 查询单条

In [15]:
# Fetch a single document whose name is "Mike", then show the Python type it comes back as.
name_filter = {"name": "Mike"}
result = collection.find_one(name_filter)
type(result)

dict

In [16]:
# Display the document returned by the find_one call in the previous cell.
result

{'_id': ObjectId('5bd5433d8cfead2940af4e91'),
 'age': 21,
 'gender': 'male',
 'id': '20170202',
 'name': 'Mike'}

In [19]:
from bson.objectid import ObjectId


# Take the _id from a live document instead of pasting one from an earlier
# session: ObjectIds are generated anew on every insert, so a hard-coded value
# makes this lookup return None when the notebook is re-run on a fresh database.
mike_id_hex = str(collection.find_one({"name": "Mike"})["_id"])

# An _id that arrives as a hex string has to be wrapped in ObjectId before it
# is used in a filter; the plain string would not match the stored ObjectId.
result = collection.find_one({"_id": ObjectId(mike_id_hex)})
result

{'_id': ObjectId('5bd5433d8cfead2940af4e91'),
 'age': 21,
 'gender': 'male',
 'id': '20170202',
 'name': 'Mike'}

## 查询多条数据

### 精确匹配

In [20]:
# Exact match: print every document whose age equals 20.
age_filter = {"age": 20}
results = collection.find(age_filter)
for result in results:
    print(result)

{'name': 'Jordan', 'age': 20, '_id': ObjectId('5bd542678cfead2940af4e8e'), 'id': '20170101', 'gender': 'male'}
{'name': 'Jordan', 'age': 20, '_id': ObjectId('5bd542b48cfead2940af4e8f'), 'id': '20170101', 'gender': 'male'}
{'name': 'Jordan', 'age': 20, '_id': ObjectId('5bd5433d8cfead2940af4e90'), 'id': '20170101', 'gender': 'male'}


### 范围匹配

In [24]:
# Range match: print every document whose age is strictly greater than 20.
older_than_20 = {"age": {"$gt": 20}}
results = collection.find(older_than_20)
for result in results:
    print(result)

{'name': 'Mike', 'age': 21, '_id': ObjectId('5bd5433d8cfead2940af4e91'), 'id': '20170202', 'gender': 'male'}


### 正则匹配

In [26]:
# Regex match: print every document whose name starts with a capital "M".
name_pattern = {"name": {"$regex": "^M.*"}}
results = collection.find(name_pattern)
for result in results:
    print(result)

{'name': 'Mike', 'age': 21, '_id': ObjectId('5bd5433d8cfead2940af4e91'), 'id': '20170202', 'gender': 'male'}


# 计数

In [28]:
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0.
# count_documents() with an empty filter counts every document in the collection.
count = collection.count_documents({})
count

4

# 排序

In [30]:
# Sort all documents by name in descending order and show just the names.
results = collection.find().sort("name", pymongo.DESCENDING)
names = [doc["name"] for doc in results]
print(names)

['Mike', 'Jordan', 'Jordan', 'Jordan']


# 偏移

In [31]:
# Sort by name in ascending order, skip the first two documents, and show the remaining names.
sorted_cursor = collection.find().sort("name", pymongo.ASCENDING)
results = sorted_cursor.skip(2)
names = [doc["name"] for doc in results]
print(names)

['Jordan', 'Mike']


## 取结果个数

In [32]:
# Sort by name in descending order, skip two documents, then keep at most one.
sorted_cursor = collection.find().sort("name", pymongo.DESCENDING)
results = sorted_cursor.skip(2).limit(1)
names = [doc["name"] for doc in results]
print(names)

['Jordan']


值得注意的是，在数据库中的数据量非常庞大的时候（如千万、亿级别），最好不要使用大的偏移量来查询数据，因为这样很可能导致内存溢出。此时可以使用类似如下的操作来查询：

PS：这时需要记录好上次查询的 `_id`。

In [34]:
from bson.objectid import ObjectId
# Instead of a large skip(), ask only for documents whose _id is greater than
# the last _id already seen (the value here stands in for that recorded _id).
last_seen_id = ObjectId("5bd5433d8cfead2940af4e91")
collection.find({"_id": {"$gt": last_seen_id}})

<pymongo.cursor.Cursor at 0x4967a90>

# 更新

## 一般操作

In [43]:
# Send only the changed field through $set rather than a whole fetched document.
# Re-sending the full document (including its _id) needs an extra read round
# trip, raises TypeError when no document matches the filter, and can overwrite
# changes made to other fields between the read and the write.
condition = {"name": "Mike"}
result = collection.update_one(condition, {"$set": {"age": 28}})
result

<pymongo.results.UpdateResult at 0x49e4b08>

In [44]:
# Number of documents that matched the update filter.
result.matched_count

1

In [45]:
# Number of documents the update actually changed.
result.modified_count

1

## 更新一条

In [None]:
# Add 1 to the age of a single document whose age is greater than 20.
condition = {"age": {"$gt": 20}}
result = collection.update_one(condition, {"$inc": {"age": 1}})
# Print matched first, then modified: the same order the update_many cell uses,
# so the two unlabelled outputs can be compared without misreading them.
print(result.matched_count, result.modified_count)

## 更新多条

In [47]:
# Add 1 to the age of every document whose age is below 21,
# then report how many documents matched and how many were modified.
condition = {"age": {"$lt": 21}}
increment_age = {"$inc": {"age": 1}}
result = collection.update_many(condition, increment_age)
print(result.matched_count, result.modified_count)

3 3


# 删除

## 删一条

In [None]:
# Remove a single document named "Mike" and show how many documents were deleted.
mike_filter = {"name": "Mike"}
result = collection.delete_one(mike_filter)
result.deleted_count

## 删多条

In [51]:
# Remove every document whose age is below 25 and show how many documents were deleted.
younger_than_25 = {"age": {"$lt": 25}}
result = collection.delete_many(younger_than_25)
result.deleted_count

3

# 聚合

## 管道

In [26]:
# Switch to the "article" database for the aggregation examples (rebinds db).
db = client["article"]

- project：修改输入文档的结构。可以用来重命名、增加或删除域，也可以用于创建计算结果以及嵌套文档。
- match：用于过滤数据，只输出符合条件的文档。$match使用MongoDB的标准查询操作。
- limit：用来限制MongoDB聚合管道返回的文档数。
- skip：在聚合管道中跳过指定数量的文档，并返回余下的文档。
- unwind：将文档中的某一个数组类型字段拆分成多条，每条包含数组中的一个值。
- group：将集合中的文档分组，可用于统计结果。
- sort：将输入文档排序后输出。
- geoNear：输出接近某一地理位置的有序文档。

In [33]:
# Seed the "things" collection with four documents whose tag arrays differ in length
# (including one empty array).
things = [
    {"x": 1, "tags": ["dog", "cat"]},
    {"x": 2, "tags": ["cat"]},
    {"x": 2, "tags": ["mouse", "cat", "dog"]},
    {"x": 3, "tags": []},
]
result = db.things.insert_many(things)

In [35]:
from bson import SON

# Count how often each tag occurs across all documents in "things".

# Emit one document per element of the tags array.
unwind_tags = {"$unwind": "$tags"}
# Group by tag value and add 1 to "count" for every document in the group.
count_per_tag = {"$group": {"_id": "$tags", "count": {"$sum": 1}}}
# SON keeps key order: sort by count first, then by tag name, both descending.
order_by_count = {"$sort": SON([("count", pymongo.DESCENDING), ("_id", pymongo.DESCENDING)])}

pipeline = [unwind_tags, count_per_tag, order_by_count]
print(list(db.things.aggregate(pipeline)))

[{'_id': 'cat', 'count': 3}, {'_id': 'dog', 'count': 2}, {'_id': 'mouse', 'count': 1}]
