# Indexing
## Basics
### Exercise 1
#### Preliminaries

In [None]:
from pymongo import MongoClient
from pprint import pprint

# Open a client with PyMongo's default connection settings and grab a handle
# to the `blog` collection of the `lab_index` database used by Exercise 1.
client = MongoClient()
blog = client['lab_index']['blog']

In [None]:
!mongoimport -d lab_index -c blog ./mongo/dataset-master/blog.json

#### 1-1

In [None]:
# Explain an equality query on `writer` and show how it was executed.
# Intended as the baseline to compare against once the `writer` index exists.
writer_filter = {'writer': 'Kim'}
cursor = blog.find(writer_filter)
explains = cursor.explain()
execution_stats = explains['executionStats']
pprint(execution_stats)

#### 1-2

In [None]:
# Create an ascending single-field index on `writer`, then fetch the
# description of every index currently defined on the collection.
writer_index_keys = [('writer', 1)]
res1 = blog.create_index(writer_index_keys)  # result of the index creation
res2 = blog.index_information()              # description of all indexes

print(res1, res2)

#### 1-3

In [None]:
# Repeat the `writer` equality query and print its execution statistics,
# so they can be compared with the output of cell 1-1.
cursor = blog.find(filter={'writer': 'Kim'})
explains = cursor.explain()
stats_with_index = explains['executionStats']
pprint(stats_with_index)


#### 1-4

In [None]:
# Compound index: `id` descending first, then `writer` ascending.
# NOTE(review): the leading key is 'id', not MongoDB's built-in '_id' —
# confirm the blog documents really carry an 'id' field.
compound_keys = [('id', -1), ('writer', 1)]
res3 = blog.create_index(compound_keys)
print(res3)

#### 1-5

In [None]:
# Show the collection's indexes, drop them, and show what is left afterwards.
indexes_before = blog.index_information()
pprint(indexes_before)

blog.drop_indexes()

indexes_after = blog.index_information()
pprint(indexes_after)

### Exercise 2

In [None]:
! mongoimport -d lab -c metro ../dataset-master/metro_new.json

metro = client.lab.metro

In [None]:
## Exercise 2-1

from pymongo.errors import OperationFailure

# Start from a clean set of indexes so the cell can be re-run.
metro.drop_indexes()

# non-errors: these three index definitions are expected to succeed
metro.create_index([('doc_id', 1)], unique=True)
metro.create_index([('intersect', 1)], sparse=True)
metro.create_index([('ride_pasgr_num', 1)],
                   partialFilterExpression={'ride_pasgr_num': {'$gt': 10000}})

# error: these two definitions are expected to be rejected by the server
# (presumably because the indexed values are not unique — verify against the
# data). Each failure is caught and printed so that one rejected index does
# not abort the cell before the next case and the final listing are reached.
expected_failures = (
    ([('line_num', 1)], {'unique': True}),
    ([('intersect_id', 1)], {'unique': True, 'sparse': True}),
)
for index_keys, index_options in expected_failures:
    try:
        metro.create_index(index_keys, **index_options)
    except OperationFailure as exc:
        print('create_index failed for', index_keys, index_options, '->', exc)

# get index information
metro.index_information()

In [None]:
## Exercise 2-2

# The same "first two documents by doc_id" query, built twice: once leaving
# the index choice to the planner, once forcing the `intersect` index via a
# hint. The hint needs the `intersect` index created in Exercise 2-1.
doc_id_order = [('doc_id', 1)]
res1 = metro.find({}, sort=doc_id_order, limit=2)
res2 = metro.find({}, sort=doc_id_order, limit=2, hint=[('intersect', 1)])

# For each variant, print the execution statistics followed by the documents
# actually returned, so both the plans and the results can be compared.
for plan_cursor in (res1, res2):
    pprint(plan_cursor.explain()['executionStats'])
    pprint(list(plan_cursor))


In [None]:
# Exercise 2-3
# Note that these queries can be improved further with a partialFilterExpression.


def _index_name(plan):
    """Return the name of the first index referenced in an explain plan tree.

    The tree is searched depth-first through ``inputStage`` / ``inputStages``,
    so the index is found whether the index scan sits directly under the root
    or below extra stages (for example a sort stage on top of the fetch).
    A ``queryPlan`` wrapper around the tree, if present, is unwrapped first.

    Returns None when no stage in the tree carries an ``indexName``.
    """
    plan = plan.get('queryPlan', plan)
    if 'indexName' in plan:
        return plan['indexName']

    children = list(plan.get('inputStages', []))
    if 'inputStage' in plan:
        children.insert(0, plan['inputStage'])

    for child in children:
        found = _index_name(child)
        if found is not None:
            return found
    return None


def _compare_index(query, index_keys, sort_keys=None):
    """Explain ``query`` on ``metro`` without and then with an index.

    All indexes are dropped, the query is explained, ``index_keys`` is
    created, and the query is explained again. Two lines are printed:
    the root execution stage and execution time before the index, and the
    same two values plus the name of the index used after it.

    query      -- filter document passed to ``find``
    index_keys -- key list passed to ``create_index``
    sort_keys  -- optional key list passed to ``sort``; no sort when None

    Returns the ``(before, after)`` explain documents.
    """
    def run_explain():
        cursor = metro.find(query)
        if sort_keys is not None:
            cursor = cursor.sort(sort_keys)
        return cursor.explain()

    metro.drop_indexes()
    before = run_explain()
    metro.create_index(index_keys)
    after = run_explain()

    before_stats = before['executionStats']
    print(before_stats['executionStages']['stage'],
          before_stats['executionTimeMillis'])

    # Compare this line with the one above to see the effect of the index.
    after_stats = after['executionStats']
    print(after_stats['executionStages']['stage'],
          after_stats['executionTimeMillis'],
          _index_name(after['queryPlanner']['winningPlan']))

    return before, after


# 1: equality match on a single field
before, after = _compare_index({'sub_sta_nm': '서울대입구(관악구청)'},
                               [('sub_sta_nm', 1)])

# Parts 2-4 share the same two-field range filter and compound index.
busy_filter = {'ride_pasgr_num': {'$gt': 100000}, 'alight_pasgr_num': {'$gt': 100000}}
ride_then_alight = [('ride_pasgr_num', 1), ('alight_pasgr_num', 1)]

# 2: range filter on both fields, no sort
before, after = _compare_index(busy_filter, ride_then_alight)

# 3: same filter, sorted by the leading index key.
# Variation to try: build the index with the keys reversed,
# [('alight_pasgr_num', 1), ('ride_pasgr_num', 1)]. The index scan is then
# expected to sit one stage deeper in the winning plan (under an explicit sort
# stage); _index_name() still reports the index in that case.
before, after = _compare_index(busy_filter, ride_then_alight,
                               sort_keys=[('ride_pasgr_num', 1)])

# 4: same filter, sorted by both index keys in index order
before, after = _compare_index(busy_filter, ride_then_alight,
                               sort_keys=ride_then_alight)


# 5: Guess!