# Role API

## Fast API

> **Doporučené video**
>
> [FastAPI Introduction - Build Your First Web App - Python Tutorial 12 minut](https://www.youtube.com/watch?v=0RS9W8MtZe4)
>
> [Let's Build a Fast, Modern Python API with FastAPI 1,5 h](https://www.youtube.com/watch?v=sBVb4IB3O_U)

Fast API má jednu obrovskou výhodu oproti obdobným systémům / frameworkům. Touto výhodou je automatická publikace popisu API ve formě **[Swagger](https://swagger.io/)** dokumentu.
Díky Swagger (nebo OpenAPI) je možné využít [celou řadu nástrojů](https://swagger.io/tools/swagger-codegen/) pro generování klientů tvořeného API.

https://fastapi.tiangolo.com/tutorial/sql-databases/

In [126]:
!pip install sqlalchemy



### SQL Alchemy

https://github.com/LeeBergstrand/Jupyter-SQLAlchemy-Tutorial/blob/master/Jupyter-SQLAlchemy.ipynb

In [41]:
#https://docs.sqlalchemy.org/en/13/orm/tutorial.html
#https://docs.sqlalchemy.org/en/14/orm/basic_relationships.html
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, BigInteger, Sequence, Table, ForeignKey, DateTime
from sqlalchemy.orm import relationship

BaseModel = declarative_base()

### Models

In [42]:
# Single sequence object passed to the primary-key column of every model
# below, so users, usergroups and groups all draw their ids from 'all_id_seq'.
unitedSequence = Sequence('all_id_seq')

class UserModel(BaseModel):
    """ORM model mapped to the ``users`` table."""
    __tablename__ = 'users'

    # Earlier variant with a sequence private to this table (disabled).
    #id = Column(BigInteger, Sequence('users_id_seq'), primary_key=True)
    # Primary key drawn from the shared ``unitedSequence``.
    id = Column(BigInteger, unitedSequence, primary_key=True)
    name = Column(String)

    def __init__(self, name):
        """Create a user object carrying the given ``name``."""
        self.name = name
        
class UserGroupModel(BaseModel):
    """ORM model for the ``usergroups`` table: one row links a user id to a group id."""
    __tablename__ = 'usergroups'

    id = Column(BigInteger, unitedSequence, primary_key=True)
    # Indexed foreign keys pointing at users.id and groups.id.
    user_id = Column(BigInteger, ForeignKey('users.id'), index=True)
    group_id = Column(BigInteger, ForeignKey('groups.id'), index=True)
    
    # The user-side relationship is disabled; only the group side is mapped.
    #user = relationship('UserModel', uselist=False, back_populates='groups', primaryjoin=user_id==UserModel.id)
    # Paired with GroupModel.users through back_populates.
    group = relationship('GroupModel', uselist=False, back_populates='users')#, primaryjoin=authorization_id==AuthorizationModel.id)
    

class GroupModel(BaseModel):
    """ORM model mapped to the ``groups`` table."""
    __tablename__ = 'groups'
    
    id = Column(BigInteger, unitedSequence, primary_key=True)
    name = Column(String)
    
    # Membership rows of this group; paired with UserGroupModel.group and
    # joined explicitly on groups.id == usergroups.group_id.
    users = relationship('UserGroupModel', back_populates='group', lazy='dynamic', primaryjoin=id==UserGroupModel.group_id)
        

In [127]:
!pip install pydantic



### Schemas

In [39]:
from typing import List, Optional

from pydantic import BaseModel as BaseSchema

# Request body for creating a user: only the name is supplied by the client.
class UserCreateSchema(BaseSchema):
    name: str
        
# UserCreateSchema extended with the user's id.
class UserIdSchema(UserCreateSchema):
    id: int

# Shape of a user as returned to the client: id and name only.
class UserGetSchema(BaseSchema):
    id: int
    name: str

    class Config:
        # orm_mode ensures appropriate translation from SQLAlchemy objects.
        orm_mode = True

# Request body for updating a user: the id selects the row, name is the new value.
class UserPutSchema(BaseSchema):
    id: int
    name: str


#### Engine Init

In [None]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
# Alternative engines kept for reference: in-memory SQLite and a PostgreSQL URL template.
#engine = create_engine('sqlite:///:memory:', echo=True)
#engine = create_engine('postgresql+psycopg2://user:password@hostname/database_name')
# NOTE(review): credentials are hard-coded in the URL; presumably a throwaway dev database - confirm.
engine = create_engine('postgresql+psycopg2://postgres:example@postgres/jupyterII') 
# `Session` is the session factory bound to the engine; `session` is the
# single instance the following cells pass around as `db`.
Session = sessionmaker(bind=engine)
session = Session()
# Destructive: drops every table registered on BaseModel's metadata, then
# recreates them, so each run of this cell starts from empty tables.
BaseModel.metadata.drop_all(engine)
BaseModel.metadata.create_all(engine)

#### CRUD Ops

In [44]:
def crudUserGet(db: Session, id: int):
    """Return the first user whose primary key equals ``id`` (``None`` when no row matches)."""
    matching = db.query(UserModel).filter(UserModel.id == id)
    return matching.first()

def crudUserGetAll(db: Session, skip: int = 0, limit: int = 100):
    """Return one page of users: skip ``skip`` rows, then take at most ``limit``."""
    page = db.query(UserModel).offset(skip).limit(limit)
    return page.all()

def crudUserCreate(db: Session, user: UserCreateSchema):
    """Persist a new user named ``user.name`` and return the stored row.

    The row is added, committed and then refreshed from the database before
    it is returned.
    """
    created = UserModel(name=user.name)
    db.add(created)
    db.commit()
    db.refresh(created)
    return created

def crudUserUpdate(db: Session, user):
    """Rename the user identified by ``user.id`` and return the refreshed row.

    ``user`` must expose ``id`` and ``name``. An empty or missing ``name``
    keeps the stored name. Returns ``None`` when no user has that id, instead
    of failing with ``AttributeError`` on the missing row.
    """
    userToUpdate = db.query(UserModel).filter(UserModel.id == user.id).first()
    if userToUpdate is None:
        # Nothing to update; let the caller decide how to report it.
        return None
    if user.name:
        userToUpdate.name = user.name
    db.commit()
    db.refresh(userToUpdate)
    return userToUpdate

NameError: name 'Session' is not defined

#### Test

In [None]:
import random
import string

def get_random_string(length):
    """Return ``length`` randomly chosen lowercase ASCII letters as one string."""
    alphabet = string.ascii_lowercase
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def PopulateUsers(count=10):
    """Create ``count`` users with random names through ``crudUserCreate``."""
    for _ in range(count):
        # crudUserCreate takes a creation schema and builds the ORM row
        # itself, so no throwaway UserModel is constructed here.
        newUser = UserCreateSchema(name='user_' + get_random_string(20))
        crudUserCreate(db=session, user=newUser)
        
PopulateUsers(10)

In [None]:
# Fetch the first page of users, then look each one up again by id to
# exercise crudUserGet, printing position, id and name.
usersData = list(crudUserGetAll(db=session))
for index, userRow in enumerate(usersData):
    row = crudUserGet(db=session, id=userRow.id)
    print(index, '\t', row.id, row.name)

#### Server

In [28]:
!pip install uvicorn
!pip install fastapi
!pip install wait4it



#### Minimal Code

In [34]:
import uvicorn
from fastapi import FastAPI

app = FastAPI()#root_path='/api')

def run():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:9993."""
    uvicorn.run(app, host='0.0.0.0', port=9993, root_path='')

#### Helper Func for Notebook

In [35]:
# Code in this cell is just for (re)starting the API on a Process, and other compatibility stuff with Jupyter cells.
# Just ignore it!

from multiprocessing import Process
from wait4it import wait_for

_api_process = None

def start_api(runNew=True):
    """Restart the API server process.

    A process started by an earlier call is terminated and joined first.
    When ``runNew`` is true, ``run`` is launched in a new daemon process and
    the call waits until port 9993 is reachable.
    """
    global _api_process
    previous = _api_process
    if previous:
        previous.terminate()
        previous.join()

    if not runNew:
        return

    _api_process = Process(target=run, daemon=True)
    _api_process.start()
    wait_for(port=9993)

def delete_route(method: str, path: str):
    """Delete the given route from the API.

    This must be called on cells that re-define a route. Every registered
    route whose ``path`` equals ``path`` and whose ``methods`` contain
    ``method`` is removed from ``app.routes``.
    """
    # Iterate over a snapshot: removing from app.routes while iterating it
    # skips the element that follows each removal, so duplicate registrations
    # of the same route were only partly deleted.
    for route in list(app.routes):
        # Route objects without `methods` (or with None) can never match.
        route_methods = getattr(route, 'methods', None) or ()
        if method in route_methods and getattr(route, 'path', None) == path:
            app.routes.remove(route)
    

In [31]:
def delete_all_routes():
    """Remove every route currently registered on ``app``."""
    # Work from a snapshot so the live list can be modified safely.
    for registered in tuple(app.routes):
        app.routes.remove(registered)

#### First API Endpoint

In [36]:
@app.get("/api")
def get_root():
    # Fixed greeting payload served for GET /api.
    greeting = {"Hello": "World"}
    return greeting

start_api()

INFO:     Started server process [669]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:9993 (Press CTRL+C to quit)


INFO:     172.17.0.1:47700 - "GET / HTTP/1.1" 404 Not Found
INFO:     172.17.0.1:47898 - "GET /api HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [669]


In [38]:
# Call the /api endpoint of the running server and show its reply.
import requests

# The server of this section listens on port 9993 (see run() and start_api()).
r = requests.get("http://localhost:9993/api")
print("Status code:", r.status_code)
print("Response:", r.json())

Status code: 200
Response: {'Hello': 'World'}


In [37]:
start_api(False)

#### Database CRUD Endpoint

In [40]:
# Uncomment to drop previously registered routes before re-running this cell.
#delete_all_routes()

# UserGetSchema declares only id and name, while the dict returned below also
# carries a 'password' key - presumably to show that the response model keeps
# undeclared fields out of the reply; confirm.
@app.get("/users/{id}", response_model=UserGetSchema)
#@app.get("/users/{id}")
def userGet(id: int):
    # Database lookup is disabled; a fixed record echoing the requested id is returned.
    #result = crudUserGet(db=session, id=id)
    result = {'id': id, 'name': 'Hrbolek', 'password': 'extraultrahesozahesovane'}
    return result

@app.get("/users", response_model=List[UserGetSchema])
def userGetAll(skip: Optional[int]=0, limit: Optional[int]=100):
    # Stub: the database call is commented out, so the function returns None.
    # NOTE(review): None is not a list of UserGetSchema - confirm the stub is
    # intentional before calling this endpoint.
    #result = crudUserGetAll(db=session, skip=skip, limit=limit)
    #return result
    pass

# The response model is disabled on this route (see the trailing comment).
@app.post("/users")#, response_model=UserIdSchema)
def userPost(user: UserCreateSchema):
    # Stub: the request body is parsed as UserCreateSchema but nothing is
    # stored; the create call is commented out and the function returns None.
    #print('userPut')
    #return crudUserCreate(db=session, user=user)
    pass

@app.put("/users", response_model=UserGetSchema)
def userPut(user: UserPutSchema):
    # Stub: the update call is commented out, so the function returns None.
    # NOTE(review): None does not match the declared UserGetSchema response -
    # confirm the stub is intentional.
    #result = crudUserUpdate(db=session, user=user)
    #return result
    pass

start_api()

INFO:     Started server process [691]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:9993 (Press CTRL+C to quit)


INFO:     172.17.0.1:49872 - "GET /users/45 HTTP/1.1" 200 OK
INFO:     172.17.0.1:50234 - "GET /users/56 HTTP/1.1" 200 OK
INFO:     172.17.0.1:51720 - "GET /docs HTTP/1.1" 200 OK
INFO:     172.17.0.1:51720 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     172.17.0.1:52160 - "GET /users/78 HTTP/1.1" 200 OK
INFO:     172.17.0.1:52346 - "GET /openapi.json HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [691]


In [41]:
start_api(False)

### GraphQL

Graph QL

https://www.apollographql.com/docs/apollo-server/

Python
- https://docs.graphene-python.org/en/latest/quickstart/
- https://fastapi.tiangolo.com/advanced/graphql/

Otázka systémové integrace.

In [1]:
!pip install "graphene>=2.0"

Collecting graphene>=2.0
  Downloading graphene-2.1.9-py2.py3-none-any.whl (107 kB)
[K     |████████████████████████████████| 107 kB 4.0 MB/s eta 0:00:01
[?25hCollecting graphql-core<3,>=2.1
  Downloading graphql_core-2.3.2-py2.py3-none-any.whl (252 kB)
[K     |████████████████████████████████| 252 kB 5.3 MB/s eta 0:00:01
[?25hCollecting aniso8601<=7,>=3
  Downloading aniso8601-7.0.0-py2.py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 921 kB/s eta 0:00:011
[?25hCollecting graphql-relay<3,>=2
  Downloading graphql_relay-2.0.1-py3-none-any.whl (20 kB)
Collecting rx<2,>=1.6
  Downloading Rx-1.6.1-py2.py3-none-any.whl (179 kB)
[K     |████████████████████████████████| 179 kB 4.8 MB/s eta 0:00:01
[?25hCollecting promise<3,>=2.3
  Downloading promise-2.3.tar.gz (19 kB)
Building wheels for collected packages: promise
  Building wheel for promise (setup.py) ... [?25ldone
[?25h  Created wheel for promise: filename=promise-2.3-py3-none-any.whl size=21494 sha256

#### Příklad Hello World

In [9]:
from graphene import ObjectType, String
from graphene import Schema as GSchema

class Query(ObjectType):
    # `hello` is a String field with a single argument `name`, which falls
    # back to "stranger" when the query does not supply it.
    hello = String(name=String(default_value="stranger"))
    # `goodbye` is a String field without arguments.
    goodbye = String()

    # Resolvers receive the GraphQL context (root, info) plus the field's
    # arguments and return the value placed in the response.
    def resolve_hello(root, info, name):
        greeting = 'Hello {}!'.format(name)
        return greeting

    def resolve_goodbye(root, info):
        farewell = 'See ya!'
        return farewell

# Build the schema from Query and execute two queries directly, without a server.
schema = GSchema(query=Query)
# No `name` argument is passed here, so the field's default value applies.
result = schema.execute('''query {
    hello
}''')
print(result)
result = schema.execute('''query {
    goodbye
}''')
print(result)

None
<graphql.execution.base.ResolveInfo object at 0x7f83f1605440>
stranger
{'data': {'hello': 'Hello stranger!'}}
{'data': {'goodbye': 'See ya!'}}


#### Server Implementace

In [4]:
import uvicorn
from fastapi import FastAPI

app = FastAPI()#root_path='/api')

def run():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:9992."""
    uvicorn.run(app, host='0.0.0.0', port=9992, root_path='')
    
# Code in this cell is just for (re)starting the API on a Process, and other compatibility stuff with Jupyter cells.
# Just ignore it!

from multiprocessing import Process
from wait4it import wait_for

_api_process = None

def start_api_QL(runNew=True):
    """Restart the GraphQL API server process.

    A process started by an earlier call is terminated and joined first.
    When ``runNew`` is true, ``run`` is launched in a new daemon process and
    the call waits until port 9992 is reachable.
    """
    global _api_process
    previous = _api_process
    if previous:
        previous.terminate()
        previous.join()

    if not runNew:
        return

    _api_process = Process(target=run, daemon=True)
    _api_process.start()
    wait_for(port=9992)

def delete_route(method: str, path: str):
    """Delete the given route from the API.

    This must be called on cells that re-define a route. Every registered
    route whose ``path`` equals ``path`` and whose ``methods`` contain
    ``method`` is removed from ``app.routes``.
    """
    # Iterate over a snapshot: removing from app.routes while iterating it
    # skips the element that follows each removal, so duplicate registrations
    # of the same route were only partly deleted.
    for route in list(app.routes):
        # Route objects without `methods` (or with None) can never match.
        route_methods = getattr(route, 'methods', None) or ()
        if method in route_methods and getattr(route, 'path', None) == path:
            app.routes.remove(route)
    

In [5]:
import graphene
from fastapi import FastAPI
from starlette.graphql import GraphQLApp


class Query(graphene.ObjectType):
    # Single String field `hello`; its `name` argument defaults to "stranger".
    hello = graphene.String(name=graphene.String(default_value="stranger"))

    def resolve_hello(self, info, name):
        # Greeting returned as the value of the `hello` field.
        greeting = "Hello " + name
        return greeting


# A fresh FastAPI instance replaces the `app` created in the previous cell.
app = FastAPI()

# Expose the schema built from Query under /qlapi/ and (re)start the server.
app.add_route("/qlapi/", GraphQLApp(schema=graphene.Schema(query=Query)))
start_api_QL()

INFO:     Started server process [152]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:9992 (Press CTRL+C to quit)


INFO:     172.18.0.1:37128 - "GET /apiql/ HTTP/1.1" 404 Not Found
INFO:     172.18.0.1:37142 - "GET /qlapi/ HTTP/1.1" 200 OK
INFO:     172.18.0.1:37142 - "POST /qlapi/ HTTP/1.1" 200 OK
INFO:     172.18.0.1:37142 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     172.18.0.1:37156 - "POST /qlapi/ HTTP/1.1" 200 OK


Nyní je Graph QL server dostupný na http://localhost:31102/qlapi/

do web UI je možné zadat dotaz

```
query {
    hello
}
```

In [None]:
start_api_QL(False)

#### Složitější příklad

In [31]:
from graphene import ObjectType, String, Field, ID, List
from graphene import Schema as GSchema

# GraphQL type for a book: a title string and an id.
class Book(ObjectType):
    title = String()
    id = ID()
    
class BookData:
    """Plain data holder for a book; every instance gets the title 'Babicka'."""

    def __init__(self, id):
        self.id, self.title = id, 'Babicka'

# GraphQL type for an author. `full_name` and `books` have explicit
# resolvers; the remaining fields are declared without one.
class Author(ObjectType):
    first_name = String()
    last_name = String()
    full_name = String()
    id = ID()
    books = List(Book)

    def resolve_full_name(parent, info):
        # First and last name joined by a single space.
        return "{} {}".format(parent.first_name, parent.last_name)

    def resolve_books(parent, info):
        # Fixed demo data: two books with ids 14 and 11.
        return [BookData(book_id) for book_id in (14, 11)]

class AuthorData:
    """Plain data holder for an author; the name is always 'Bob Newmann'."""

    def __init__(self, id):
        self.first_name, self.last_name = 'Bob', 'Newmann'
        self.id = id
            
# Root query type: `author` and `book` each require an `id` argument.
class Query(ObjectType):

    author = Field(Author, id=ID(required=True))
    book = Field(Book, id=ID(required=True))

    def resolve_author(root, info, id):
        # Demo data object carrying the requested id.
        author = AuthorData(id)
        return author

    def resolve_book(root, info, id):
        # Demo data object carrying the requested id.
        book = BookData(id)
        return book

# Execute a nested query in-process: one author together with its books.
# The Python field names (first_name, ...) are requested in camelCase.
schema = GSchema(query=Query)
result = schema.execute('''query {
    author(id: 1245) {
        firstName
        lastName
        fullName
        id
        books {
            title
            id
        }
    }
}''')
print(result)


{'data': {'author': {'firstName': 'Bob', 'lastName': 'Newmann', 'fullName': 'Bob Newmann', 'id': '1245', 'books': [{'title': 'Babicka', 'id': '14'}, {'title': 'Babicka', 'id': '11'}]}}}


## Databázové klastry

Pro potřeby výkonných databází se jednotlivé servery slučují do klastrů (clusters). Databáze toto slučování podporují různým způsobem.

V souvislosti s klastry je možné setkat se s pojmy
- Replikace (replication)
- Sdružování připojení (Connection Pooling)
- Vyvažování zátěže (Load Balancing)
- Dotaz nad více servery (Query Partitioning)

PostgreSQL
- https://www.postgresql.org/docs/9.5/creating-cluster.html
- https://wiki.postgresql.org/wiki/Replication,_Clustering,_and_Connection_Pooling

MySQL
- https://www.digitalocean.com/community/tutorials/how-to-create-a-multi-node-mysql-cluster-on-ubuntu-18-04
- https://dev.mysql.com/doc/refman/8.0/en/mysql-cluster.html

MSSQL
- https://docs.microsoft.com/en-us/sql/sql-server/failover-clusters/install/create-a-new-sql-server-failover-cluster-setup?view=sql-server-ver15


## LINQ

LINQ je jazyk integrovaný do .NET. Umožňuje použít Relační algebru nad zdroji v rámci programovacího jazyka.

C#:

https://docs.microsoft.com/cs-cz/dotnet/csharp/programming-guide/concepts/linq/

Visual Basic:

https://docs.microsoft.com/cs-cz/dotnet/visual-basic/programming-guide/language-features/linq/introduction-to-linq

Velmi často se používá v .NET Core pro přístup k databázi.

## Normální formy

> **Doporučené video**
>
> https://www.youtube.com/watch?v=7B9FnIIIsQc
> 
> https://www.youtube.com/watch?v=xoTyrdT9SZI

# NoSQL Databáze

MongoDb, CouchDb jsou databáze, které pracují s dokumenty / datovými strukturami (např. JSON).

https://www.freelancinggig.com/blog/2018/04/19/couchdb-vs-mongodb-understanding-difference/

Speciálními případy jsou databáze typu Neo4j nebo Redis.

> **Přečíst povinně**
>
> https://en.wikipedia.org/wiki/Graph_database

> **Přečíst volitelně**
>
> https://neo4j.com/download-center/?ref=web-product-database/#community
>
> https://redis.io/

## Vsuvka k asynchronnímu programování

> **Doporučené video**
>
> [Raymond Hettinger, Keynote on Concurrency, PyBay 2017 1h 14min](https://www.youtube.com/watch?v=9zinZmE3Ogk)

Inspirováno / převzato z https://pybay.com/site_media/slides/raymond2017-keynote/threading.html

In [42]:
# Shared counter that every call to worker() increments once.
counter = 0

def worker():
    """Increment the global ``counter`` in two steps: read it, then write back the value plus one."""
    global counter
    oldValue = counter
    counter = oldValue + 1
    
    
# Sequential baseline: ten plain calls, one after another.
for i in range(10):
    worker()
    
print('final value is', counter)

final value is 10


In [43]:
import threading

# Same counter and worker as in the sequential version, now run from threads.
counter = 0

def worker():
    """Increment the global ``counter`` in two steps: read it, then write back the value plus one."""
    global counter
    oldValue = counter
    counter = oldValue + 1
    
    
# Ten threads are started and never joined, so the print below does not
# wait for them; nothing protects the read/write pair in worker().
for i in range(10):
    threading.Thread(target=worker).start()
    
print('final value is', counter)

final value is 10


In [45]:
import threading
import time
import random

def fuzzIt():
    """Sleep for a random whole number of seconds between 1 and 5."""
    time.sleep(random.randint(1, 5))

counter = 0

def worker():
    """Increment the global ``counter`` with random pauses before, between and after the read and the write."""
    global counter
    fuzzIt()
    oldValue = counter
    # The pause between read and write gives other threads time to read the
    # same old value.
    fuzzIt()
    counter = oldValue + 1
    fuzzIt()
    
    
# Threads are started and never joined: this print runs while the workers
# are still sleeping.
for i in range(10):
    threading.Thread(target=worker).start()
    
print('final value is', counter)

In [47]:
print('final value is', counter)

final value is 3


In [52]:
import asyncio
import time

def mS(start=0):
    """Return ``time.time()`` minus ``start``, in seconds.

    With the default ``start`` this is the current timestamp; passing an
    earlier result of ``mS()`` gives the time elapsed since then.
    """
    elapsed = time.time() - start
    return elapsed

async def execute():
    """Wait one second without blocking the event loop, then return 2."""
    delay_seconds = 1
    await asyncio.sleep(delay_seconds)
    return 2

# Calling the coroutine function only creates a coroutine object; the print
# shows that object, not the value 2.
result = execute()
print(result)

# Awaiting the object actually runs it; the elapsed time is measured around
# the await (top-level await as supported by the notebook).
start = mS()
awaitedResult = await result
end = mS(start)
print(awaitedResult)
print('elapsed', end)

<coroutine object execute at 0x7fa7bba52f40>
2
elapsed 1.0011518001556396


In [56]:
def fuzzIt():
    """Sleep for a random whole number of seconds between 1 and 5."""
    pause = random.randint(1, 5)
    time.sleep(pause)


# Shared counter incremented once by every execute2() coroutine.
counter = 0
async def execute2():
    """Increment the global ``counter`` with random pauses around each step.

    The only ``await`` is ``asyncio.sleep(1)``; the ``fuzzIt()`` pauses use
    ``time.sleep`` and are not awaited, so there is no await between reading
    ``counter`` and writing it back.
    """
    fuzzIt()
    await asyncio.sleep(1)
    fuzzIt()
    global counter
    fuzzIt()
    oldValue = counter
    fuzzIt()
    counter = oldValue + 1
    
    
# Create ten coroutine objects; nothing runs until they are awaited.
tasks = []
for i in range(10):
    tasks.append(execute2())
    
# Run all ten through asyncio.gather and time the whole batch.
start = mS()
results = await asyncio.gather(*tasks)
end = mS(start)
print('final value', counter)
print('elapsed', end)

final value 10
elapsed 136.11234211921692


## MongoDB

https://motor.readthedocs.io/en/stable/tutorial-asyncio.html

> **Doporučené video**
>
> [MongoDB with Python Crash Course - Tutorial for Beginners 2h](https://www.youtube.com/watch?v=E-1xI85Zog8)

In [3]:
!pip install motor

In [3]:
!pip install pymongo

Collecting motor
  Downloading motor-2.3.1-py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 53 kB/s  eta 0:00:011
[?25hCollecting pymongo<4,>=3.11
  Downloading pymongo-3.11.3-cp38-cp38-manylinux2014_x86_64.whl (531 kB)
[K     |████████████████████████████████| 531 kB 7.2 MB/s eta 0:00:01
[?25hInstalling collected packages: pymongo, motor
Successfully installed motor-2.3.1 pymongo-3.11.3


In [71]:
import getpass
mongoPassword = getpass.getpass()

 ·········


In [72]:
import motor.motor_asyncio

In [73]:
import pandas as pd

def displayData(data):
    """Show ``data`` in the notebook output as a pandas DataFrame."""
    display(pd.DataFrame(data))

### Connection

In [74]:
from pymongo import MongoClient
from urllib.parse import quote_plus

# Credentials embedded in a MongoDB URI must be percent-escaped; otherwise a
# password containing characters such as '@', ':' or '/' breaks URI parsing.
client = MongoClient('mongodb://%s:%s@192.168.1.6:27017' % (quote_plus('root'), quote_plus(mongoPassword)))
# serverStatus is issued against the admin database.
db = client.admin
serverStatusResult = db.command("serverStatus")
displayData(serverStatusResult)

Unnamed: 0,host,version,process,pid,uptime,uptimeMillis,uptimeEstimate,localTime,asserts,connections,...,storageEngine,tcmalloc,trafficRecording,transactions,transportSecurity,twoPhaseCommitCoordinator,wiredTiger,mem,metrics,ok
regular,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,0.0,,...,,,,,,,,,,1.0
warning,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,0.0,,...,,,,,,,,,,1.0
msg,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,0.0,,...,,,,,,,,,,1.0
user,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,0.0,,...,,,,,,,,,,1.0
rollovers,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,0.0,,...,,,,,,,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
operation,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,,,...,,,,,,,,,"{'scanAndOrder': 4, 'writeConflicts': 0}",1.0
queryExecutor,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,,,...,,,,,,,,,"{'scanned': 9, 'scannedObjects': 391}",1.0
record,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,,,...,,,,,,,,,{'moves': 0},1.0
repl,mongo,4.2.7,mongod,1,1321354.0,1321354617,1321354,2021-04-21 15:48:54.689,,,...,,,,,,,,,"{'executor': {'pool': {'inProgressCount': 0}, ...",1.0


### Connection Async

In [100]:
#uri = "mongodb://user:pass@localhost:27017/database_name"
uri = f"mongodb://root:{mongoPassword}@192.168.1.6:27017"
client = motor.motor_tornado.MotorClient(uri)
print(client)
dbs = list(await client.list_database_names())
print(dbs)

MotorClient(MongoClient(host=['192.168.1.6:27017'], document_class=dict, tz_aware=False, connect=False, driver=DriverInfo(name='Motor', version='2.3.1', platform='Tornado 6.1')))
['admin', 'calendar', 'config', 'internetArticles', 'local', 'test_database']


In [101]:
db = client['test_database']

In [77]:
result = await db.create_collection('test_collection')
print(result)

CollectionInvalid: collection test_collection already exists

### Create

In [102]:
import random
import string

def randomString(prefix='', N=8):
    """Return ``prefix`` followed by ``N`` random uppercase ASCII letters."""
    letters = random.choices(string.ascii_uppercase, k=N)
    suffix = ''.join(letters)
    return prefix + suffix

# Default field specifications: string fields map to the prefix of their
# random value, numeric fields map to an inclusive (low, high) range.
defaultStrs = {'name': 'user_', 'group': 'group_'}
defaultNums = {'phone': (602000000, 777999999)}
def randomDocument(strs=defaultStrs, nums=defaultNums):
    """Build one random document.

    Each key of ``strs`` gets a random string starting with its prefix; each
    key of ``nums`` gets a random integer from its inclusive range. String
    fields are generated first, so a key present in both ends up numeric.
    """
    document = {}
    document.update((field, randomString(prefix)) for field, prefix in strs.items())
    document.update((field, random.randint(bounds[0], bounds[1])) for field, bounds in nums.items())
    return document

def heterogenizeDocument(doc, **values):
    """Return a copy of ``doc`` with a random subset of extra string fields.

    Every keyword name is added with probability one half; its value is a
    random string prefixed with ``<name>_``. The keyword values themselves
    are ignored, and ``doc`` is not modified.
    """
    enriched = dict(doc)
    for field in values:
        if random.random() >= 0.5:
            continue
        enriched[field] = randomString(field + '_')
    return enriched
    
# Optional string fields; only the keys matter to heterogenizeDocument.
extraFields = dict.fromkeys(('street', 'city', 'primarySchool', 'secondarySchool'), '')
# Numeric fields: the defaults plus age, yearly income and current debt ranges.
nums = dict(defaultNums, age=(15, 80), incomeY=(300000, 1500000), actualDebt=(0, 10000000))

def getFullRndDoc():
    """Return a random document with all mandatory fields and a random subset of the optional ones."""
    baseDocument = randomDocument(defaultStrs, nums)
    return heterogenizeDocument(baseDocument, **extraFields)

print(getFullRndDoc())
print(getFullRndDoc())

{'name': 'user_YRISSPVC', 'group': 'group_AGYIWDVT', 'phone': 612432812, 'age': 79, 'incomeY': 980869, 'actualDebt': 9609595, 'street': 'street_OMJWVXEH', 'city': 'city_AHAVUXFP'}
{'name': 'user_PRZDWJOV', 'group': 'group_VKIGPXNY', 'phone': 674023744, 'age': 23, 'incomeY': 371141, 'actualDebt': 2623848, 'primarySchool': 'primarySchool_IYAFWDKG'}


In [103]:
# Preview twenty random documents as a table.
docs = [getFullRndDoc() for _ in range(20)]
displayData(docs)

Unnamed: 0,name,group,phone,age,incomeY,actualDebt,street,secondarySchool,primarySchool,city
0,user_TTLQFRPX,group_EBXARBFU,768311920,17,698667,5630719,street_OSZYTJHW,secondarySchool_MJFNSAGU,,
1,user_YXLOSZKS,group_VJLMRINM,615693871,43,1031642,7197520,street_QZBAWWSY,,,
2,user_HZMXHLYM,group_OKCTRAGN,753778078,51,1337973,7201185,street_NCPVNXIR,secondarySchool_LACMBSZD,primarySchool_KHFZGZYF,
3,user_WKSAMLRQ,group_XDUZLGKW,674213075,15,689165,4558214,,,primarySchool_EMEFMRGT,city_SKTVKCJK
4,user_UUOZTBBO,group_IPBUMSIS,673325428,72,1431902,4868339,street_XQLJIDCQ,secondarySchool_BFXLLAYU,,
5,user_BZRBEZAN,group_CTEJQHIG,693989698,53,411031,8636171,street_WWIXDGVL,secondarySchool_WITXXGOG,,
6,user_HQGPZFUP,group_WEIGWBXV,621447323,54,1145119,7730555,street_GMFTRNNN,secondarySchool_RFTYFTIP,,
7,user_WPZJYNPV,group_MYCMYGUR,718999896,42,1262242,589590,,,primarySchool_MVAWYHNC,
8,user_GCRWJQQX,group_JXSNVZYR,636630175,18,1083653,1137719,,,primarySchool_FHUHBLRI,
9,user_SXICZZJK,group_FDQVFNZW,657093684,42,1218488,1859705,,secondarySchool_VMRFIXDI,primarySchool_AOLTZVRN,city_IURRVJNA


In [104]:
collection = db['test_collection']

async def do_insert():
    """Insert ten random documents into ``test_collection``, one at a time, printing each insert result."""
    for _ in range(10):
        outcome = await db.test_collection.insert_one(getFullRndDoc())
        print('result %s' % outcome)

await do_insert()

result <pymongo.results.InsertOneResult object at 0x7fa7dd4fe940>
result <pymongo.results.InsertOneResult object at 0x7fa7dd3113c0>
result <pymongo.results.InsertOneResult object at 0x7fa7dda66880>
result <pymongo.results.InsertOneResult object at 0x7fa7dcafd600>
result <pymongo.results.InsertOneResult object at 0x7fa7dd3113c0>
result <pymongo.results.InsertOneResult object at 0x7fa7dda66880>
result <pymongo.results.InsertOneResult object at 0x7fa7dda651c0>
result <pymongo.results.InsertOneResult object at 0x7fa7dda5a8c0>
result <pymongo.results.InsertOneResult object at 0x7fa7dd4fe940>
result <pymongo.results.InsertOneResult object at 0x7fa7dcafd600>


### Read

In [105]:
async def do_findAll():
    """Return up to 100 documents of ``test_collection`` (empty filter) as a list."""
    return await db.test_collection.find({}).to_list(length=100)

displayData(await do_findAll())

Unnamed: 0,_id,key,i,name,group,phone,age,incomeY,actualDebt,street,city,secondarySchool,primarySchool
0,606ec05c6b84c76e2a73545f,value,,,,,,,,,,,
1,606ec09a6b84c76e2a735460,value,128.0,,,,,,,,,,
2,606ec0a26b84c76e2a735461,value,0.0,,,,,,,,,,
3,606ecb166b84c76e2a735462,value,0.0,,,,,,,,,,
4,606ecb4f6b84c76e2a735463,value,0.0,,,,,,,,,,
5,606ecb4f6b84c76e2a735464,value,1.0,,,,,,,,,,
6,606ecb4f6b84c76e2a735465,value,2.0,,,,,,,,,,
7,606ecb4f6b84c76e2a735466,value,3.0,,,,,,,,,,
8,606ecb4f6b84c76e2a735467,value,4.0,,,,,,,,,,
9,606ecb4f6b84c76e2a735468,value,5.0,,,,,,,,,,


In [107]:
async def do_find():
    """Return up to 100 documents with ``incomeY`` below 500000, sorted by ``incomeY``."""
    low_income = {'incomeY': {'$lt': 500000}}
    ordered = db.test_collection.find(low_income).sort('incomeY')
    return await ordered.to_list(length=100)
        
allDocs = await do_find()
displayData(allDocs)
print(allDocs)

Unnamed: 0,_id,name,group,phone,age,incomeY,actualDebt,city,secondarySchool,street,primarySchool
0,60811b2857e0faf209cefc70,user_LRFQDHNJ,group_FPDWCEBA,650505663,20,340812,8599144,city_QHHWATIX,secondarySchool_VRTTNSBK,,
1,6080497757e0faf209cefc62,user_ODBCNOPF,group_ZLVNVXZN,617074854,47,347058,4635306,,secondarySchool_TILAZHIE,street_IXQSNYZO,
2,60811b2857e0faf209cefc73,user_RMIHYXJG,group_ITHQZSGP,650438624,54,356410,5566541,,,,primarySchool_AJYOTJDU
3,60811b2857e0faf209cefc6c,user_WHPCLTKZ,group_KORZHAAK,608991457,39,381079,668047,,,street_FZONCMYY,primarySchool_JEXIASEF
4,60811b2857e0faf209cefc6d,user_PMWUUHEL,group_COHLCHGX,691863043,29,390925,6129474,city_BMMJBUSU,,,
5,6080497757e0faf209cefc69,user_RGZEFOAB,group_JMKIJYMZ,647097734,68,456370,9095640,city_PCFNLMPH,,,
6,60811b2857e0faf209cefc71,user_MZZGMAMF,group_CPIXPJGV,706433785,43,466731,1772388,city_IRVGRIMB,,street_YYDRUKLR,primarySchool_FNTEPNCG
7,60811b2857e0faf209cefc6e,user_ASZBKYSK,group_RWSTIEHE,609888593,22,497227,2473178,,,street_CNXWTQPD,


[{'_id': ObjectId('60811b2857e0faf209cefc70'), 'name': 'user_LRFQDHNJ', 'group': 'group_FPDWCEBA', 'phone': 650505663, 'age': 20, 'incomeY': 340812, 'actualDebt': 8599144, 'city': 'city_QHHWATIX', 'secondarySchool': 'secondarySchool_VRTTNSBK'}, {'_id': ObjectId('6080497757e0faf209cefc62'), 'name': 'user_ODBCNOPF', 'group': 'group_ZLVNVXZN', 'phone': 617074854, 'age': 47, 'incomeY': 347058, 'actualDebt': 4635306, 'street': 'street_IXQSNYZO', 'secondarySchool': 'secondarySchool_TILAZHIE'}, {'_id': ObjectId('60811b2857e0faf209cefc73'), 'name': 'user_RMIHYXJG', 'group': 'group_ITHQZSGP', 'phone': 650438624, 'age': 54, 'incomeY': 356410, 'actualDebt': 5566541, 'primarySchool': 'primarySchool_AJYOTJDU'}, {'_id': ObjectId('60811b2857e0faf209cefc6c'), 'name': 'user_WHPCLTKZ', 'group': 'group_KORZHAAK', 'phone': 608991457, 'age': 39, 'incomeY': 381079, 'actualDebt': 668047, 'street': 'street_FZONCMYY', 'primarySchool': 'primarySchool_JEXIASEF'}, {'_id': ObjectId('60811b2857e0faf209cefc6d'), 'na

In [109]:
async def do_find_one():
    """Return one document whose ``name`` equals 'user_ASZBKYSK' (as produced by ``find_one``)."""
    by_name = {'name': {'$eq': 'user_ASZBKYSK'}}
    return await db.test_collection.find_one(by_name)
    
print(await do_find_one())

{'_id': ObjectId('60811b2857e0faf209cefc6e'), 'name': 'user_ASZBKYSK', 'group': 'group_RWSTIEHE', 'phone': 609888593, 'age': 22, 'incomeY': 497227, 'actualDebt': 2473178, 'street': 'street_CNXWTQPD'}


In [110]:
async def do_findII():
    """Print every document of ``test_collection`` whose ``i`` is below 2."""
    matches = db.test_collection.find({'i': {'$lt': 2}})
    async for found in matches:
        print(found)
        
await do_findII()

{'_id': ObjectId('606ec0a26b84c76e2a735461'), 'key': 'value', 'i': 0}
{'_id': ObjectId('606ecb166b84c76e2a735462'), 'key': 'value', 'i': 0}
{'_id': ObjectId('606ecb4f6b84c76e2a735463'), 'key': 'value', 'i': 0}
{'_id': ObjectId('606ecb4f6b84c76e2a735464'), 'key': 'value', 'i': 1}
{'_id': ObjectId('606ecb586b84c76e2a73546d'), 'key': 'value', 'i': 0}
{'_id': ObjectId('60768df56b84c76e2a735470'), 'key': 'value', 'i': 0}
{'_id': ObjectId('60768df56b84c76e2a735471'), 'key': 'value', 'i': 1}


In [38]:
async def do_findIII():
    """Print two documents with ``i`` below 4: sorted by ``i`` descending, skipping the first."""
    cursor = db.test_collection.find({'i': {'$lt': 4}})
    # The cursor is adjusted in place before iteration starts.
    cursor.sort('i', -1)
    cursor.skip(1)
    cursor.limit(2)
    async for found in cursor:
        print(found)
        
await do_findIII()

{'_id': ObjectId('606ecb4f6b84c76e2a735465'), 'key': 'value', 'i': 2}
{'_id': ObjectId('606ecb4f6b84c76e2a735464'), 'key': 'value', 'i': 1}


In [85]:
async def do_findIV():
    """Yield two documents with ``i`` below 4: sorted by ``i`` descending, skipping the first."""
    cursor = db.test_collection.find({'i': {'$lt': 4}})
    # The cursor is adjusted in place before iteration starts.
    cursor.sort('i', -1)
    cursor.skip(1)
    cursor.limit(2)
    async for found in cursor:
        yield found
        
# Calling an async generator function returns an async-generator object (the
# print shows it); the query results arrive only while it is iterated.
asyncResult = do_findIV()
print(asyncResult)
async for item in asyncResult:
    print(item)

<async_generator object do_findIV at 0x7fa7dcae2b80>
{'_id': ObjectId('60768df56b84c76e2a735473'), 'key': 'value', 'i': 3}
{'_id': ObjectId('606ecb4f6b84c76e2a735465'), 'key': 'value', 'i': 2}


### Counting

In [111]:
async def do_count():
    """Print the total number of documents and the number with ``i`` above 1000."""
    coll = db.test_collection
    total = await coll.count_documents({})
    print('%s documents in collection' % total)
    above = await coll.count_documents({'i': {'$gt': 1000}})
    print('%s documents where i > 1000' % above)
    
await do_count()

45 documents in collection
0 documents where i > 1000


### Update

In [113]:
async def do_replace():
    """Replace the document named 'user_ASZBKYSK' with ``{'key': 'newValue'}``.

    Prints the document found, the number of replaced documents and the
    document as stored afterwards. The replacement drops the ``name`` field,
    so a second run finds nothing; in that case the function stops after
    printing the empty lookup instead of failing on ``None['_id']``.
    """
    coll = db.test_collection
    old_document = await coll.find_one({'name': 'user_ASZBKYSK'})
    print('found document: %s' % old_document)
    if old_document is None:
        # Nothing matched, so there is no _id to replace by.
        return
    _id = old_document['_id']
    result = await coll.replace_one({'_id': _id}, {'key': 'newValue'})
    print('replaced %s document' % result.modified_count)
    new_document = await coll.find_one({'_id': _id})
    print('document is now %s' % new_document)
    
await do_replace()

found document: {'_id': ObjectId('60811b2857e0faf209cefc6e'), 'name': 'user_ASZBKYSK', 'group': 'group_RWSTIEHE', 'phone': 609888593, 'age': 22, 'incomeY': 497227, 'actualDebt': 2473178, 'street': 'street_CNXWTQPD'}
replaced 1 document
document is now {'_id': ObjectId('60811b2857e0faf209cefc6e'), 'key': 'newValue'}


In [115]:
async def do_update():
    """Set ``key`` to 'replacedValue' on the document named 'user_MZZGMAMF' and print the outcome."""
    coll = db.test_collection
    by_name = {'name': 'user_MZZGMAMF'}
    outcome = await coll.update_one(by_name, {'$set': {'key': 'replacedValue'}})
    print('updated %s document' % outcome.modified_count)
    # Read the document back to show the stored state.
    refreshed = await coll.find_one({'name': 'user_MZZGMAMF'})
    print('document is now %s' % refreshed)
    
await do_update()

updated 1 document
document is now {'_id': ObjectId('60811b2857e0faf209cefc71'), 'name': 'user_MZZGMAMF', 'group': 'group_CPIXPJGV', 'phone': 706433785, 'age': 43, 'incomeY': 466731, 'actualDebt': 1772388, 'street': 'street_YYDRUKLR', 'city': 'city_IRVGRIMB', 'primarySchool': 'primarySchool_FNTEPNCG', 'key': 'replacedValue'}


## Map / Reduce

Map (mapování) – aplikace funkce na každý prvek datové struktury – je paralelizovatelný proces.

In [118]:
import random
import string

def randomStr(prefix='', N=8):
    """Return ``prefix`` followed by ``N`` random uppercase ASCII letters."""
    letters = random.choices(string.ascii_uppercase, k=N)
    suffix = ''.join(letters)
    return prefix + suffix

print(randomStr())

QSRZCBCJ


### Map

In [116]:
def intoDict(number):
    """Wrap ``number`` in a one-field record stored under the key ``'id'``."""
    record = {'id': number}
    return record

# map() is lazy: it returns an iterator and calls intoDict only on demand,
# so the first print shows the map object itself rather than any data.
dataSequence = map(intoDict, range(10))
print(dataSequence)
# list() consumes the iterator and materializes the ten records.
mappedResult = list(dataSequence)
print(mappedResult)
# displayData is a helper defined elsewhere in the notebook
# (presumably renders the records as a table -- confirm).
displayData(mappedResult)

<map object at 0x7fa7dd4fdcd0>
[{'id': 0}, {'id': 1}, {'id': 2}, {'id': 3}, {'id': 4}, {'id': 5}, {'id': 6}, {'id': 7}, {'id': 8}, {'id': 9}]


Unnamed: 0,id
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9


In [119]:
def intoDict(number):
    """Build the record ``{'id': number}`` for the given value."""
    wrapped = {}
    wrapped['id'] = number
    return wrapped

def createName(item):
    """Return a copy of ``item`` extended with a random ``'name'`` field.

    The input mapping is not modified; an existing ``'name'`` entry is
    overwritten in the copy.
    """
    named = {**item}
    named['name'] = randomStr()
    return named

def composeFunc(number):
    """Apply ``intoDict`` and then ``createName`` to ``number`` in one call."""
    record = intoDict(number)
    return createName(record)

# Two chained lazy maps: numbers -> {'id': n} -> {'id': n, 'name': ...}.
# Nothing is computed until the outer iterator is consumed by list().
dataSequence = map(intoDict, range(10))
mapped = map(createName, dataSequence)
print(mapped)
# Materialize the records; each item passes through both functions.
mappedResult = list(mapped)
print(mappedResult)
# displayData is a helper defined elsewhere in the notebook
# (presumably renders the records as a table -- confirm).
displayData(mappedResult)

<map object at 0x7fa7dca23850>
[{'id': 0, 'name': 'WJPAFRAC'}, {'id': 1, 'name': 'YACEDGTB'}, {'id': 2, 'name': 'PRBKWYYP'}, {'id': 3, 'name': 'HXRFMHWL'}, {'id': 4, 'name': 'NSWSNPRK'}, {'id': 5, 'name': 'RWCCOZZS'}, {'id': 6, 'name': 'NVMEURWO'}, {'id': 7, 'name': 'SHYWFQFM'}, {'id': 8, 'name': 'MNSKRDUH'}, {'id': 9, 'name': 'NJSNNINU'}]


Unnamed: 0,id,name
0,0,WJPAFRAC
1,1,YACEDGTB
2,2,PRBKWYYP
3,3,HXRFMHWL
4,4,NSWSNPRK
5,5,RWCCOZZS
6,6,NVMEURWO
7,7,SHYWFQFM
8,8,MNSKRDUH
9,9,NJSNNINU


In [120]:
def funcReduce(*funcList):
    """Compose the given functions into a single one-argument function.

    The returned function feeds its argument to the first function, passes
    that result to the second, and so on, returning the final value. With no
    functions given, the argument is returned unchanged.
    """
    def result(item):
        # Thread the value through every function, left to right.
        for step in funcList:
            item = step(item)
        return item
    return result

In [121]:
def intoDict(number):
    """Turn a bare value into a record of the form ``{'id': value}``."""
    fields = [('id', number)]
    return dict(fields)

def createName(item):
    """Copy ``item`` and add a randomly generated ``'name'`` entry to the copy.

    The original mapping stays untouched; a ``'name'`` key already present is
    replaced in the returned dict.
    """
    enriched = {**item}
    enriched.update(name=randomStr())
    return enriched

# Compose both steps into one function so a single map() pass is enough.
allOps = funcReduce(intoDict, createName)
# map() is lazy; the print below shows the map object, not the data.
mapped = map(allOps, range(10))
print(mapped)
# list() consumes the iterator and materializes the records.
mappedResult = list(mapped)
print(mappedResult)
# displayData is a helper defined elsewhere in the notebook
# (presumably renders the records as a table -- confirm).
displayData(mappedResult)

<map object at 0x7fa7dca238e0>
[{'id': 0, 'name': 'VTOHODUB'}, {'id': 1, 'name': 'BLYRGWYZ'}, {'id': 2, 'name': 'UQFTHHCU'}, {'id': 3, 'name': 'LHQBRGLG'}, {'id': 4, 'name': 'QSYRXNHD'}, {'id': 5, 'name': 'KENWGUOP'}, {'id': 6, 'name': 'GCUCKQHU'}, {'id': 7, 'name': 'XBBCKVZI'}, {'id': 8, 'name': 'CZUJEXYF'}, {'id': 9, 'name': 'MFHFSWGK'}]


Unnamed: 0,id,name
0,0,VTOHODUB
1,1,BLYRGWYZ
2,2,UQFTHHCU
3,3,LHQBRGLG
4,4,QSYRXNHD
5,5,KENWGUOP
6,6,GCUCKQHU
7,7,XBBCKVZI
8,8,CZUJEXYF
9,9,MFHFSWGK


### Filter

In [122]:
def justSome(item):
    """Tell whether the record's ``'name'`` sorts before ``'C'``.

    Used as a ``filter`` predicate: with uppercase names this keeps those
    starting with ``A`` or ``B``.
    """
    name = item['name']
    return name < 'C'

# filter() is lazy like map(): it returns an iterator, so the first print
# shows the filter object rather than the selected records.
filteredData = filter(justSome, mappedResult)
print(filteredData)
# list() runs the predicate over mappedResult and keeps the matching records.
filteredResult = list(filteredData)
print(filteredResult)
# displayData is a helper defined elsewhere in the notebook
# (presumably renders the records as a table -- confirm).
displayData(filteredResult)

<filter object at 0x7fa7dca23bb0>
[{'id': 1, 'name': 'BLYRGWYZ'}]


Unnamed: 0,id,name
0,1,BLYRGWYZ


### Reduce

In [60]:
from functools import reduce

def count(acc, item):
    """Reducer that counts items: return the accumulator increased by one.

    The ``item`` itself is ignored; only the number of calls matters.
    """
    increased = acc + 1
    return increased

# Fold filteredResult with count, starting from 0: the accumulator grows by
# one per item, so the result is the number of records in filteredResult.
result = reduce(count, filteredResult, 0)
print(result)

2


## Mongo Map+Reduce

In [123]:
# client is created elsewhere in the notebook; select the database and the
# collection by name.
db = client['test_database']
collection = db['test_collection']

async def do_findAll():
    """Fetch documents from ``test_collection`` without any filter.

    Returns:
        A list with at most 100 documents (the ``length`` passed to
        ``to_list``).
    """
    return await db.test_collection.find({}).to_list(length=100)

displayData(await do_findAll())

Unnamed: 0,_id,key,i,name,group,phone,age,incomeY,actualDebt,street,city,secondarySchool,primarySchool
0,606ec05c6b84c76e2a73545f,value,,,,,,,,,,,
1,606ec09a6b84c76e2a735460,value,128.0,,,,,,,,,,
2,606ec0a26b84c76e2a735461,value,0.0,,,,,,,,,,
3,606ecb166b84c76e2a735462,value,0.0,,,,,,,,,,
4,606ecb4f6b84c76e2a735463,value,0.0,,,,,,,,,,
5,606ecb4f6b84c76e2a735464,value,1.0,,,,,,,,,,
6,606ecb4f6b84c76e2a735465,value,2.0,,,,,,,,,,
7,606ecb4f6b84c76e2a735466,value,3.0,,,,,,,,,,
8,606ecb4f6b84c76e2a735467,value,4.0,,,,,,,,,,
9,606ecb4f6b84c76e2a735468,value,5.0,,,,,,,,,,


In [125]:
# Map and reduce steps are JavaScript source kept in Python strings; the
# notebook hands both strings to inline_map_reduce on the collection.
# Alternative grouping key (first four characters of the name):
#     emit(this.name.substr(0, 4), this.actualDebt);
# The map step below only handles documents that have a truthy `name` and
# emits under the constant key 0, so all values land in a single group.
mapFunction = '''function() {
    if (this.name) {
       emit(0, this.actualDebt);
    }
};'''

# The reduce step sums the values emitted for one key.
# NOTE(review): presumably the sum becomes NaN when a matching document has
# no numeric actualDebt -- verify against the data.
reduceFunction ='''function(keyCustId, valuesPrices) {
   return Array.sum(valuesPrices);
};'''

reduced = await db.test_collection.inline_map_reduce(mapFunction, reduceFunction)
print(reduced)

[{'_id': 'user_BQGMDAMG', 'value': 452189.0}, {'_id': 'user_DOEXNAUD', 'value': 8551921.0}, {'_id': 'user_IJSMYFGX', 'value': 8681746.0}, {'_id': 'user_JTBESTZG', 'value': 4843805.0}, {'_id': 'user_LRFOCIVL', 'value': 3276299.0}, {'_id': 'user_LRFQDHNJ', 'value': 8599144.0}, {'_id': 'user_MIAKDLVJ', 'value': 3153444.0}, {'_id': 'user_MZZGMAMF', 'value': 1772388.0}, {'_id': 'user_ODBCNOPF', 'value': 4635306.0}, {'_id': 'user_OLGOKOCQ', 'value': 1177189.0}, {'_id': 'user_OZHLECMA', 'value': 9466912.0}, {'_id': 'user_OZZEZVWS', 'value': 4146690.0}, {'_id': 'user_PMWUUHEL', 'value': 6129474.0}, {'_id': 'user_PRGWSQKY', 'value': 6767986.0}, {'_id': 'user_QHXQNTPD', 'value': 73058.0}, {'_id': 'user_RGZEFOAB', 'value': 9095640.0}, {'_id': 'user_RMIHYXJG', 'value': 5566541.0}, {'_id': 'user_WHPCLTKZ', 'value': 668047.0}, {'_id': 'user_ZGZREBJX', 'value': 4960078.0}]


In [90]:
#awaited = await reduced
#print(awaited)

[{'_id': 0.0, 'value': nan}]
