# Schema Validation
# Bulk Inserting
# Data Modeling
# Advanced Queries
# PyMongo Arrow Demo


In [40]:
# https://www.youtube.com/watch?v=nYNAH8K_UhI&list=PLzMcBGfZo4-nX-NCYorkatzBxjqRlPkKB&index=2
import os
import pprint
from datetime import datetime as dt
from urllib.parse import quote_plus

from bson.objectid import ObjectId
from dotenv import load_dotenv, find_dotenv
from pymongo import MongoClient
from pymongo.errors import CollectionInvalid


# Load environment variables from the .env file located by find_dotenv().
load_dotenv(find_dotenv())

password = os.environ.get("MONGODB_PWD")
if not password:
    # Without this check the f-string below would embed the literal text
    # "None" as the password and the failure would only surface later as an
    # opaque authentication error.
    raise RuntimeError(
        "MONGODB_PWD is not set; define it in your .env file or environment"
    )

# Credentials placed in a MongoDB URI must be percent-escaped, otherwise
# reserved characters in the password (e.g. "@", ":", "/", "%") corrupt the URI.
connection_string = (
    f"mongodb+srv://danielfr500:{quote_plus(password)}"
    "@firstmongo.chjgl.mongodb.net/?retryWrites=true&w=majority&authSource=admin"
)

client = MongoClient(connection_string)

dbs = client.list_database_names()  # check list of dbs

# Handle to the "production" database used by every cell below.
production = client.production


## Schema Validation

In [41]:
# Schema validation is a way of creating some type of structure in your MongoDB database.
# book_validator is a $jsonSchema document: it lists the fields every book
# document must have and the BSON type / allowed values of each one.

book_validator = {
    "$jsonSchema": {
        "bsonType": "object",
        "required": ["title", "authors", "publish_date", "type", "copies"],
        "properties": {
            "title": {
                "bsonType": "string",
                "description": "must be a string and is required"
            },
            # Each element of "authors" is an ObjectId (see "items" below).
            "authors": {
                "bsonType": "array",
                "items": {
                    "bsonType": "objectId",
                    "description": "must be an objectid and is required"
                }
            },
            "publish_date": {
                "bsonType": "date",
                "description": "must be a date and is required"
            },
            "type": {
                "enum": ["Fiction", "Non-Fiction"],
                "description": "can only be one of the enum values and is required"
            },
            # "minimum": 0 is inclusive, so zero copies is a valid value.
            "copies": {
                "bsonType": "int",
                "minimum": 0,
                "description": "must be an integer greater than or equal to 0 and is required"
            }
        }
    }
}


In Atlas, make sure that under
Database Access →
Edit User,
the Built-in Role is set to "Atlas admin".

You must also add `&authSource=admin` to the end of the connection string.

In [42]:
# Create the "book" collection within the production database.
# The cell can be run multiple times: when the collection already exists
# PyMongo raises CollectionInvalid, which is reported and skipped. Any other
# failure (authentication, network, permissions) is allowed to propagate
# instead of being silently printed.

try:
    production.create_collection("book")
except CollectionInvalid as e:
    print(e)


In [43]:
# Attach the schema to the existing "book" collection: run the MongoDB
# `collMod` command on the production database, targeting "book" and passing
# book_validator as the command's `validator` field. The command's reply
# document is the last expression of the cell, so the notebook displays it.
production.command("collMod", "book", validator=book_validator)


{'ok': 1.0,
 '$clusterTime': {'clusterTime': Timestamp(1658695586, 8),
  'signature': {'hash': b'|\xb4\xea\x8a\xc4k\xc5\xf8%\xbb\xf3\x10Q\x1f\x8d\xe1\xc2\xc9\xc8N',
   'keyId': 7084684305783324685}},
 'operationTime': Timestamp(1658695586, 7)}

The validation JSON will now show up in the Validator tab in MongoDB Compass.

In [44]:
def create_author_collection():
    """Create the ``author`` collection (if needed) and attach its schema.

    The validator requires ``first_name`` and ``last_name`` as strings and
    ``date_of_birth`` as a date. It is applied with the ``collMod`` command.

    Safe to call repeatedly: an already-existing collection is reported and
    skipped, while any other error (authentication, network, permissions)
    propagates to the caller.
    """
    author_validator = {
        "$jsonSchema": {
            "bsonType": "object",
            "required": ["first_name", "last_name", "date_of_birth"],
            "properties": {
                "first_name": {
                    "bsonType": "string",
                    "description": "must be a string and is required"
                },
                "last_name": {
                    "bsonType": "string",
                    "description": "must be a string and is required"
                },
                "date_of_birth": {
                    "bsonType": "date",
                    "description": "must be a date and is required"
                }
            }
        }
    }

    try:
        production.create_collection("author")
    except CollectionInvalid as e:
        # Only "collection already exists" is expected here; anything else
        # should surface rather than be printed and ignored.
        print(e)

    production.command("collMod", "author", validator=author_validator)


create_author_collection()

## Bulk Inserting

In [45]:
def create_data():
    """Bulk-insert the sample authors, then the sample books that reference them.

    Authors go in first so that the ids produced by the insert can be stored
    in each book's ``authors`` array as references.
    """
    author_rows = [
        ("Tim", "Ruscica", dt(2000, 7, 20)),
        ("George", "Orwell", dt(1903, 6, 25)),
        ("Herman", "Melville", dt(1819, 8, 1)),
        ("F. Scott", "Fitzgerald", dt(1896, 9, 24)),
    ]
    author_docs = [
        {"first_name": first, "last_name": last, "date_of_birth": born}
        for first, last, born in author_rows
    ]

    # inserted_ids is the list of ids of all inserted authors; the book rows
    # below rely on it being in the same order as author_rows.
    author_ids = production.author.insert_many(author_docs).inserted_ids
    tim, orwell, melville, fitzgerald = author_ids

    # Each book references its author(s) by passing a list of author ids.
    book_rows = [
        ("MongoDB Advanced Tutorial", [tim, orwell], dt.today(), "Non-Fiction"),
        ("Python For Dummies", [tim], dt(2022, 1, 17), "Non-Fiction"),
        ("Nineteen Eighty-Four", [orwell], dt(1949, 6, 8), "Fiction"),
        ("Moby Dick", [melville], dt.today(), "Fiction"),
        ("The Great Gatsby", [fitzgerald], dt(2014, 5, 23), "Fiction"),
    ]
    book_docs = [
        {
            "title": title,
            "authors": writers,
            "publish_date": published,
            "type": kind,
            "copies": 5,
        }
        for title, writers, published, kind in book_rows
    ]

    production.book.insert_many(book_docs)


create_data()

In [46]:
# Uncomment and run to drop both collections (e.g. to reset the sample data before re-running create_data())
# production.book.drop()
# production.author.drop()

## Data Modeling
### Embedding vs References

If you have a one-to-one relationship (e.g. one address for one person), you can store the related information embedded in the parent document.

If one object is related to multiple entities (one-to-many or many-to-many), store it in a separate collection and use a reference. This uses less space, and the data only has to be edited in one place.

### Subset Pattern
Stores a subset of the frequently retrieved information to save time on reads.

## Advanced Queries
You could simply return every document from the collection and parse the results in Python, but this is not optimal: it transfers a large amount of data that you do not need.
 

In [None]:
#Return all of the books that contain the letter "a"
