# Connecting to the database

In [49]:
from dotenv import load_dotenv, find_dotenv
import os
import pprint
from pymongo import MongoClient
from bson.objectid import ObjectId


load_dotenv(find_dotenv())

password = os.environ.get("MONGODB_PWD")
connection_string = f"mongodb+srv://danielfr500:{password}@firstmongo.chjgl.mongodb.net/?retryWrites=true&w=majority"

client = MongoClient(connection_string)

dbs = client.list_database_names()  # check list of dbs
print(dbs)

test_db = client.test  # Access db
collections = test_db.list_collection_names()  # access collection lists
print(collections)


['production', 'test', 'admin', 'local']
['test']


# CRUD

Creating the documents

In [3]:
# Create
def insert_test_document():
    # get access to test collection from test database using .notation
    collection = test_db.test
    test_document = {
        "name": "Tim",
        "type": "Test"
    }
    # inserts one document into a collection
    inserted_id = collection.insert_one(test_document).inserted_id
    print(inserted_id)


insert_test_document()


62dc41302470dc12304a1fdc


In [None]:
# if trying to access a db that doesn't exist - mongdb will create it
production = client.production
person_collection = production.person_collection


def create_documents():
    first_names = ["Tim", "Sarah", "Jennifer", "Jose", "Brad", "Allen"]
    last_names = ["Ruscica", "Smith", "Bart", "Cater", "Pit", "Geral"]
    ages = [21, 40, 23, 19, 34, 67]

    for first_name, last_name, age in zip(first_names, last_names, age):
        doc = {"first_name": first_name, "last_name": last_name, "age": age}
        person_collection.insert_one(doc)


create_documents()


In [26]:
person_collection.insert_one(
    {"first_name": "Tim", "last_name": "Cook", "age": 44})


<pymongo.results.InsertOneResult at 0x1f21134d3f0>

In [7]:
# a more efficent way to creat documents by inserting a list at one time
production = client.production
person_collection = production.person_collection


def create_documents():
    first_names = ["Tim", "Sarah", "Jennifer", "Jose", "Brad", "Allen"]
    last_names = ["Ruscica", "Smith", "Bart", "Cater", "Pit", "Geral"]
    ages = [21, 40, 23, 19, 34, 67]

    docs = []

    for first_name, last_name, age in zip(first_names, last_names, ages):
        doc = {"first_name": first_name, "last_name": last_name, "age": age}
        docs.append(doc)

    person_collection.insert_many(docs)


create_documents()


-- The equivalent code in SQL
CREATE TABLE person(
    _id INT NOT NULL,
    first_name VARCHAR(100),
    last_name VARCHAR(100),
    age INT,
    PRIMARY KEY(_id)
);

INSERT INTO person (
    first_name,
    last_name,
    age
) VALUES (
    "Tim",
    "Ruscica",
    21
)

Quering The Documents

In [34]:
printer = pprint.PrettyPrinter()


In [27]:
def find_all_people():
    people = person_collection.find()
    print(list(people))

    # if find() is left empty it finds everything
    # recieve in python dictionary format
    for person in people:
        printer.pprint(person)

    # print a list
find_all_people()


[{'_id': ObjectId('62dc41872470dc12304a1fdd'), 'first_name': 'Tim', 'last_name': 'Ruscica', 'age': 21}, {'_id': ObjectId('62dc41872470dc12304a1fde'), 'first_name': 'Sarah', 'last_name': 'Smith', 'age': 40}, {'_id': ObjectId('62dc41872470dc12304a1fdf'), 'first_name': 'Jennifer', 'last_name': 'Bart', 'age': 23}, {'_id': ObjectId('62dc41872470dc12304a1fe0'), 'first_name': 'Jose', 'last_name': 'Cater', 'age': 19}, {'_id': ObjectId('62dc41872470dc12304a1fe1'), 'first_name': 'Brad', 'last_name': 'Pit', 'age': 34}, {'_id': ObjectId('62dc41872470dc12304a1fe2'), 'first_name': 'Allen', 'last_name': 'Geral', 'age': 67}, {'_id': ObjectId('62dc89752470dc12304a1fe3'), 'last_name': 'Tim', 'first_name': 'Cook', 'age': 44}, {'_id': ObjectId('62dc89a02470dc12304a1fe4'), 'first_name': 'Tim', 'last_name': 'Cook', 'age': 44}]


In [21]:
# find (first) document based on field value
# can search by single {"first_name":"Tim"} or multiple fields {"first_name":"Tim","last_name":"Ruscica"}
# SQL would be ~ SELECT * FROM person WHERE first_name = "tim"

def find_tim():
    tim = person_collection.find_one(
        {"first_name": "Tim", "last_name": "Ruscica"})
    printer.pprint(tim)


find_tim()


{'_id': ObjectId('62dc41872470dc12304a1fdd'),
 'age': 21,
 'first_name': 'Tim',
 'last_name': 'Ruscica'}


In [28]:
# Can also search to find all instances
def find_tim():
    tim = person_collection.find({"first_name": "Tim"})
    printer.pprint(list(tim))


find_tim()


[{'_id': ObjectId('62dc41872470dc12304a1fdd'),
  'age': 21,
  'first_name': 'Tim',
  'last_name': 'Ruscica'},
 {'_id': ObjectId('62dc89a02470dc12304a1fe4'),
  'age': 44,
  'first_name': 'Tim',
  'last_name': 'Cook'}]


In [29]:
# Count all people
# leaving filter empty finds all of the documents (filter={})
# SQL: SELECT COUNT(*) FROM person

def count_all_people():
    count = person_collection.count_documents(filter={})
    print("Number of People", count)


count_all_people()


Number of People 8


In [36]:
# Find by ID
# Ids have to be converted from string to id objects in BSON
# SQL: SELECT * FROM person WHERE id = person_id

def get_person_by_id(person_id):
    from bson.objectid import ObjectId

    # Ids have to be converted from string to id objects in BSON
    _id = ObjectId(person_id)
    person = person_collection.find_one({"_id": _id})
    printer.pprint(person)


get_person_by_id("62dc89a02470dc12304a1fe4")


{'_id': ObjectId('62dc89a02470dc12304a1fe4'),
 'age': 44,
 'first_name': 'Tim',
 'last_name': 'Cook'}


In [58]:
# Query a range
# "$and" is a mongodb query operator that allows multiple items
# "$gte" is greater than or equal to
# "$lte" is less than or equal to
# .sort key is 1 for ascending, -1 for descending
# SQL: SELECT * FROM person WHERE age >= min_age AND age <= max_age

def get_age_range(min_age, max_age):
    query = {"$and": [
            {"age": {"$gte": min_age}},
            {"age": {"$lte": max_age}}
    ]}

    people = person_collection.find(query).sort("age", direction=-1)

    for person in people:
        printer.pprint(person)


get_age_range(20, 60)


{'_id': ObjectId('62dc89752470dc12304a1fe3'),
 'age': 44,
 'first_name': 'Cook',
 'last_name': 'Tim'}
{'_id': ObjectId('62dc89a02470dc12304a1fe4'),
 'age': 44,
 'first_name': 'Tim',
 'last_name': 'Cook'}
{'_id': ObjectId('62dc41872470dc12304a1fde'),
 'age': 40,
 'first_name': 'Sarah',
 'last_name': 'Smith'}
{'_id': ObjectId('62dc41872470dc12304a1fe1'),
 'age': 34,
 'first_name': 'Brad',
 'last_name': 'Pit'}
{'_id': ObjectId('62dc41872470dc12304a1fdf'),
 'age': 23,
 'first_name': 'Jennifer',
 'last_name': 'Bart'}
{'_id': ObjectId('62dc41872470dc12304a1fdd'),
 'age': 21,
 'first_name': 'Tim',
 'last_name': 'Ruscica'}


In [71]:
# Count the returned items in a Query range

# "$and" is a mongodb query operator that allows multiple items
# "$gte" is greater than or equal to
# "$lte" is less than or equal to


def get_age_range(min_age, max_age):
    query = {"$and": [
            {"age": {"$gte": min_age}},
            {"age": {"$lte": max_age}}
    ]}

    # count_documents() is depracated.
    people = production.person_collection.find(query)

    print(len(list(people)))  # count_documents() is depracated.
    # list(people) consumes the cursor - i.e.the cursor is empty afterwards
    print(list(people))


get_age_range(20, 60)


6
[]


In [81]:
# projection is inclusion - specific columns when you only want some columns,
# 0/false indicates don't want in results, 1/True is included
# _id is always returned unless false

def project_columns():
    columns = {"_id": False, "first_name": 1, "last_name": 1}
    people = person_collection.find({}, columns)

    for person in people:
        printer.pprint(person)


project_columns()


{'first_name': 'Tim', 'last_name': 'Ruscica'}
{'first_name': 'Sarah', 'last_name': 'Smith'}
{'first_name': 'Jennifer', 'last_name': 'Bart'}
{'first_name': 'Jose', 'last_name': 'Cater'}
{'first_name': 'Brad', 'last_name': 'Pit'}
{'first_name': 'Allen', 'last_name': 'Geral'}
{'first_name': 'Cook', 'last_name': 'Tim'}
{'first_name': 'Tim', 'last_name': 'Cook'}


Updating

In [89]:
# Update - create new field, if field existed
# "$set" creates or overrides fields, can also do multiple fields
# "$inc" increments a field by x amount, can also do multiple fields
# "$rename" renames the field name not the value, can also do multiple fields

def update_person_by_id(person_id):
    from bson.objectid import ObjectId

    _id = ObjectId(person_id)

    all_updates = {  # creating an 'update' object
        "$set": {"new_field": True},
        "$inc": {"age": 1},
        "$rename": {"first_name": "first", "last_name": "last"}

    }

    # pass in a query object that will return one object
    person_collection.update_one({"_id": _id}, all_updates)

    # Second :_id must be without quotes or won't update as it has to be the object
update_person_by_id("62dc41872470dc12304a1fdd")


In [90]:
# Update
# #Unset a field
# "$unset" removes something from the document -deletes the field name and value
# "$set" creates or overrides fields, can also do multiple fields
# "$inc" increments a field by x amount, can also do multiple fields
# "$rename" renames the field name not the value, can also do multiple fields

def update_person_by_id(person_id):
    from bson.objectid import ObjectId

    _id = ObjectId(person_id)

    # have to have something because you need key value but still deletes
    person_collection.update_one({"_id": _id}, {"$unset": {"new_field": ""}})


update_person_by_id("62dc41872470dc12304a1fdd")


In [91]:
# Replace a document
# ex: if someone is updating everything but want to keep the same id

def replace_one(person_id):
    from bson.objectid import ObjectId
    _id = ObjectId(person_id)

    new_doc = {
        "first_name": "new first name",
        "last_name": "new last name",
        "age": 100
    }

    person_collection.replace_one({"_id": _id}, new_doc)


replace_one("62dc41872470dc12304a1fdd")


DELETING

In [92]:
def delete_doc_by_id(person_id):
    from bson.objectid import ObjectId
    _id = ObjectId(person_id)

    person_collection.delete_one({"_id": _id})

    # person_collection.delete_many({}) # deletes every document
delete_doc_by_id(("62dc41872470dc12304a1fdd"))


Relationships: embedding and foreign key

In [None]:
# if the address belongs to the person...
# embedded document

person = {
    "_id": "62dc41872470dc12304a1fd6",
    "first_name": "John",
    "address": {
        "_id": "62dc41872470dc12304a1fde",
        "street": "Bay Street",
        "number": 2706,
        "city": "San Francisco",
        "country": "United States",
        "zip": "94107"

    }
}


In [93]:
# ex of function that embeds

address = {
    "_id": "62dc41872470dc12304a1fde",
    "street": "Bay Street",
    "number": 2706,
    "city": "San Francisco",
    "country": "United States",
    "zip": "94107"
}


def add_address_embed(person_id, address):
    from bson.objectid import ObjectId
    _id = ObjectId(person_id)

    # $addToSet indicates adds an array
    # if 'addresses' doesn't exist will create it
    # if it does exist will append to the end

    person_collection.update_one(
        {"_id": _id}, {"$addToSet": {'addresses': address}})


add_address_embed("62dc41872470dc12304a1fde", address)


In [None]:
# Foreign Key

address = {
    "_id": "62dc41872470dc12304a1fde",
    "street": "Bay Street",
    "number": 2706,
    "city": "San Francisco",
    "country": "United States",
    "zip": "94107",
    "owner_id": "62dc41872470dc12304a1fdd"  # references the owner
}

person = {
    "_id": "62dc41872470dc12304a1fdd",
    "first_name": "John"
}


In [94]:
# ex: of function that adds a relationship
# creates new collection called address with the address and owner id of the person

def add_address_relationship(person_id, address):
    from bson.objectid import ObjectId
    _id = ObjectId(person_id)

    address = address.copy()  # don't want to mutate the input object
    address["owner_id"] = person_id  # add field

    address_collection = production.address
    address_collection.insert_one(address)


add_address_relationship("62dc41872470dc12304a1fdf", address)
