## Data Storage Scratchpad
This is an interactive playground to experiment with and learn the MongoDB database API using `pymongo`.

We are using a mock instance of the MongoClient, so the Docker container does not need to be running.

### Setup

In [1]:
from mongomock import MongoClient
from mongomock.collection import Collection

# Mock client from mongomock: no running MongoDB server is required.
client = MongoClient()

# Handle to the "automl" database.
db = client.get_database("automl")

# Handles to the collections used below; a collection plays roughly the role
# a table plays in a relational database.
datasets: Collection = db.get_collection("datasets")
trainings: Collection = db.get_collection("trainings")
models: Collection = db.get_collection("models")

## Write Documents

In [2]:

# Each document is built first and then handed to insert_one. The returned
# result exposes `inserted_id`, the `_id` value stored with the new document.

# --- dataset ---------------------------------------------------------------
dataset_document = {
    "id": "dataset001",
    "name": "titanic.csv",
    "user_id": "acb123",
    "path": "/tmp/titanic.csv",
    # provided by dataset analysis team
    "analysis": {"noideawhatthisis": True},
    "creation_date": "2022-05-18 10:25",
}
result = datasets.insert_one(dataset_document)
print(f"dataset id {result.inserted_id}")

# --- training --------------------------------------------------------------
training_document = {
    "id": "training001",
    "start_date": "",
    "end_date": "",
    "configuration": {"somejson": True},
    "status": "running",
}
result = trainings.insert_one(training_document)
print(f"training id {result.inserted_id}")

# --- model -----------------------------------------------------------------
model_document = {
    "id": "model001",
    "configuration": {"somejson": True},
    "predictiontime": 12345.67,
    "metric_score": 0.001,
}
result = models.insert_one(model_document)
print(f"model id {result.inserted_id}")

dataset id 62a73be9f999b4f679446857
training id 62a73be9f999b4f679446858
model id 62a73be9f999b4f679446859


## Read Documents

In [3]:

# Look up each document by its application-level "id" field (not the
# generated "_id"). find_one returns the first matching document as a dict,
# or None when nothing matches.
result = datasets.find_one({"id": "dataset001"})
print(f"found dataset with name: {result['name']}")

# This document comes from the trainings collection, so label it as a training.
result = trainings.find_one({"id": "training001"})
print(f"found training with configuration: {result['configuration']}")

result = models.find_one({"id": "model001"})
print(f"found model with predictiontime: {result['predictiontime']}")

found dataset with name: titanic.csv
found dataset with configuration: {'somejson': True}
found model with predictiontime: 12345.67


## Update Documents

In [4]:

# "$set" overwrites only the listed fields ("status" here) and leaves the rest
# of the document untouched.
update_result = trainings.update_one(
    {"id": "training001"},
    {"$set": {"status": "finished"}},
)
# Fail loudly if the filter matched nothing, instead of silently updating no
# document. matched_count stays 1 when the cell is re-run.
assert update_result.matched_count == 1, "training001 not found"

# Read the document back to show the new status; the whole document is printed.
result = trainings.find_one({"id": "training001"})
print(f"found training: {result}")

found dataset with configuration: {'id': 'training001', 'start_date': '', 'end_date': '', 'configuration': {'somejson': True}, 'status': 'finished', '_id': ObjectId('62a73be9f999b4f679446858')}
