# Connecting to the MongoDB Atlas Server

In [None]:
# 'sys' and 'os' are used to add the parent directory to the module search path
import sys, os
# abspath('..') resolves the parent of the current working directory to an absolute path, so modules in that directory become importable
sys.path.append(os.path.abspath('..')) # <-- new path appended
from my_config import MONGODB_ATLAS_CONNECTION_PASSWORD

# packages from Mongo Atlas to connect to the server
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

# percent-escape the password before embedding it in the URI: reserved
# characters such as '@', ':', '/' or '%' would otherwise corrupt the URI
# NOTE(review): assumes my_config stores the raw (not already escaped) password
from urllib.parse import quote_plus

escaped_password = quote_plus(str(MONGODB_ATLAS_CONNECTION_PASSWORD))
uri = f'mongodb+srv://dihparaguai:{escaped_password}@cluster-pipeline-python.mwvhw.mongodb.net/?retryWrites=true&w=majority&appName=cluster-pipeline-python-mongodbatlas'

# create a client object for the server
client = MongoClient(uri, server_api=ServerApi('1'))

# send a ping to confirm a successful connection; only the ping itself sits
# inside the 'try', and a failure is reported without stopping the notebook
try:
    client.admin.command('ping')
except Exception as e:
    print(e)
else:
    print("You successfully connected to MongoDB!")

# Connecting to Database

In [None]:
# select the 'db_produtos' database and, inside it, the 'produtos' collection
db = client.get_database('db_produtos')
collection = db.get_collection('produtos')

# Rename the columns

In [None]:
# find() with an empty filter matches every document and returns a cursor
cursor = collection.find({})

# walk the cursor and print each document it yields
for document in cursor:
    print(document)

In [None]:
# mapping of the current field names to their new names
field_renames = {'lat': 'Latitude', 'lon': 'Longitude'}
update = {'$rename': field_renames}

# apply the rename to every document (the empty filter matches all of them)
collection.update_many({}, update)

# read the collection again and print every document
cursor = collection.find({})
for document in cursor:
    print(document)

# Save documents matching the 'Categoria do Produto: livros' criterion to a CSV file

In [None]:
# list the distinct values found in the 'Categoria do Produto' field
collection.distinct(key='Categoria do Produto')

In [None]:
# filter: only documents whose 'Categoria do Produto' is 'livros'
query = {'Categoria do Produto': 'livros'}
cursor = collection.find(query)

# drain the cursor into a list, then print each stored document
prod_categ_livros = list(cursor)
for document in prod_categ_livros:
    print(document)

In [None]:
import pandas as pd

# build a dataframe from the list of documents
df = pd.DataFrame(data=prod_categ_livros)

# print the dataframe summary, including the type of each column
df.info()

In [None]:
# parse the 'Data da Compra' values, expected in day/month/year format,
# into datetime values and store them back in the same column
purchase_dates = pd.to_datetime(df['Data da Compra'], format='%d/%m/%Y')
df['Data da Compra'] = purchase_dates

df.info()

In [None]:
# turn the datetime values into 'year/month/day' strings
formatted_dates = df['Data da Compra'].dt.strftime('%Y/%m/%d')
df['Data da Compra'] = formatted_dates

# last expression of the cell: displays the dataframe
df

In [None]:
# write the dataframe to a csv file, leaving out the index column
output_path = '../data/produtos_categoria_livros.csv'
df.to_csv(output_path, index=False)

# Save documents with a 'Data da Compra' year greater than 2020 to a CSV file

In [None]:
# regex filter on the date text: matches values containing '/2021' through '/2029'
query = {'Data da Compra': {'$regex': '/202[1-9]'}}
cursor = collection.find(query)

# drain the cursor into a list, then print each stored document
prod_data_compra_maior_que_2020 = list(cursor)
for document in prod_data_compra_maior_que_2020:
    print(document)

In [None]:
# build a dataframe from the filtered documents
df = pd.DataFrame(data=prod_data_compra_maior_que_2020)

# parse the 'Data da Compra' values, expected in day/month/year format,
# into datetime values and store them back in the same column
purchase_dates = pd.to_datetime(df['Data da Compra'], format='%d/%m/%Y')
df['Data da Compra'] = purchase_dates

df.info()

In [None]:
# rewrite the dates as 'year/month/day' strings
formatted_dates = df['Data da Compra'].dt.strftime('%Y/%m/%d')
df['Data da Compra'] = formatted_dates

df.info()

# write the result to a csv file, leaving out the index column
output_path = '../data/produtos_data_compra_maior_que_2020.csv'
df.to_csv(output_path, index=False)

In [None]:
# close the client now that the notebook no longer needs the server
client.close()