# CHANGE IMPORT LINES' PATH NAMES

In [1]:
from pymongo import MongoClient
from pprint import pprint

# MONGOSH ON TERMINAL

### ———— BASICS ———— 

— lists all dbs
Show dbs

— switches to specific db
Use travel_db

— shows current db
Db

— Creates collection
db.createCollection(‘destinations’)

— lists collections
Show collections

— inserts data into the collection
db.destinations.insertOne(json format)

— shows all info in collection
db.destinations.find()

— query specific dict
db.destinations.find({dict_name})


### ———— CRUD ———— 


— updates Egypt’s continent to Antarctica 
db.destinations.updateOne({‘country’: ‘Egypt’}, {$set: {‘continent’: ‘Antarctica’}})

— updates all countries’ continents to Antarctica 
db.destinations.updateMany({‘country’: ‘Egypt’}, {$set: {‘continent’: ‘Antarctica’}})

— adds data
db.destinations.update({"country": "Morocco"}, {$push: {"major_cities": "Agadir"}})

— deletes all data from collection
db.destinations.remove()

— removes specific entry
db.destinations.remove({"country": "USA"}, {justOne: true})

— deletes collection
db.destinations.drop()

— deleted database
db.dropDatabase()


### ———— IMPORTING ———— 


mongoimport --type csv -d dbName -c collectionName --headerline --drop filename.csv

mongoimport --type json -d dbName -c collectionName --drop --jsonArray filename.json


### ———— USEFUL TOOLS ———— 


— increment integer
{$inc: {‘key’: int}}

— set key to specified type
db.mechanics.updateMany({}, [{'$set': {"wages.hourly_rate" : {'$toDouble': "$wages.hourly_rate"}}}])



### BASICS

In [2]:
mongo = MongoClient(port=27017)

In [3]:
# List all databases
mongo.list_database_names()

['admin', 'config', 'local', 'notes', 'test']

In [4]:
# Assign specified db
db = mongo['notes']

In [5]:
# List collections in db
db.list_collection_names()

['ohio_daily_records',
 'mechanics',
 'customer_list',
 'annual_aqi_by_county',
 'customers',
 'ohio_air']

In [6]:
db.classroom.find_one()

### CRUD

In [7]:
# Create a mongo db 
db = mongo.classDB

# Declare a collection
classroom = db.classroom

In [8]:
# Insert a document into the 'classroom' collection
classroom.insert_one(
    {
        'name': 'Ahmed',
        'row': 3,
        'favorite_python_library': 'Matplotlib',
        'hobbies': ['Running', 'Stargazing', 'Reading']
    }
)

InsertOneResult(ObjectId('67624a1cb5d8460f76d59eba'), acknowledged=True)

In [9]:
# Query the collection.
students = classroom.find()
for student in students:
    print(student)

{'_id': ObjectId('67624a1cb5d8460f76d59eba'), 'name': 'Ahmed', 'row': 3, 'favorite_python_library': 'Matplotlib', 'hobbies': ['Running', 'Stargazing', 'Reading']}


In [10]:
# Update
classroom.update_one(
    {'name': 'Ahmed'},
    {'$set':
        {'row': 4}
     }
)

# Query
for i in classroom.find():
    print(i)

{'_id': ObjectId('67624a1cb5d8460f76d59eba'), 'name': 'Ahmed', 'row': 4, 'favorite_python_library': 'Matplotlib', 'hobbies': ['Running', 'Stargazing', 'Reading']}


In [11]:
# Add an item to a document array
classroom.update_one(
    {'name': 'Ahmed'},
    {'$push':
        {'hobbies': 'Listening to country music'}
    }
)

# Query
for i in classroom.find():
    print(i)

{'_id': ObjectId('67624a1cb5d8460f76d59eba'), 'name': 'Ahmed', 'row': 4, 'favorite_python_library': 'Matplotlib', 'hobbies': ['Running', 'Stargazing', 'Reading', 'Listening to country music']}


In [12]:
# Delete a field from a document
classroom.update_one({'name': 'Ahmed'},
     {'$unset':
          {'row': ""}
     }
)

# Query
for i in classroom.find():
    print(i)

{'_id': ObjectId('67624a1cb5d8460f76d59eba'), 'name': 'Ahmed', 'favorite_python_library': 'Matplotlib', 'hobbies': ['Running', 'Stargazing', 'Reading', 'Listening to country music']}


In [13]:
# Delete a document from a collection

# or classroom.delete_many(query)
classroom.delete_one(
    {'name': 'Ahmed'}
)

# Query
for i in db.classroom.find():
    print(i)

In [14]:
# myclient = pymongo.MongoClient("mongodb://localhost:27017/")
# mydb = myclient["mydatabase"]

db["classroom"].drop()
# MongoClient().drop_database('classDB')

### IMPORTING DOCUMENTS

In [15]:
# CSV files
# ! mongoimport --type csv -d petsitly_marketing -c customer_list --headerline --drop ../Resources/customer_database.csv

In [16]:
# JSON Files
# ! mongoimport --type json -d petsitly_marketing -c customer_list --drop --jsonArray ../Resources/customer_list.json

### QUERYING

In [17]:
# Imports if needed
# ! mongoimport --type csv -d notes -c annual_aqi_by_county --headerline --drop ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/06-Stu_AirFields/Resources/annual_aqi_by_county_2022.csv
# ! mongoimport --type json -d notes -c ohio_daily_records --drop --jsonArray ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/06-Stu_AirFields/Resources/ohio_daily_records_2022.json
# ! mongoimport --type json -d notes -c ohio_air --drop --jsonArray ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/06-Stu_AirFields/Resources/ohio_jan_2022.json
# ! mongoimport --type json -d notes -c ohio_air --jsonArray ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/06-Stu_AirFields/Resources/ohio_feb_2022.json

In [18]:
# Set db to epa database and variables to collections
db = mongo['notes']
ohio_daily_records = db['ohio_daily_records']
ohio_air = db['ohio_air']
annual_aqi_by_county = db['annual_aqi_by_county']
db.list_collection_names()

['ohio_daily_records',
 'mechanics',
 'customer_list',
 'annual_aqi_by_county',
 'customers',
 'ohio_air']

In [19]:
# Count of documents in collection
annual_aqi_by_county.count_documents({})

647

In [20]:
# Count from specified query
query = {'parameter': "Sulfur dioxide"}
print("Number of documents in result:", ohio_air.count_documents(query))

Number of documents in result: 6764


In [21]:
# JSON pprint
pprint(ohio_air.find_one(query))

{'_id': ObjectId('6762460f768c4557c8f32e4d'),
 'aqi': None,
 'arithmetic_mean': 0.0,
 'cbsa': 'Cleveland-Elyria, OH',
 'cbsa_code': '17460',
 'city': 'Newburgh Heights',
 'county': 'Cuyahoga',
 'county_code': '035',
 'date_local': '2022-01-01',
 'date_of_last_change': '2022-09-19',
 'datum': 'NAD83',
 'event_type': 'No Events',
 'first_max_hour': 23,
 'first_max_value': 0.0,
 'latitude': 41.446624,
 'local_site_name': 'Harvard Yards',
 'longitude': -81.662356,
 'method': 'Instrumental - Ecotech Serinus 50',
 'method_code': '188',
 'observation_count': 1,
 'observation_percent': 100.0,
 'parameter': 'Sulfur dioxide',
 'parameter_code': '42401',
 'poc': 1,
 'pollutant_standard': 'SO2 24-hour 1971',
 'sample_duration': '24-HR BLK AVG',
 'sample_duration_code': 'X',
 'site_address': '4600 HARVARD AVE.',
 'site_number': '0065',
 'state': 'Ohio',
 'state_code': '39',
 'units_of_measure': 'Parts per billion',
 'validity_indicator': 'Y'}


In [22]:
# Query ONLY Speified Fields in Document
query = {}
fields = {'parameter': 1, 
          'wages.units_of_measure': 1, 
          'observation_count': 1 , 
          'date_local': 1, 
          'local_site_name': 1, 
          'site_address': 1, 
          'city': 1, 
          'county': 1}

# print
results = ohio_air.find(query, fields)
for i in range(2):
    pprint(results[i])

{'_id': ObjectId('6762460f768c4557c8f32e4d'),
 'city': 'Newburgh Heights',
 'county': 'Cuyahoga',
 'date_local': '2022-01-01',
 'local_site_name': 'Harvard Yards',
 'observation_count': 1,
 'parameter': 'Sulfur dioxide',
 'site_address': '4600 HARVARD AVE.'}
{'_id': ObjectId('6762460f768c4557c8f32e4e'),
 'city': 'Newburgh Heights',
 'county': 'Cuyahoga',
 'date_local': '2022-01-01',
 'local_site_name': 'Harvard Yards',
 'observation_count': 24,
 'parameter': 'Sulfur dioxide',
 'site_address': '4600 HARVARD AVE.'}


In [23]:
# # Query ALL EXCEPT Speified Fields in Document
query = {}
fields = {"COUNTY_CODE": 0, 
          "STATE_CODE": 0, 
}

# print
results = ohio_daily_records.find(query, fields)
for i in range(2):
    pprint(results[i])

{'CBSA_CODE': 17460.0,
 'CBSA_NAME': 'Cleveland-Elyria, OH',
 'CO': {'AQS_PARAMETER_CODE': '42101',
        'AQS_PARAMETER_DESC': 'Carbon monoxide',
        'DAILY_AQI_VALUE': '0',
        'DAILY_OBS_COUNT': 24,
        'Daily Max 8-hour CO Concentration': 0.0,
        'PERCENT_COMPLETE': 100.0,
        'UNITS': 'ppm'},
 'COUNTY': 'Cuyahoga',
 'Date': '01/08/2022',
 'NO2': None,
 'POC': 1,
 'SITE_LATITUDE': 41.504605,
 'SITE_LONGITUDE': -81.69045,
 'SO2': None,
 'STATE': 'Ohio',
 'Site ID': 390350051,
 'Site Name': 'Galleria',
 'Source': 'AQS',
 '_id': ObjectId('6762460eacc6893f752eb6ee')}
{'CBSA_CODE': 17460.0,
 'CBSA_NAME': 'Cleveland-Elyria, OH',
 'CO': {'AQS_PARAMETER_CODE': '42101',
        'AQS_PARAMETER_DESC': 'Carbon monoxide',
        'DAILY_AQI_VALUE': '2',
        'DAILY_OBS_COUNT': 24,
        'Daily Max 8-hour CO Concentration': 0.2,
        'PERCENT_COMPLETE': 100.0,
        'UNITS': 'ppm'},
 'COUNTY': 'Cuyahoga',
 'Date': '01/05/2022',
 'NO2': None,
 'POC': 1,
 'SITE_LAT

In [24]:
# Query and return specified fields
query = {'CO.UNITS': "ppm", "NO2.UNITS": "ppb"}
fields = {"CBSA_NAME": 1, "COUNTY": 1, "Site Name": 1, "Date": 1, "CO": 1, "NO2": 1, "SO2": 1}

results = ohio_daily_records.find(query, fields)
for i in range(2):
    pprint(results[i])

{'CBSA_NAME': 'Cleveland-Elyria, OH',
 'CO': {'AQS_PARAMETER_CODE': '42101',
        'AQS_PARAMETER_DESC': 'Carbon monoxide',
        'DAILY_AQI_VALUE': '3',
        'DAILY_OBS_COUNT': 18,
        'Daily Max 8-hour CO Concentration': 0.3,
        'PERCENT_COMPLETE': 75.0,
        'UNITS': 'ppm'},
 'COUNTY': 'Cuyahoga',
 'Date': '01/01/2022',
 'NO2': {'AQS_PARAMETER_CODE': '42602',
         'AQS_PARAMETER_DESC': 'Nitrogen dioxide (NO2)',
         'DAILY_AQI_VALUE': 12,
         'DAILY_OBS_COUNT': 23,
         'Daily Max 1-hour NO2 Concentration': 13.0,
         'PERCENT_COMPLETE': 96.0,
         'UNITS': 'ppb'},
 'SO2': {'AQS_PARAMETER_CODE': '42401',
         'AQS_PARAMETER_DESC': 'Sulfur dioxide',
         'DAILY_AQI_VALUE': 0,
         'DAILY_OBS_COUNT': 23,
         'Daily Max 1-hour SO2 Concentration': 0.0,
         'PERCENT_COMPLETE': 96.0,
         'UNITS': 'ppb'},
 'Site Name': 'GT Craig NCore',
 '_id': ObjectId('6762460eacc6893f752eb7ac')}
{'CBSA_NAME': 'Cleveland-Elyria, OH',


In [25]:
# Change the data type
ohio_daily_records.update_many({}, [ {'$set':{ "CO.PERCENT_COMPLETE" : {'$toDouble': "$CO.PERCENT_COMPLETE"}}} ])

UpdateResult({'n': 7335, 'nModified': 0, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

### COMPARISON OPERATORS

In [26]:
# ! mongoimport --type json -d notes -c customer_list --drop --jsonArray ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/08-Stu_FindPets/Resources/customer_list.json
db = mongo['notes']
customers = db['customer_list']


In [27]:
# $lte <=
# $gte >=
# $lt <
# $gt >
# $eq =
# $neq !=
# $in if in list
# $nin if not in list
# $regex if expression in value

q = customers.find({'2021_Visits': {'$gt': 50}})
# q = customers.find({'2021_Total_Spend': {'$lte': 250}})
# q = customers.find({'Address': {'$regex': 'Suite'}})
# q = customers.find({'Pet_Type': {'$in': ['turtle', 'fish']}})

for i in range(2):
    pprint(q[i])

{'2021_Total_Spend': 1483.5,
 '2021_Visits': 86,
 'Address': '096 Jimenez Turnpike Apt. 451',
 'Customer_First': 'Leesa',
 'Customer_Last': 'White',
 'Email': 'kmassey@gmail.com',
 'Pet_Type': 'cat',
 '_id': ObjectId('676246f1e64fd29d792c0838')}
{'2021_Total_Spend': 1017.75,
 '2021_Visits': 59,
 'Address': '5123 Preston Plain',
 'Customer_First': 'Victoriano',
 'Customer_Last': 'McClure',
 'Email': 'christopher91@yahoo.com',
 'Pet_Type': 'dog',
 '_id': ObjectId('676246f1e64fd29d792c083a')}


### SORTING

In [28]:
# ! mongoimport --type json -d notes -c mechanics --drop --jsonArray ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/05-Ins_SelectingFields/Resources/mechanics.json
# ! mongoimport --type csv -d notes -c customers --headerline --drop ../UofM-VIRT-DATA-PT-09-2024-U-LOLC/12-NoSQL-Databases/2/Activities/05-Ins_SelectingFields/Resources/customers.csv
customers = db['customers']


In [29]:
# query Nissans and Hyundais
query = {'car_make': {'$in': ["Nissan", "Hyundai"]}}
# Select fields
fields = {'car_vin': 0, 'last_service': 0}
# Sorts car_year descending, then last_service ascending
sort = [('car_year', -1), ('last_service', 1)]
# limits 5 results
limit = 5

pprint(list(customers.find(query, fields).sort(sort).limit(limit)))

[{'_id': 416,
  'car_make': 'Nissan',
  'car_model': 'Versa',
  'car_year': 2012,
  'full_name': 'Trina Pride'},
 {'_id': 966,
  'car_make': 'Hyundai',
  'car_model': 'Tucson',
  'car_year': 2012,
  'full_name': 'Corny Marryatt'},
 {'_id': 488,
  'car_make': 'Nissan',
  'car_model': 'Sentra',
  'car_year': 2012,
  'full_name': 'Katerine Giffin'},
 {'_id': 204,
  'car_make': 'Nissan',
  'car_model': 'Maxima',
  'car_year': 2011,
  'full_name': 'Alane Sturgess'},
 {'_id': 969,
  'car_make': 'Hyundai',
  'car_model': 'Santa Fe',
  'car_year': 2011,
  'full_name': 'Feodor Bellin'}]


In [30]:
mongo.list_database_names()

['admin', 'config', 'local', 'notes', 'test']

In [31]:
db = mongo['notes']

In [32]:
db.list_collection_names()

['ohio_daily_records',
 'mechanics',
 'customer_list',
 'annual_aqi_by_county',
 'customers',
 'ohio_air']