# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING
dbName = "datamad0320"
mongodbURL = f"mongodb://localhost/{dbName}"
print(mongodbURL)
client = MongoClient(mongodbURL)
# Conectar la db
db = client.get_database()
print(db)
# Listar las colecciones
cols = db.list_collections()
for collection in cols:
    print(collection)

mongodb://localhost/datamad0320
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'datamad0320')
{'name': 'Crunch', 'type': 'collection', 'options': {}, 'info': {'readOnly': False, 'uuid': UUID('ebb70dde-3213-45d7-bb29-3aa938567994')}, 'idIndex': {'v': 2, 'key': {'_id': 1}, 'name': '_id_', 'ns': 'datamad0320.Crunch'}}


### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [28]:
import re
# Búsqueda por regex
match = re.compile(r'Babelgum')
# Hacer la búsqueda e imprimirla
companies = list(db.Crunch.find({"name":match}, {"name":1, "_id":0}))
for c in companies: print(c["name"])

Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [59]:
query = {"number_of_employees": {"$gt": 5000} }
employees = (list(db.Crunch.find((query), {"name":1, "number_of_employees":1}).sort([("number_of_employees", DESCENDING)]).limit(20)))
for c in employees:
    print(c["name"], c["number_of_employees"])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000
Samsung Electronics 221726
Accenture 205000
Tata Consultancy Services 200300
Flextronics International 200000
Safeway 186000
Sony 180500
LG 177000
Ford 171000
Boeing 160000
Digital Equipment Corporation 140000
Nokia 125000
MItsubishi Electric 107000
MItsubishi Electric 107000
Comcast 100000
Bertelsmann 100000


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [77]:
# La query
#query = {"$and":[{"founded_year":{"$gte": 2000}},{"founded_year":{"$lte": 2005}}]}
query = {"founded_year":{"$in": [i for i in range(2000,2006)]}}
c_years = (list(db.Crunch.find((query), {"name":1, "founded_year":1})))
# Imprimir
len(c_years)

3734

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [8]:
query = {"$and":[{"ipo.valuation_amount": {"$gt":100000000}}, {"founded_year": {"$lt":2010}}]}
results = (list(db.Crunch.find((query), {"name":1, "ipo":1}).limit(1000)))
# Comprobar
len(results)

42

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [81]:
query = {"$and":[{"number_of_employees": {"$lt":1000}}, {"founded_year": {"$lt":2005}}]}
results = (list(db.Crunch.find((query), {"name":1}).limit(10)))
# Comprobar
len(results)

10

### 6. All the companies that don't include the `partners` field.

In [89]:
query = { "partners": { "$exists": False } }
results = (list(db.Crunch.find((query), {"name":1})))
len(results)

0

### 7. All the companies that have a null type of value on the `category_code` field.

In [91]:
query = { "category_code": None}
results = (list(db.Crunch.find((query), {"name":1})))
len(results)

2751

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [95]:
query = {"$and":[{"number_of_employees": {"$gt":100}}, {"number_of_employees": {"$lt":1000}}]}
results = (list(db.Crunch.find((query), {"name":1, "number_of_employees":1})))
# Comprobar
len(results)

753

### 9. Order all the companies by their IPO price in a descending order.

In [138]:
# Lo he limitado a 5 para no petar el sistema
results = list(db.Crunch.find().sort([("ipo",-1)]).limit(5))

# Comprobar
len(results)


5

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [24]:
results = list(db.Crunch.find().sort([("number_of_employees", -1)]).limit(10))
len(results)
for e in results:
    print(e["name"])

Siemens
IBM
Toyota
PayPal
Nippon Telegraph and Telephone Corporation
Samsung Electronics
Accenture
Tata Consultancy Services
Flextronics International
Safeway


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [143]:
query = {"founded_month": {"$gte": 7}}
results = (list(db.Crunch.find((query), {"_id"}).limit(1000)))
len(results)

1000

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [37]:
query = {"$and":[{"founded_year": {"$lt": 2000}}, {"acquisition.price_amount": {"$gt": 10000}}]}
results = (list(db.Crunch.find((query)).limit(1000)))
len(results)

225

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [20]:
query = {"acquisition.acquired_year": {"$gt": 2010}}
results = (list(db.Crunch.find((query), {"name":1, "acquisition":1, "_id":0})
                .sort([("acquisition.acquired_year",1)]).limit(1000)))
print(len(results))
print(results[1])

736
{'name': 'Jingle Networks', 'acquisition': {'price_amount': 62500000, 'price_currency_code': 'USD', 'term_code': 'cash_and_stock', 'source_url': 'http://www.masshightech.com/stories/2011/04/11/daily11-Jingle-Networks-bought-for-up-to-625M.html', 'source_description': 'Jingle Networks bought for up to $62.5M', 'acquired_year': 2011, 'acquired_month': 4, 'acquired_day': 11, 'acquiring_company': {'name': 'Marchex', 'permalink': 'marchex'}}}


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [31]:
results = (list(db.Crunch.find({}, {"name":1, "founded_year":1}).sort([("founded_year",1)]).limit(1000)))
print(len(results))
print(results[0])

1000
{'_id': ObjectId('52cdef7c4bab8bd675297d92'), 'name': 'Flektor', 'founded_year': None}


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [33]:
query = {"founded_day": {"$lte": 7}}
results = (list(db.Crunch.find((query)).sort([("acquisition.price_amount",-1)]).limit(10)))
len(results)


10

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [34]:
query = {"$and":[{"category_code": "web"}, {"number_of_employees": {"$gt": 4000}}]}
results = (list(db.Crunch.find((query)).sort([("number_of_employees", 1)]).limit(1000)))
len(results)

9

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [35]:
query = {"$and":[{"acquisition.price_currency_code": "EUR"}, {"acquisition.price_amount": {"$gt": 10000000}}]}
results = (list(db.Crunch.find((query)).limit(1000)))
len(results)

7

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [36]:
query = {"acquisition.acquired_month": {"$lte": 3}}
results = (list(db.Crunch.find((query), {"_name":1}).limit(10)))
len(results)

10

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [10]:
query = {"$and": [{"founded_year": {"$gte": 2000,"$lte": 2010}}, {"acquisition.acquired_year": {"$gt": 2011}}]}
results = (list(db.Crunch.find((query), {"name":1}).limit(1000)))
print(len(results))


274


### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Your Code