# Advanced Querying Mongo

Importing libraries and setting up connection

In [2]:
!pip3 install pymongo

Collecting pymongo
  Downloading pymongo-3.10.1-cp37-cp37m-macosx_10_9_x86_64.whl (350 kB)
[K     |████████████████████████████████| 350 kB 2.4 MB/s eta 0:00:01
[?25hInstalling collected packages: pymongo
Successfully installed pymongo-3.10.1


In [26]:
from pymongo import MongoClient
dbName="companies.json"
mongodbURL = f"mongodb://localhost/{dbName}"
client = MongoClient(mongodbURL, connectTimeoutMS=2000,serverSelectionTimeoutMS=2000)
db = client.get_database()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [31]:
# Your Code
query=db.companies.find({"name":"Babelgum"})
for n in query:
    print(n["name"])

Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [61]:
# Your Code
from pymongo import ASCENDING, DESCENDING
employees=db.companies.find({"number_of_employees":{"$gt":5000}}).limit(20).sort([("number_of_employees", DESCENDING)])
for e in employees:
    print(e["name"],e["number_of_employees"])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000
Samsung Electronics 221726
Accenture 205000
Tata Consultancy Services 200300
Flextronics International 200000
Safeway 186000
Sony 180500
LG 177000
Ford 171000
Boeing 160000
Digital Equipment Corporation 140000
Nokia 125000
MItsubishi Electric 107000
MItsubishi Electric 107000
Comcast 100000
Bertelsmann 100000


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [94]:
# Your Code
foundation_years=db.companies.find({"founded_year": {"$gte": 2000,"$lt":2006}},{"name":1,"founded_year":1})
for f in foundation_years:
    print(f["name"],f["founded_year"])

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [105]:
# Your Code
valuation=db.companies.find({"$and":[{"ipo.valuation_amount":{"$gte":100000000}},{"founded_year":{"$lt":2010}}]},{"name":1,"ipo":1})
for v in valuation:
    print(v["name"],v["ipo"])

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [186]:
# Your Code
employees_number=db.companies.find({"$and":[{"number_of_employees":{"$lt":1000}},{"founded_year":{"$lt":2005}}]}).limit(10).sort([("number_of_employees", DESCENDING)])
for e in employees_number:
    print(e["name"],e["number_of_employees"],e["founded_year"])

Infinera Corporation 974 2000
NorthPoint Communications Group 948 1997
888 Holdings 931 1997
Forrester Research 903 1983
SonicWALL 900 1991
Webmetrics 900 1999
Cornerstone OnDemand 881 1999
Yelp 800 2004
ZoomInfo 800 2000
MySpace 800 2003


### 6. All the companies that don't include the `partners` field.

In [113]:
# Your Code
partners_notexist=db.companies.find({"partners":{"$exists":"false"}})
for x in partners_notexist:
    print(x["name"])

### 7. All the companies that have a null type of value on the `category_code` field.

In [121]:
# Your Code
category_null=db.companies.find({"category_code":None}) # tambien se puede poner {"type":"null"}}
for x in category_null:
    print(x["name"])

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [124]:
# Your Code
employees_number2=db.companies.find({"number_of_employees":{"$gte": 100,"$lt":1000}},{"name":1,"number_of_employees":1})
for e in employees_number2:
    print(e["name"],e["number_of_employees"])

### 9. Order all the companies by their IPO price in a descending order.

In [131]:
# Your Code
ipo_price=db.companies.find({"ipo.valuation_amount":{"exists":"true"}}).sort([("ipo.valuation_amount", DESCENDING)])
for n in ipo_price:
    print(n["name"],n["ipo"]["valuation_amount"])

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [138]:
# Your Code
top_companies=db.companies.find({"number_of_employees":{"exists":True}}).limit(10).sort([("number_of_employees", DESCENDING)])
for e in top_companies:
    print(e["name"],e["number_of_employees"])

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [135]:
# Your Code
second_semester=db.companies.find({"founded_month":{"$gte": 6,"$lt":12}}).limit(1000)
for e in second_semester:
    print(e["name"],e["founded_month"])                             

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [146]:
# Your Code
companies=db.companies.find({"$and":[{"founded_year":{"$lt": 2000}},{"acquisition.price_amount":{"$gt": 1000000}}]})
for e in companies:
    print(e["name"],e["founded_year"],e["acquisition"]["price_amount"])  

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [151]:
# Your Code
companies2=db.companies.find({"acquisition.acquired_year":{"$gt": 2010}},{"acquisition.price_amount":1,"name":1}).sort([("acquisition.price_amount", DESCENDING)])
for e in companies2:
    print(e["name"],e["acquisition"])

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [152]:
# Your Code
companies_year=db.companies.find({"founded_year":{"exists":True}},{"name":1,"founded_year":1}).sort([("founded_year", DESCENDING)]) 
for x in companies_year:
    print(x["name"],x["founded_year"])

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [158]:
# Your Code
cursor=db.companies.find({"founded_day":{"$lte":7}}).limit(10).sort([("acquisition.price_amount", DESCENDING)]) 
for x in cursor:
    print(x["name"],x["founded_day"],x["acquisition"]["price_amount"])

Netscape 4 4200000000
PayPal 1 1500000000
Zappos 1 1200000000
Alibaba 1 1000000000
Postini 2 625000000
Danger 1 500000000
Clearwell Systems 6 410000000
PrimeSense 1 345000000
Amobee 1 321000000
BlueLithium 1 300000000


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [166]:
# Your Code
cursor2=db.companies.find({"$and":[{"category_code":{"$eq":"web"}},{"number_of_employees":{"$gt": 4000}}]}).sort([("number_of_employees", DESCENDING)])
for x in cursor2:
    print(x["name"],x["category_code"],x["number_of_employees"])

Experian web 15500
eBay web 15000
Yahoo! web 13600
Rakuten web 10000
Los Angeles Times Media Group web 10000
Groupon web 10000
Webkinz web 8657
AOL web 8000
Expedia web 4400


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [165]:
# Your Code
cursor3=db.companies.find({"$and":[{"acquisition.price_amount":{"$gt": 10000000}},{"acquisition.price_currency_code":{"$eq":"EUR"}}]})
for x in cursor3:
    print(x["name"],x["acquisition"]["price_amount"],x["acquisition"]["price_currency_code"])

ZYB 31500000 EUR
Apertio 140000000 EUR
Greenfield Online 40000000 EUR
Webedia 70000000 EUR
Wayfinder 24000000 EUR
Tuenti Technologies 70000000 EUR
BioMed Central 43400000 EUR


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [169]:
# Your Code
cursor4=db.companies.find({"acquisition.acquired_month":{"$lte":3}},{"name":1,"acquisition":1}).limit(10)
for x in cursor4:
    print(x["name"],x["acquisition"])

Kyte {'price_amount': None, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/', 'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million', 'acquired_year': 2011, 'acquired_month': 1, 'acquired_day': 31, 'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}
NetRatings {'price_amount': 327000000, 'price_currency_code': 'USD', 'term_code': 'cash', 'source_url': 'http://login.vnuemedia.com/hr/login/login_subscribe.jsp?id=0oqDem1gYIfIclz9i2%2Ffqj5NxCp2AC5DPbVnyT2da8GyV2mXjasabE128n69OrmcAh52%2FGE3pSG%2F%0AEKRYD9vh9EhrJrxukmUzh532fSMTZXL42gwPB80UWVtF1NwJ5UZSM%2BCkLU1mpYBoHFgiH%2Fi0f6Ax%0A9yMIVxt47t%2BHamhEQ0nkOEK24L', 'source_description': 'Nielsen buys rest of NetRatings', 'acquired_year': 2007, 'acquired_month': 2, 'acquired_day': None, 'acquiring_company': {'name': 'Nielsen', 'permalink': 'nielsen'}}
blogTV {'price_amou

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [174]:
# Your Code
cursor5=db.companies.find({"$and":[{"founded_year":{"$gte": 2000,"$lte":2010}},{"acquisition.acquired_year":{"$gt": 2011}}]}).limit(10)
for f in cursor5:
    print(f["name"],f["founded_year"],f["acquisition"]["acquired_year"])

Wetpaint 2005 2013
Digg 2004 2012
Geni 2006 2012
blogTV 2006 2013
Revision3 2005 2012
iContact 2003 2012
Mashery 2006 2013
KickApps 2004 2012
Netvibes 2005 2012
Dailymotion 2005 2013


### 20. All the companies that have been 'deadpooled' after the third year.

In [184]:
# Your Code
cursor6=db.companies.aggregate([{"$project":{"name": 1,"dateDifference":{"$subtract": [ "$founded_year","$deadpooled_year"]}}}])

#db.sales.aggregate( [ { $project: { item: 1, dateDifference: { $subtract: [ "$$NOW", "$date" ] } } } ] )