# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
client = MongoClient()

In [2]:
db = client.get_database("companies")

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [3]:
e1 = list(db.companies.find({"name":"Babelgum"},{"name":1}))
print(e1)

[{'_id': ObjectId('52cdef7c4bab8bd675297da0'), 'name': 'Babelgum'}]


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [4]:
e2 = list(db.companies.find({"number_of_employees":{"$gt":500}}
                         ).sort([("number_of_employees",1)]).limit(29))

#I only print the length of the query, to prevent a monstruous print
print(len(e2))

29


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [5]:
e3 = list(db.companies.find({"founded_year":{"$gte":2000, "$lte":2005}},
                    {"name":1,"founded_year":1}))

#To check, I only print the first 3
print(e3[:3])

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint', 'founded_year': 2005}, {'_id': ObjectId('52cdef7c4bab8bd675297d8c'), 'name': 'Zoho', 'founded_year': 2005}, {'_id': ObjectId('52cdef7c4bab8bd675297d8f'), 'name': 'Omnidrive', 'founded_year': 2005}]


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [6]:
filter_q = {"founded_year":{"$lt":2010},"ipo.valuation_amount":{"$gt":100000000} }
project_q = {"name":1,"ipo":1}
e4 = list(db.companies.find(filter_q,project_q ))

print(len(e4))

42


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [34]:
filter_q = {"founded_year":{"$lt":2005},"number_of_employees":{"$lt":1000} }
sort_q = [("number_of_employees",1)]
e5 = list(db.companies.find(filter_q).sort(sort_q).limit(10))

#Printing the name, employee number and year of foundation of the selection
for i in e10:
    print(i["name"],i["number_of_employees"],i["founded_year"])

Siemens 405000 1847
IBM 388000 1896
Toyota 320000 1933
PayPal 300000 1998
Nippon Telegraph and Telephone Corporation 227000 1985
Samsung Electronics 221726 1969
Accenture 205000 2001
Tata Consultancy Services 200300 1968
Flextronics International 200000 1969
Safeway 186000 1915


### 6. All the companies that don't include the `partners` field.

In [8]:
filter_q = {"partners":{"$ne":None} }
e6 = list(db.companies.find(filter_q))
print(len(e6))

18801


### 7. All the companies that have a null type of value on the `category_code` field.

In [9]:
filter_q = {"category_code":{"$ne":None} }
e7 = list(db.companies.find(filter_q))
print(len(e7))

16050


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [10]:
filter_q = {"number_of_employees":{"$gte":100,"$lt":1000}}
project_q = {"name":1,"number_of_employees":1}
e8 = list(db.companies.find(filter_q,project_q))
print(len(e8))

917


### 9. Order all the companies by their IPO price in a descending order.

In [27]:
#OperationFailure: Executor error during find command :: caused by :: 
#Sort operation used more than the maximum 33554432 bytes of RAM. 
#Add an index, or specify a smaller limit., full error: {'ok': 0.0, 
#'errmsg': 'Executor error during find command :: caused by :: Sort operation 
#used more than the maximum 33554432 bytes of RAM. Add an index, or specify a 
#smaller limit.', 'code': 96, 'codeName': 'OperationFailed'}

#I will put a limit of 40, just to execute this

e9 = list(db.companies.find({}).sort([("ipo.valuation_amount",-1)]).limit(40) )

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [33]:
sort_q = [("number_of_employees",-1)]
e10 = list(db.companies.find({}).sort(sort_q).limit(10))

#Printing the name and number of employees of the selection
for i in e10:
    print(i["name"],i["number_of_employees"])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000
Samsung Electronics 221726
Accenture 205000
Tata Consultancy Services 200300
Flextronics International 200000
Safeway 186000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [None]:
# Your Code

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [None]:
# Your Code

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [None]:
# Your Code

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [None]:
# Your Code

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [None]:
# Your Code

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [None]:
# Your Code

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [None]:
# Your Code

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [None]:
# Your Code

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [None]:
# Your Code

### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Your Code