# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
client = MongoClient()

In [2]:
db = client.get_database("companies")

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [3]:
e1 = list(db.companies.find({"name":"Babelgum"},{"name":1}))
print(e1)

[{'_id': ObjectId('52cdef7c4bab8bd675297da0'), 'name': 'Babelgum'}]


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [19]:
e2 = list(db.companies.find({"number_of_employees":{"$gt":500}}
                         ).sort([("number_of_employees",1)]).limit(20))

#I only print the length of the query, to prevent a monstruous print
print(len(e2))

20


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [5]:
e3 = list(db.companies.find({"founded_year":{"$gte":2000, "$lte":2005}},
                    {"name":1,"founded_year":1}))

#To check, I only print the first 3
print(e3[:3])

[{'_id': ObjectId('52cdef7c4bab8bd675297d8a'), 'name': 'Wetpaint', 'founded_year': 2005}, {'_id': ObjectId('52cdef7c4bab8bd675297d8c'), 'name': 'Zoho', 'founded_year': 2005}, {'_id': ObjectId('52cdef7c4bab8bd675297d8f'), 'name': 'Omnidrive', 'founded_year': 2005}]


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [6]:
filter_q = {"founded_year":{"$lt":2010},"ipo.valuation_amount":{"$gt":100000000} }
project_q = {"name":1,"ipo":1}
e4 = list(db.companies.find(filter_q,project_q ))

print(len(e4))

42


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [7]:
filter_q = {"founded_year":{"$lt":2005},"number_of_employees":{"$lt":1000} }
sort_q = [("number_of_employees",1)]
e5 = list(db.companies.find(filter_q).sort(sort_q).limit(10))

#Printing the name, employee number and year of foundation of the selection
for i in e5:
    print(i["name"],i["number_of_employees"],i["founded_year"])

Fox Interactive Media 0 1979
Skype 0 2003
Ticketmaster 0 1976
stylediary 0 2004
MindTouch 0 2004
Simpy 0 2004
Eurekster 0 2004
Compete 0 2000
EditGrid 0 2003
Monster 0 1994


### 6. All the companies that don't include the `partners` field.

In [8]:
filter_q = {"partners":{"$ne":None} }
e6 = list(db.companies.find(filter_q))
print(len(e6))

18801


### 7. All the companies that have a null type of value on the `category_code` field.

In [9]:
filter_q = {"category_code":{"$ne":None} }
e7 = list(db.companies.find(filter_q))
print(len(e7))

16050


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [10]:
filter_q = {"number_of_employees":{"$gte":100,"$lt":1000}}
project_q = {"name":1,"number_of_employees":1}
e8 = list(db.companies.find(filter_q,project_q))
print(len(e8))

917


### 9. Order all the companies by their IPO price in a descending order.

In [11]:
#I will put a limit of 40, just to execute this
filter_q = {"ipo.valuation_amount":{"$ne":None}}
e9 = list(db.companies.find(filter_q).sort([("ipo.valuation_amount",-1)]) )
print(len(e9))

61


### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [12]:
sort_q = [("number_of_employees",-1)]
e10 = list(db.companies.find({}).sort(sort_q).limit(10))

#Printing the name and number of employees of the selection
for i in e10:
    print(i["name"],i["number_of_employees"])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000
Samsung Electronics 221726
Accenture 205000
Tata Consultancy Services 200300
Flextronics International 200000
Safeway 186000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [13]:
filter_q = {"founded_month":{"$gte":6}}
e11 = list(db.companies.find(filter_q,project_q).limit(1000))

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [14]:
filter_q = {"founded_year":{"$lt":2000}, "acquisition.price_amount":{"$gt":100000}}
e12 = list(db.companies.find(filter_q))
print(len(e12))

225


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [15]:
filter_q = {"acquisition.acquired_year":{"$gt":2010}}
project_q = {"name":1,"acquisition":1}
sort_q = [("acquisition.price_amount",1)]
e13 = list(db.companies.find(filter_q,project_q).sort(sort_q))
print(len(e13))

736


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [21]:
#OperationFailure: Executor error during find command :: caused by :: 
#Sort operation used more than the maximum 33554432 bytes of RAM... 

#limit only the 10 first to run this exercise

filter_q = {"founded_year":{"$ne":None}}
project_q = {"name":1,"founded_year":1}
sort_q = [("founded_year",1)]
e14 = list(db.companies.find(filter_q,project_q).sort(sort_q).limit(10))

for el in e14:
    print(el["name"],el["founded_year"])

Alstrasoft 1800
SmallWorlds 1800
US Army 1800
DuPont 1802
McKesson 1833
Bachmann Industries 1833
Bertelsmann 1835
Accuity 1836
CENTRA 1839
WeGame 1840


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [25]:
filter_q = {"founded_day":{"$lte":7}}
sort_q = [("acquisition.price_amount",-1)]
e15 = list(db.companies.find(filter_q).sort(sort_q).limit(10))
for el in e15:
    print(el["name"],el["founded_day"],el["acquisition"]["price_amount"])

Netscape 4 4200000000
PayPal 1 1500000000
Zappos 1 1200000000
Alibaba 1 1000000000
Postini 2 625000000
Danger 1 500000000
Clearwell Systems 6 410000000
PrimeSense 1 345000000
Amobee 1 321000000
BlueLithium 1 300000000


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [31]:
filter_q = {"number_of_employees":{"$gt":4000}, "category_code":"web"}
sort_q = [("number_of_employees",1)]
e16 = list(db.companies.find(filter_q).sort(sort_q).limit(10))

for el in e16:
    print(el["name"],el["number_of_employees"],el["category_code"])

Expedia 4400 web
AOL 8000 web
Webkinz 8657 web
Rakuten 10000 web
Los Angeles Times Media Group 10000 web
Groupon 10000 web
Yahoo! 13600 web
eBay 15000 web
Experian 15500 web


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [40]:
filter_q = {"acquisition.price_amount":{"$gt":10000000},
            "acquisition.price_currency_code":"EUR"}
e17 = list(db.companies.find(filter_q))


for el in e17:
    print(el["name"],
          el["acquisition"]["price_amount"]
          ,el["acquisition"]["price_currency_code"])

ZYB 31500000 EUR
Apertio 140000000 EUR
Greenfield Online 40000000 EUR
Webedia 70000000 EUR
Wayfinder 24000000 EUR
Tuenti Technologies 70000000 EUR
BioMed Central 43400000 EUR


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [42]:
filter_q = {"acquisition.acquired_month":{"$lte":3}}
project_q = {"name":1,"acquisition":1}
e18 = list(db.companies.find(filter_q,project_q).limit(10))

for el in e18:
    print(el["name"],el["acquisition"]["acquired_month"])

Kyte 1
NetRatings 2
blogTV 3
Livestream 1
iContact 2
Coghead 2
Dailymotion 2
Netvibes 2
BabyCenter 3
Flickr 3


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [44]:
filter_q = {"founded_year":{"$gte":2000,"$lte":2010},
            "acquisition.acquired_year":{"$not":{"$lt":2011}}}
e19 = list(db.companies.find(filter_q))

print(len(e19))

9943


### 20. All the companies that have been 'deadpooled' after the third year.

In [57]:
filter_q = {"$expr":{"$gt":[{"$subtract":["$deadpooled_year","$founded_year"]},3]}} 
e20 = list(db.companies.find(filter_q))
print(len(e20))


#Checking
count = 0
for el in e20:
    if (el["deadpooled_year"] +3) > el["founded_year"]:
        count += 1

print(count)

437
437
