# Advanced Querying Mongo

Importing libraries and setting up connection

In [19]:
from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING
dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"
print(mongodbURL)
client = MongoClient(mongodbURL)
db = client.get_database()

mongodb://localhost/companies


### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [13]:
cursor = db.companies.find({'name':'Babelgum'}, {"name":1})
for e in cursor:
    print(e['name'])

Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [26]:
cursor = db.companies.find({'number_of_employees':{'$gt':5000}}).sort([('number_of_employees', DESCENDING)]).limit(20)
for e in cursor:
    print(e['name'], e['number_of_employees'])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000
Samsung Electronics 221726
Accenture 205000
Tata Consultancy Services 200300
Flextronics International 200000
Safeway 186000
Sony 180500
LG 177000
Ford 171000
Boeing 160000
Digital Equipment Corporation 140000
Nokia 125000
MItsubishi Electric 107000
MItsubishi Electric 107000
Comcast 100000
Bertelsmann 100000


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [30]:
#Limited to 10 for brevity
years = list(range(2000, 2006))
cursor = db.companies.find({'founded_year':{'$in':years}}, {'name': 1, 'founded_year': 1}).limit(10)
for e in cursor:
    print(e['name'], e['founded_year'])

Zoho 2005
Wetpaint 2005
Omnidrive 2005
Digg 2004
Gizmoz 2003
StumbleUpon 2002
Facebook 2004
Helio 2005
Plaxo 2002
Technorati 2002


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [41]:
cursor = db.companies.find({'$and':[{'ipo.valuation_amount':{'$gt': 100000000}},
                                    {'founded_year':{'$lt':2010}}]}, 
                           {'name':1,'ipo':1})
for i,e in enumerate(cursor):
    if i == 3: break
    print(e['name'], e['ipo'])

Facebook {'valuation_amount': 104000000000, 'valuation_currency_code': 'USD', 'pub_year': 2012, 'pub_month': 5, 'pub_day': 18, 'stock_symbol': 'NASDAQ:FB'}
Twitter {'valuation_amount': 18100000000, 'valuation_currency_code': 'USD', 'pub_year': 2013, 'pub_month': 11, 'pub_day': 7, 'stock_symbol': 'NYSE:TWTR'}
Yelp {'valuation_amount': 1300000000, 'valuation_currency_code': 'USD', 'pub_year': 2012, 'pub_month': 3, 'pub_day': 2, 'stock_symbol': 'NYSE:YELP'}


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [45]:
cursor = db.companies.find({'$and':[{'number_of_employees':{'$lt':1000}}, {'founded_year':{'$lt':2005}}]}).sort([('number_of_employees', DESCENDING)]).limit(10)

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['number_of_employees'], e['founded_year'])

Infinera Corporation 974 2000
NorthPoint Communications Group 948 1997
888 Holdings 931 1997
Forrester Research 903 1983
SonicWALL 900 1991


### 6. All the companies that don't include the `partners` field.

In [55]:
cursor = db.companies.find({'partners':{'$exists':'false'}})

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'])

AdventNet
Zoho
Wetpaint
Omnidrive
Postini


### 7. All the companies that have a null type of value on the `category_code` field.

In [58]:
cursor = db.companies.find({'category_code': None})

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'])

Collective
Snimmer
KoolIM
Level9 Media
VidKing


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [63]:
cursor = db.companies.find({'$and':[{'number_of_employees':{'$gt':100}},{'number_of_employees':{'$lt':1000}}]},
                          {'name':1, 'number_of_employees':1})

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['number_of_employees'])

AdventNet 600
AddThis 120
OpenX 305
LifeLock 644
Jajah 110


### 9. Order all the companies by their IPO price in a descending order.

In [71]:
#Asumo que el IPO price es el IPO valuation_amount
cursor = db.companies.find({'ipo.valuation_amount':{'$exists':'true'}}).sort([('ipo.valuation_amount', DESCENDING)])

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['ipo']['valuation_amount'])

GREE 108960000000
Facebook 104000000000
Amazon 100000000000
Twitter 18100000000
Groupon 12800000000


### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [72]:
cursor = db.companies.find().sort([('number_of_employees', DESCENDING)]).limit(10)

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['number_of_employees'])

Siemens 405000
IBM 388000
Toyota 320000
PayPal 300000
Nippon Telegraph and Telephone Corporation 227000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [73]:
cursor = db.companies.find({'founded_month':{'$gt':5}}).limit(1000)

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['founded_month'])

Zoho 9
Wetpaint 10
Omnidrive 11
Postini 6
Geni 6


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [76]:
cursor = db.companies.find({'$and':[{'acquisition.price_amount':{'$gt':10000000}},{'founded_year':{'$lt': 2000}}]})

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['acquisition']['price_amount'], e['founded_year'])

Postini 625000000 1999
SideStep 180000000 1999
Recipezaar 25000000 1999
PayPal 1500000000 1998
Snapfish 300000000 1999


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [95]:
cursor = db.companies.find({'acquisition.acquired_year':{'$gt':2010}},
                          {'name':1, 'acquisition':1}).sort([('acquisition.price_amount', DESCENDING)])

for i,e in enumerate(cursor):
    if i == 2: break
    print(e['name'],e['acquisition'])

T-Mobile {'price_amount': 39000000000, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://techcrunch.com/2011/03/20/in-the-race-for-more-spectrum-att-is-acquiring-t-mobile-for-39-billion/', 'source_description': 'In The Race For More Spectrum, AT&T Is Acquiring T-Mobile For $39 Billion', 'acquired_year': 2011, 'acquired_month': 3, 'acquired_day': 20, 'acquiring_company': {'name': 'AT&T', 'permalink': 'at-t'}}
Goodrich Corporation {'price_amount': 18400000000, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://www.masshightech.com/stories/2011/09/19/daily37-UTC-shells-out-184-billion-for-Goodrich.html', 'source_description': 'UTC shells out $18.4 billion for Goodrich', 'acquired_year': 2011, 'acquired_month': 9, 'acquired_day': 22, 'acquiring_company': {'name': 'United Technologies', 'permalink': 'united-technologies'}}


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [100]:
#Tengo que limitar la busqueda porque no me da la RAM del ordenador :P
cursor = db.companies.find({},{'name':1,'founded_year':1}).sort([('founded_year', DESCENDING)]).limit(50)

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'],e['founded_year'])

Fixya 2013
Wamba 2013
Advaliant 2013
Fluc 2013
iBazar 2013


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [101]:
cursor = db.companies.find({'founded_day':{'$lte':7}}).sort([('acquisition.price_amount', DESCENDING)]).limit(10)

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['founded_day'])

Netscape 4
PayPal 1
Zappos 1
Alibaba 1
Postini 2


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [104]:
cursor = db.companies.find({'$and':[{'category_code':{'$eq':'web'}},{'number_of_employees':{'$gt':4000}}]}).sort([('number_of_employees', ASCENDING)])

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['category_code'], e['number_of_employees'])

Expedia web 4400
AOL web 8000
Webkinz web 8657
Rakuten web 10000
Los Angeles Times Media Group web 10000


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [124]:
#Aquí lo intenté con $elemMatch, pero me devolvía un cursor vacío, no se por qué...
#cursor = db.companies.find({'acquisition':{'$elemMatch':{'price_amount':{'$gt':10000000}, 'price_currency_code':'EUR'}}})
cursor = db.companies.find({'$and':[{'acquisition.price_amount':{'$gt':10000000}}, {'acquisition.price_currency_code':{'$eq':'EUR'}}]})


for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['acquisition']['price_amount'], e['acquisition']['price_currency_code'])


ZYB 31500000 EUR
Apertio 140000000 EUR
Greenfield Online 40000000 EUR
Webedia 70000000 EUR
Wayfinder 24000000 EUR


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [110]:
cursor = db.companies.find({'founded_month':{'$lt':4}}, {'name':1, 'acquisition':1})

for i,e in enumerate(cursor):
    if i == 2: break
    print(e['name'], e['acquisition'])

Gizmoz {'price_amount': None, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://www.gizmoz.com', 'source_description': 'TechCrunch', 'acquired_year': 2009, 'acquired_month': 12, 'acquired_day': 15, 'acquiring_company': {'name': 'Daz 3d', 'permalink': 'daz-3d'}}
StumbleUpon {'price_amount': 29000000, 'price_currency_code': 'USD', 'term_code': None, 'source_url': 'http://techcrunch.com/2009/04/13/ebay-unacquires-stumbleupon/', 'source_description': "StumbleUpon Beats Skype In Escaping EBay's Clutches", 'acquired_year': 2009, 'acquired_month': 4, 'acquired_day': None, 'acquiring_company': {'name': 'StumbleUpon', 'permalink': 'stumbleupon'}}


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [112]:
cursor = db.companies.find({'$and':[{'$and':[{'founded_year':{'$gte':2000}}, {'founded_year':{'$lte':2010}}]}, 
                                            {'acquisition.acquired_year':{'$gt':2011}}]})

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['founded_year'], e['acquisition']['acquired_year'])

Wetpaint 2005 2013
Geni 2006 2012
Digg 2004 2012
blogTV 2006 2013
Revision3 2005 2012


### 20. All the companies that have been 'deadpooled' after the third year.

In [143]:
#cursor = db.companies.find({'$and':[{'deadpooled_year':{'$ne': None }}, 
#                                    {'deadpooled_year':{'$gt':'founded_year'}}]})
#cursor = db.companies.find({'deadpooled_year':{'$ne': None }})
cursor = db.companies.find({'deadpooled_year':{'$gt':'founded_year'}})
{"$where":"this.bounceCount > this.sentCount"}

for i,e in enumerate(cursor):
    if i == 5: break
    print(e['name'], e['deadpooled_year'])
#len(list(cursor))