# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [37]:
query1 = {"name":"Babelgum"}
data1 = db["companies"].find(query1, {'name': 1, '_id':0})
list(data1)
#print(data1)

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [40]:
query2 = {"number_of_employees":{"$gt":5000}}
data2 = db["companies"].find(query2).sort("number_of_employees",-1).limit(20)
n2 = list(data2)
#n2[0]

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [50]:
query3 = { "$and": [{"founded_year":{"$gte":2000}},{"founded_year":{"$lte":2005}}]}
data3 = db["companies"].find(query3, {'name': 1, "founded_year":1, '_id':0})
# print(len(list(data3)))
# 3734 empresas
# list(data3)

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [53]:
query4 = { "$and": [{"ipo.valuation_amount":{"$gt":100000000}},{"founded_year":{"$lt":2010}}]}
data4 = db["companies"].find(query4, {'name': 1, "ipo":1, '_id':0})

#list(data4)

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [55]:
query5 = { "$and": [{"number_of_employees":{"$lt":1000}},{"founded_year":{"$lt":2005}}]}
data5 = db["companies"].find(query5, {'name': 1, "number_of_employees":1, '_id':0}).sort("number_of_employees",-1).limit(10)

list(data5)

[{'name': 'Infinera Corporation', 'number_of_employees': 974},
 {'name': 'NorthPoint Communications Group', 'number_of_employees': 948},
 {'name': '888 Holdings', 'number_of_employees': 931},
 {'name': 'Forrester Research', 'number_of_employees': 903},
 {'name': 'SonicWALL', 'number_of_employees': 900},
 {'name': 'Webmetrics', 'number_of_employees': 900},
 {'name': 'Cornerstone OnDemand', 'number_of_employees': 881},
 {'name': 'Yelp', 'number_of_employees': 800},
 {'name': 'ZoomInfo', 'number_of_employees': 800},
 {'name': 'MySpace', 'number_of_employees': 800}]

### 6. All the companies that don't include the `partners` field.

In [67]:
# Aparentemente todos tienen 'partner', así que busco que exista el primer elemento dentro de 'partner'
query6 = { "partners.0": { "$exists": False }}
data6 = db["companies"].find(query6)#, {'name': 1, '_id':0})

# muestro solo 5
list(data6)[:5]

[{'_id': ObjectId('52cdef7c4bab8bd675297d8b'),
  'name': 'AdventNet',
  'permalink': 'abc3',
  'crunchbase_url': 'http://www.crunchbase.com/company/adventnet',
  'homepage_url': 'http://adventnet.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'manageengine',
  'category_code': 'enterprise',
  'number_of_employees': 600,
  'founded_year': 1996,
  'deadpooled_year': 2,
  'tag_list': '',
  'alias_list': 'Zoho ManageEngine ',
  'email_address': 'pr@adventnet.com',
  'phone_number': '925-924-9500',
  'description': 'Server Management Software',
  'created_at': datetime.datetime(2007, 5, 25, 19, 24, 22),
  'updated_at': 'Wed Oct 31 18:26:09 UTC 2012',
  'overview': '<p>AdventNet is now <a href="/company/zoho-manageengine" title="Zoho ManageEngine" rel="nofollow">Zoho ManageEngine</a>.</p>\n\n<p>Founded in 1996, AdventNet has served a diverse range of enterprise IT, networking and telecom customers.</p>\n\n<p>AdventNet supplies server and network management software.</p>

### 7. All the companies that have a null type of value on the `category_code` field.

In [70]:
query7 = { "category_code": { "$type": "null" }}
data7 = db["companies"].find(query7)#, {'name': 1, '_id':0})

# muestro solo 5
len(list(data7))
#2751 empresas

2751

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [72]:
query8 = { "$and": [{"number_of_employees":{"$gt":100}},{"number_of_employees":{"$lt":1000}}]}
data8 = db["companies"].find(query8, {'name': 1, "number_of_employees":1, '_id':0})

len(list(data8))
#753

753

### 9. Order all the companies by their IPO price in a descending order.

In [75]:
data9 = db["companies"].find().sort("ipo.valuation_amount",-1).limit(20)

list(data9)[:2]

[{'_id': ObjectId('52cdef7e4bab8bd67529a8b4'),
  'name': 'GREE',
  'permalink': 'gree',
  'crunchbase_url': 'http://www.crunchbase.com/company/gree',
  'homepage_url': 'http://www.gree-corp.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'gree_corp',
  'category_code': 'games_video',
  'number_of_employees': 700,
  'founded_year': 2004,
  'founded_month': 12,
  'founded_day': 7,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'mobile-web, japan, tokyo, social-network, mobile-social-network, mobile-games',
  'alias_list': None,
  'email_address': 'inquiry@gree-corp.com',
  'phone_number': '',
  'description': 'Internet media business,SNS,  free game',
  'created_at': 'Sat Dec 20 16:42:57 UTC 2008',
  'updated_at': 'Tue Jan 01 21:37:04 UTC 2013',
  'overview': '<p>GREE provides Japan&#8217;s leading mobile social network, and is at the forefront of mobile technology. GREE was ranked as Japan&#82

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [77]:
data10 = db["companies"].find().sort("number_of_employees",-1).limit(10)

list(data10)

[{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
  'name': 'Siemens',
  'permalink': 'siemens',
  'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
  'homepage_url': 'http://www.siemens.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': 'Siemens',
  'category_code': 'hardware',
  'number_of_employees': 405000,
  'founded_year': 1847,
  'founded_month': None,
  'founded_day': None,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': 'automation, building-technologies, drive-technology, energy',
  'alias_list': '',
  'email_address': 'contact@siemens.com',
  'phone_number': '49 89 636 34134',
  'description': 'Electronics and Electrical Engineering',
  'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
  'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
  'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. 

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [82]:
query11 = { "founded_month":{"$gt":6}}
data11 = db["companies"].find(query11, {'name': 1, "founded_month":1, '_id':0}).limit(1000)

#list(data11)[0:5]
#list(data11)[50:80]
#list(data11)[750:800]

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [14]:
query12 = { "$and": [{"acquisitions.price_amount":{"$gt":10000000}},{"founded_year":{"$lt":2000}}]}
data12 = db["companies"].find(query12, {'name': 1, '_id':0})

len(list(data12))
#266

266

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [15]:
query13 = {"acquisition.acquired_year":{"$gt":2010}}
data13 = db["companies"].find(query13, {'name': 1, 'acquisition': 1, '_id':0})

#print(len(list(data13)))
#736
list(data13)[:5]

[{'name': 'Wetpaint',
  'acquisition': {'price_amount': 30000000,
   'price_currency_code': 'USD',
   'term_code': 'cash_and_stock',
   'source_url': 'http://allthingsd.com/20131216/viggle-tries-to-bulk-up-its-social-tv-business-by-buying-wetpaint/?mod=atdtweet',
   'source_description': ' Viggle Tries to Bulk Up Its Social TV Business by Buying Wetpaint',
   'acquired_year': 2013,
   'acquired_month': 12,
   'acquired_day': 16,
   'acquiring_company': {'name': 'Viggle', 'permalink': 'viggle'}}},
 {'name': 'Digg',
  'acquisition': {'price_amount': 500000,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2012/07/12/betaworks-acquires-digg/',
   'source_description': 'Betaworks Acquires Digg (TechCrunch)',
   'acquired_year': 2012,
   'acquired_month': 7,
   'acquired_day': 12,
   'acquiring_company': {'name': 'betaworks', 'permalink': 'betaworks'}}},
 {'name': 'Geni',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
 

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [26]:
#Filtramos que el año no sea nulo y le ponemos maximo 100 para que no muera
data14 = db["companies"].find({"founded_year": {"$not":{"$eq": None}}},{'name': 1, 'founded_year':1, '_id':0}).sort("founded_year",1).limit(100)

#len(list(data14))
#266
list(data14)

[{'name': 'Alstrasoft', 'founded_year': 1800},
 {'name': 'SmallWorlds', 'founded_year': 1800},
 {'name': 'US Army', 'founded_year': 1800},
 {'name': 'DuPont', 'founded_year': 1802},
 {'name': 'McKesson', 'founded_year': 1833},
 {'name': 'Bachmann Industries', 'founded_year': 1833},
 {'name': 'Bertelsmann', 'founded_year': 1835},
 {'name': 'Accuity', 'founded_year': 1836},
 {'name': 'CENTRA', 'founded_year': 1839},
 {'name': 'WeGame', 'founded_year': 1840},
 {'name': 'VideoSurf', 'founded_year': 1840},
 {'name': 'VideoSurf', 'founded_year': 1840},
 {'name': 'The Economist Group', 'founded_year': 1843},
 {'name': 'Pearson', 'founded_year': 1844},
 {'name': 'Associated Press', 'founded_year': 1846},
 {'name': 'Beloit College', 'founded_year': 1846},
 {'name': 'Siemens', 'founded_year': 1847},
 {'name': 'Stiefel', 'founded_year': 1847},
 {'name': 'Pfizer', 'founded_year': 1848},
 {'name': 'Nykredit Realkredit', 'founded_year': 1851},
 {'name': 'Corning', 'founded_year': 1851},
 {'name': 'L

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [28]:
query15 = { "founded_day":{"$lte":7}}
data15 = db["companies"].find(query15, {'name': 1, "founded_day":1, "acquisition.price_amount":1, '_id':0}).sort("acquisition.price_amount",-1).limit(10)


list(data15)

[{'name': 'Netscape',
  'founded_day': 4,
  'acquisition': {'price_amount': 4200000000}},
 {'name': 'PayPal',
  'founded_day': 1,
  'acquisition': {'price_amount': 1500000000}},
 {'name': 'Zappos',
  'founded_day': 1,
  'acquisition': {'price_amount': 1200000000}},
 {'name': 'Alibaba',
  'founded_day': 1,
  'acquisition': {'price_amount': 1000000000}},
 {'name': 'Postini',
  'founded_day': 2,
  'acquisition': {'price_amount': 625000000}},
 {'name': 'Danger',
  'founded_day': 1,
  'acquisition': {'price_amount': 500000000}},
 {'name': 'Clearwell Systems',
  'founded_day': 6,
  'acquisition': {'price_amount': 410000000}},
 {'name': 'PrimeSense',
  'founded_day': 1,
  'acquisition': {'price_amount': 345000000}},
 {'name': 'Amobee',
  'founded_day': 1,
  'acquisition': {'price_amount': 321000000}},
 {'name': 'BlueLithium',
  'founded_day': 1,
  'acquisition': {'price_amount': 300000000}}]

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [33]:
query16 = { "$and": [{ "category_code":{"$eq":"web"}},{ "number_of_employees":{"$gt":4000 } }]}
data16 = db["companies"].find(query16, {'name': 1, '_id':0}).sort("number_of_employees",1)


list(data16)

[{'name': 'Expedia'},
 {'name': 'AOL'},
 {'name': 'Webkinz'},
 {'name': 'Rakuten'},
 {'name': 'Los Angeles Times Media Group'},
 {'name': 'Groupon'},
 {'name': 'Yahoo!'},
 {'name': 'eBay'},
 {'name': 'Experian'}]

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [36]:
query17 = { "$and": [{"acquisitions.price_amount":{"$gt":10000000}},{"acquisition.price_currency_code":{"$eq":"EUR"}}]}
data17 = db["companies"].find(query17, {'name': 1, '_id':0})

list(data17)

[{'name': 'Greenfield Online'}]

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [37]:
# acquired_month
query18 = {"acquisition.acquired_month":{"$lt":4}}
data18 = db["companies"].find(query18, {'name': 1, 'acquisition':1, '_id':0}).limit(10)

list(data18)

[{'name': 'Kyte',
  'acquisition': {'price_amount': None,
   'price_currency_code': 'USD',
   'term_code': None,
   'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
   'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
   'acquired_year': 2011,
   'acquired_month': 1,
   'acquired_day': 31,
   'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}},
 {'name': 'NetRatings',
  'acquisition': {'price_amount': 327000000,
   'price_currency_code': 'USD',
   'term_code': 'cash',
   'source_url': 'http://login.vnuemedia.com/hr/login/login_subscribe.jsp?id=0oqDem1gYIfIclz9i2%2Ffqj5NxCp2AC5DPbVnyT2da8GyV2mXjasabE128n69OrmcAh52%2FGE3pSG%2F%0AEKRYD9vh9EhrJrxukmUzh532fSMTZXL42gwPB80UWVtF1NwJ5UZSM%2BCkLU1mpYBoHFgiH%2Fi0f6Ax%0A9yMIVxt47t%2BHamhEQ0nkOEK24L',
   'source_description': 'Nielsen buys rest of NetRatings',
   'acquired_year': 2007,
   'acquired_month': 2,
   

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [41]:
# AÑO_F > 2000 AND AÑO_F < 2010 AND (AÑO_A > 2011 OR AÑO_A ES NULO)

query19 = { "$and": [{"founded_year":{"$gte":2000}},{"founded_year":{"$lte":2010}},{"$or": [{"acquisition.acquired_year":{"$gt":2011}},{"acquisition.acquired_year":{"$eq":"null"}}] }]}

data19 = db["companies"].find(query19, {'name': 1,"founded_year":1, "acquisition.acquired_year":1, '_id':0})

#len(list(data19))
#274
list(data19)[0:20]

[{'name': 'Wetpaint',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Digg',
  'founded_year': 2004,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'Geni',
  'founded_year': 2006,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'blogTV',
  'founded_year': 2006,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Revision3',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'iContact',
  'founded_year': 2003,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'Mashery',
  'founded_year': 2006,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'KickApps',
  'founded_year': 2004,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'Dailymotion',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Netvibes',
  'founded_year': 2005,
  'acquisition': {'acquired_year': 2012}},
 {'name': 'adBrite',
  'founded_year': 2003,
  'acquisition': {'acquired_year': 2013}},
 {'name': 'Meebo',
  'founded

### 20. All the companies that have been 'deadpooled' after the third year.

In [55]:
# SELECT A+B AS C FROM MYTABLE WHERE C>10
# can be translated to a mongo query as
# db.MYTABLE.find({ "$expr": { "$gt": [ { "$add": [ "$A", "$B" ] },10] }})



query20 = {"$expr": { "$gt": [{ "$subtract": ["$deadpooled_year" , "$founded_year"]} ,3]}}

select20 = {'name': 1,"founded_year":1, "deadpooled_year":1, '_id':0}

data20 = db["companies"].find(query20, select20)

list(data20)[50:100]


[{'name': 'Zipidee', 'founded_year': 2007, 'deadpooled_year': 2011},
 {'name': 'Confabb', 'founded_year': 2006, 'deadpooled_year': 2013},
 {'name': 'YowTRIP', 'founded_year': 2007, 'deadpooled_year': 2011},
 {'name': 'Buzzwire', 'founded_year': 2007, 'deadpooled_year': 2013},
 {'name': 'Eurekster', 'founded_year': 2004, 'deadpooled_year': 2008},
 {'name': 'Songbird', 'founded_year': 2006, 'deadpooled_year': 2013},
 {'name': 'GridNetworks', 'founded_year': 2004, 'deadpooled_year': 2012},
 {'name': 'Upcoming', 'founded_year': 2003, 'deadpooled_year': 2013},
 {'name': 'Faves', 'founded_year': 2004, 'deadpooled_year': 2011},
 {'name': 'Excite@Home', 'founded_year': 1995, 'deadpooled_year': 2001},
 {'name': 'Minekey', 'founded_year': 2005, 'deadpooled_year': 2012},
 {'name': 'Apprema', 'founded_year': 2007, 'deadpooled_year': 2013},
 {'name': 'Skribit', 'founded_year': 2007, 'deadpooled_year': 2012},
 {'name': 'xkoto', 'founded_year': 2005, 'deadpooled_year': 2011},
 {'name': 'SeeToo', 'fou