# Advanced Querying Mongo

Importing libraries and setting up connection

In [185]:
from pymongo import MongoClient
import pymongo
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [167]:
query = {"name":'Babelgum'}
list(db["companies"].find(query,{'name':1,'_id':0}))

[{'name': 'Babelgum'}]

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [252]:
companieMore_employees = db.companies.find({"number_of_employees":{"$gt":5000}}).sort("number_of_employees",-1).limit(20)
list(companieMore_employees)[0]

{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
 'name': 'Siemens',
 'permalink': 'siemens',
 'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
 'homepage_url': 'http://www.siemens.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': 'Siemens',
 'category_code': 'hardware',
 'number_of_employees': 405000,
 'founded_year': 1847,
 'founded_month': None,
 'founded_day': None,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': 'automation, building-technologies, drive-technology, energy',
 'alias_list': '',
 'email_address': 'contact@siemens.com',
 'phone_number': '49 89 636 34134',
 'description': 'Electronics and Electrical Engineering',
 'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
 'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
 'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. The company, formerly kno

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [169]:
companiesFounded = db.companies.find({'$and':[{"founded_year":{"$gte":2000}}, {"founded_year":{"$lte":2005}}]},{'name':1,'founded_year':1,'_id':0})
list(companiesFounded)[0]


{'name': 'Zoho', 'founded_year': 2005}

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [170]:
companiesValuation = db.companies.find({'$and':[{"ipo.valuation_amount":{"$gte":100000000}}, {"founded_year":{"$lt":2010}}]},{'name':1,'ipo':1,'_id':0})
list(companiesValuation)[0]

{'name': 'Twitter',
 'ipo': {'valuation_amount': 18100000000,
  'valuation_currency_code': 'USD',
  'pub_year': 2013,
  'pub_month': 11,
  'pub_day': 7,
  'stock_symbol': 'NYSE:TWTR'}}

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [171]:
companiesLess_employees = db.companies.find({'$and':[{"number_of_employees":{"$lt":1000}}, {"founded_year":{"$lt":2005}}]}).sort("number_of_employees").limit(10)
list(companiesLess_employees)[0]

{'_id': ObjectId('52cdef7c4bab8bd675297d93'),
 'name': 'Fox Interactive Media',
 'permalink': 'fox-interactive-media',
 'crunchbase_url': 'http://www.crunchbase.com/company/fox-interactive-media',
 'homepage_url': 'http://www.newscorp.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': 'twitterapi',
 'category_code': 'web',
 'number_of_employees': 0,
 'founded_year': 1979,
 'founded_month': 6,
 'founded_day': 1,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': '',
 'tag_list': '',
 'alias_list': None,
 'email_address': '',
 'phone_number': '',
 'description': '',
 'created_at': 'Thu May 31 21:46:57 UTC 2007',
 'updated_at': 'Mon Aug 19 17:13:27 UTC 2013',
 'overview': '<p>Fox Interactive Media (FIM) oversees <a href="http://www.crunchbase.com/company/newscorporation" title="News Corporation">News Corporation</a>&#8217;s Internet business operations.</p>',
 'image': {'available_sizes': [[[150, 71],
    'assets/images/resized/

### 6. All the companies that don't include the `partners` field.

In [172]:
withoutPartners = db.companies.find({'partners':{'$exists': False}})
list(withoutPartners)

[]

### 7. All the companies that have a null type of value on the `category_code` field.

In [173]:
companiesNull = db.companies.find({'category_code': None})
list(companiesNull)[0]

# companiesNull = db.companies.find({'category_code':{'$type': 10}})
# list(companiesNull)

# Existe alguna diferencia entre estas dos?

{'_id': ObjectId('52cdef7c4bab8bd6752980f6'),
 'name': 'Collective',
 'permalink': 'collective',
 'crunchbase_url': 'http://www.crunchbase.com/company/collective',
 'homepage_url': None,
 'blog_url': None,
 'blog_feed_url': None,
 'twitter_username': None,
 'category_code': None,
 'number_of_employees': None,
 'founded_year': None,
 'founded_month': None,
 'founded_day': None,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': None,
 'alias_list': None,
 'email_address': None,
 'phone_number': None,
 'description': None,
 'created_at': 'Thu Sep 26 13:15:02 UTC 2013',
 'updated_at': 'Thu Sep 26 13:15:02 UTC 2013',
 'overview': None,
 'image': None,
 'products': [],
 'relationships': [],
 'competitions': [],
 'providerships': [],
 'total_money_raised': '$0',
 'funding_rounds': [],
 'investments': [],
 'acquisition': None,
 'acquisitions': [],
 'offices': [],
 'milestones': [],
 'ipo': None,
 'video_embeds': [],
 'screenshot

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [240]:
comp_employees = db.companies.find({'$and':[{"number_of_employees":{"$gte":100}}, {"number_of_employees":{"$lt":1000}}]},{'name':1,'number_of_employees':1,'_id':0})
list(comp_employees)[0]

{'name': 'AdventNet', 'number_of_employees': 600}

### 9. Order all the companies by their IPO price in a descending order.

In [194]:
order_IpoPrice = db.companies.find().sort([("ipo.valuation_amount",-1)]).limit(2)
list(order_IpoPrice)[0]

{'_id': ObjectId('52cdef7e4bab8bd67529a8b4'),
 'name': 'GREE',
 'permalink': 'gree',
 'crunchbase_url': 'http://www.crunchbase.com/company/gree',
 'homepage_url': 'http://www.gree-corp.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': 'gree_corp',
 'category_code': 'games_video',
 'number_of_employees': 700,
 'founded_year': 2004,
 'founded_month': 12,
 'founded_day': 7,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': 'mobile-web, japan, tokyo, social-network, mobile-social-network, mobile-games',
 'alias_list': None,
 'email_address': 'inquiry@gree-corp.com',
 'phone_number': '',
 'description': 'Internet media business,SNS,  free game',
 'created_at': 'Sat Dec 20 16:42:57 UTC 2008',
 'updated_at': 'Tue Jan 01 21:37:04 UTC 2013',
 'overview': '<p>GREE provides Japan&#8217;s leading mobile social network, and is at the forefront of mobile technology. GREE was ranked as Japan&#8217;s fastest-growing tech

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [202]:
comMore_emp = db.companies.find().sort(([("number_of_employees",-1)])).limit(10)
list(comMore_emp)[0]

{'_id': ObjectId('52cdef7d4bab8bd67529941a'),
 'name': 'Siemens',
 'permalink': 'siemens',
 'crunchbase_url': 'http://www.crunchbase.com/company/siemens',
 'homepage_url': 'http://www.siemens.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': 'Siemens',
 'category_code': 'hardware',
 'number_of_employees': 405000,
 'founded_year': 1847,
 'founded_month': None,
 'founded_day': None,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': 'automation, building-technologies, drive-technology, energy',
 'alias_list': '',
 'email_address': 'contact@siemens.com',
 'phone_number': '49 89 636 34134',
 'description': 'Electronics and Electrical Engineering',
 'created_at': 'Thu Jul 31 09:29:43 UTC 2008',
 'updated_at': 'Thu Nov 28 20:32:55 UTC 2013',
 'overview': '<p>Siemens AG, an electronics and electrical engineering company, operates in the industry, energy, and healthcare sectors worldwide. The company, formerly kno

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [213]:
comFounded = db.companies.find({"founded_month":{'$gt':7}}).limit(1000)
list(comFounded)[0]

{'_id': ObjectId('52cdef7c4bab8bd675297d8c'),
 'name': 'Zoho',
 'permalink': 'abc4',
 'crunchbase_url': 'http://www.crunchbase.com/company/zoho',
 'homepage_url': 'http://zoho.com',
 'blog_url': 'http://blogs.zoho.com/',
 'blog_feed_url': 'http://blogs.zoho.com/feed',
 'twitter_username': 'zoho',
 'category_code': 'software',
 'number_of_employees': 1600,
 'founded_year': 2005,
 'founded_month': 9,
 'founded_day': 15,
 'deadpooled_year': 3,
 'tag_list': 'zoho, officesuite, spreadsheet, writer, projects, sheet, crm, show, creator, wiki, planner, suite, notebook, chat, meeting, mail',
 'alias_list': '',
 'email_address': 'info@zohocorp.com',
 'phone_number': '1-888-204-3539',
 'description': 'Online Business Apps Suite',
 'created_at': 'Fri May 25 19:30:28 UTC 2007',
 'updated_at': 'Wed Oct 30 00:07:05 UTC 2013',
 'overview': '<p>Zoho offers a suite of Business, Collaboration &amp; Productivity applications. Apps include CRM, Customer Support, Office Suite, Email Hosting, Project Managem

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [204]:
foundedAcq = db.companies.find({'$and':[{"founded_year":{"$lt":2000}}, {"acquisition.price_amount":{"$gt":10000}}]})
list(foundedAcq)[0]


{'_id': ObjectId('52cdef7c4bab8bd675297d90'),
 'name': 'Postini',
 'permalink': 'postini',
 'crunchbase_url': 'http://www.crunchbase.com/company/postini',
 'homepage_url': 'http://postini.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': None,
 'category_code': 'web',
 'number_of_employees': None,
 'founded_year': 1999,
 'founded_month': 6,
 'founded_day': 2,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': '',
 'alias_list': None,
 'email_address': '',
 'phone_number': '888.584.3150',
 'description': None,
 'created_at': 'Fri Jun 08 12:19:51 UTC 2007',
 'updated_at': 'Sat Aug 13 18:02:34 UTC 2011',
 'overview': '<p>Postini focuses on two main issues: security and compliance. Postini states that it handles more than 1 billion messages everyday and protects more than 35,000 businesses worldwide.</p>\n\n<p>Postini offers solutions that protect your company from malicious internet attacks. The Postini Commu

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [224]:
compAcq = db.companies.find({"acquisition.acquired_year":{"$gt":2010}},{'name':1,'acquisition':1,'_id':0}).sort("acquisition.price_amount")
list(compAcq)[0]

{'name': 'Geni',
 'acquisition': {'price_amount': None,
  'price_currency_code': 'USD',
  'term_code': None,
  'source_url': 'http://techcrunch.com/2012/11/28/all-in-the-family-myheritage-buys-former-yammer-stablemate-geni-com-raises-25m/',
  'source_description': 'MyHeritage acquires Geni and $25M to build family tree of the whole world',
  'acquired_year': 2012,
  'acquired_month': 11,
  'acquired_day': 28,
  'acquiring_company': {'name': 'MyHeritage', 'permalink': 'myheritage'}}}

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [241]:
foundedYear = db.companies.find({"founded_year":{"$exists":True}},{"name":1,"founded_year":1,"_id":0}).sort("founded_year").limit(2)
list(foundedYear)

[{'name': 'Flektor', 'founded_year': None},
 {'name': 'Lala', 'founded_year': None}]

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [257]:
firstFound = db.companies.find({"founded_day":{"$lte":7}}).sort("acquisition.price_amount",-1).limit(10)
list(firstFound)

[{'_id': ObjectId('52cdef7d4bab8bd6752989a1'),
  'name': 'Netscape',
  'permalink': 'netscape',
  'crunchbase_url': 'http://www.crunchbase.com/company/netscape',
  'homepage_url': 'http://netscape.aol.com',
  'blog_url': '',
  'blog_feed_url': '',
  'twitter_username': '',
  'category_code': 'software',
  'number_of_employees': None,
  'founded_year': 1994,
  'founded_month': 4,
  'founded_day': 4,
  'deadpooled_year': None,
  'deadpooled_month': None,
  'deadpooled_day': None,
  'deadpooled_url': None,
  'tag_list': None,
  'alias_list': '',
  'email_address': '',
  'phone_number': '',
  'description': '',
  'created_at': 'Tue May 06 00:27:28 UTC 2008',
  'updated_at': 'Thu Nov 14 00:57:06 UTC 2013',
  'overview': '<p>Netscape Communications Corporation offers development, marketing, sale, and support of enterprise software solutions. Its products include e-commerce infrastructure and e-commerce applications targeted primarily at corporate intranets and extranets, and Internet. The co

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [260]:
catWeb = db.companies.find({'$and':[{"category_code":{"$eq":'web'}}, {"number_of_employees":{"$gt":4000}}]}).sort("number_of_employees",1)
list(catWeb)[0]

{'_id': ObjectId('52cdef7c4bab8bd67529822a'),
 'name': 'Expedia',
 'permalink': 'expedia',
 'crunchbase_url': 'http://www.crunchbase.com/company/expedia',
 'homepage_url': 'http://www.expedia.com',
 'blog_url': '',
 'blog_feed_url': '',
 'twitter_username': 'Expedia',
 'category_code': 'web',
 'number_of_employees': 4400,
 'founded_year': 1996,
 'founded_month': None,
 'founded_day': None,
 'deadpooled_year': None,
 'deadpooled_month': None,
 'deadpooled_day': None,
 'deadpooled_url': None,
 'tag_list': 'travel, flights, hotels, tickets, airline, vacation, travel-comparison, travel-search',
 'alias_list': '',
 'email_address': '',
 'phone_number': '1-800-EXPEDIA',
 'description': '',
 'created_at': 'Sun Feb 10 03:29:12 UTC 2008',
 'updated_at': 'Sun Dec 08 07:03:33 UTC 2013',
 'overview': '<p>Expedia.com is an Internet-based travel agency and a part of Expedia, Inc.. It books airline tickets, hotel reservations, car rentals, cruises, vacation packages, and various attractions and servi

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [261]:
currEur = db.companies.find({'$and':[{"acquisition.price_amount":{"$gte":10000}}, {"acquisition.price_currency_code":{"$gt":'EUR'}}]})
list(currEur)[0]

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'name': 'Wetpaint',
 'permalink': 'abc2',
 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
 'homepage_url': 'http://wetpaint-inc.com',
 'blog_url': 'http://digitalquarters.net/',
 'blog_feed_url': 'http://digitalquarters.net/feed/',
 'twitter_username': 'BachelrWetpaint',
 'category_code': 'web',
 'number_of_employees': 47,
 'founded_year': 2005,
 'founded_month': 10,
 'founded_day': 17,
 'deadpooled_year': 1,
 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
 'alias_list': '',
 'email_address': 'info@wetpaint.com',
 'phone_number': '206.859.6300',
 'description': 'Technology Platform Company',
 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for di

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [267]:
frstTri = db.companies.find({"acquisition.acquired_month":{"$lte":3}},{'name':1,'acquisition':1,'_id':0}).limit(10)
list(frstTri)[0]

{'name': 'Kyte',
 'acquisition': {'price_amount': None,
  'price_currency_code': 'USD',
  'term_code': None,
  'source_url': 'http://techcrunch.com/2011/01/31/exclusive-kit-digital-acquires-kickapps-kewego-and-kyte-for-77-2-million/',
  'source_description': 'KIT digital Acquires KickApps, Kewego AND Kyte For $77.2 Million',
  'acquired_year': 2011,
  'acquired_month': 1,
  'acquired_day': 31,
  'acquiring_company': {'name': 'KIT digital', 'permalink': 'kit-digital'}}}

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Your Code