# Advanced Querying Mongo

Importing libraries and setting up connection

In [19]:
import pandas as pd
from pymongo import MongoClient
client = MongoClient("localhost:27017")
client.list_database_names()
db = client.get_database("ironhack")
db.list_collection_names()

['countries_small', 'companies', 'books', 'restaurants']

In [17]:
collection = db.get_collection("companies")

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [74]:
ex1 = collection.find({"name": "Babelgum"}, {"name": 1, "_id": 0})
df = pd.DataFrame(ex1)
df

Unnamed: 0,name
0,Babelgum


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [73]:
ex2 = collection.find({"number_of_employees": {"$gt": 5000}}).limit(20).sort("number_of_employees", -1)
df2 = pd.DataFrame(ex2)
df2.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
13,52cdef7d4bab8bd675298b28,Boeing,boeing,http://www.crunchbase.com/company/boeing,http://www.boeing.com,,,boeingairplanes,other,160000,...,[],,"[{'price_amount': None, 'price_currency_code':...","[{'description': 'Corporate Headquarters', 'ad...","[{'id': 44355, 'description': ' Boeing To Rep...",,[],"[{'available_sizes': [[[150, 94], 'assets/imag...","[{'external_url': 'http://jobs-boeing.com/', '...",[]
15,52cdef7c4bab8bd675297fc2,Nokia,nokia,http://www.crunchbase.com/company/nokia,http://nokia.com,,,nokia,mobile,125000,...,"[{'funding_round': {'round_code': 'b', 'source...",,"[{'price_amount': 96800000, 'price_currency_co...","[{'description': 'headquarter', 'address1': 'K...","[{'id': 9161, 'description': 'Nokia, AT&T Putt...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 131], 'assets/ima...",[{'external_url': 'http://en.wikipedia.org/wik...,[]
1,52cdef7c4bab8bd67529856a,IBM,ibm,http://www.crunchbase.com/company/ibm,http://www.ibm.com,,,IBM,software,388000,...,"[{'funding_round': {'round_code': 'c', 'source...",,"[{'price_amount': None, 'price_currency_code':...","[{'description': 'Corporate Headquarters', 'ad...","[{'id': 10471, 'description': 'IBM Completes A...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 93], 'assets/imag...","[{'external_url': 'http://mashpedia.com/IBM', ...",[]
9,52cdef7d4bab8bd675299156,Safeway,safeway,http://www.crunchbase.com/company/safeway,http://www.safeway.com,,,,other,186000,...,[],,[],"[{'description': 'HQ', 'address1': '5918 Stone...",[],,[],"[{'available_sizes': [[[150, 123], 'assets/ima...",[],[]
12,52cdef7d4bab8bd675299d31,Ford,ford,http://www.crunchbase.com/company/ford,http://www.ford.com,,,FordService,automotive,171000,...,[],,"[{'price_amount': 9000000, 'price_currency_cod...","[{'description': '', 'address1': 'One American...",[],"{'valuation_amount': None, 'valuation_currency...",[],[],[],[]


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [72]:
ex3 = collection.find(
    {"$and": [
        {"founded_year": {"$gt": 1999}},
        {"founded_year": {"$lt": 2006}}
    ]},
    {"name":1, "founded_year":1})
df3 = pd.DataFrame(ex3)
df3.sample(5)

Unnamed: 0,_id,name,founded_year
784,52cdef7d4bab8bd6752989b5,YourTechOnline,2000
611,52cdef7c4bab8bd675298650,Wikimedia Foundation,2003
3321,52cdef7f4bab8bd67529bdf6,BiddingForGood,2003
622,52cdef7c4bab8bd675298684,Skycore,2003
1133,52cdef7d4bab8bd67529908e,Linspire,2001


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [101]:
ex4 = collection.find(
    {"$and": [
        {"ipo.valuation_amount": {"$gt": 100000000}},
        {"founded_year": {"$lt": 2010}}
    ]},
    {"name":1, "ipo":1, "_id": 0})
df4 = pd.DataFrame(ex4)
df4.sample(5)

Unnamed: 0,name,ipo
7,Nielsen,"{'valuation_amount': 1600000000, 'valuation_cu..."
31,QlikTech,"{'valuation_amount': 1000000000, 'valuation_cu..."
14,TripAdvisor,"{'valuation_amount': 3273770000, 'valuation_cu..."
17,QuinStreet,"{'valuation_amount': 140000000, 'valuation_cur..."
23,Marketo,"{'valuation_amount': 465000000, 'valuation_cur..."


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [56]:
ex5 = collection.find(
    {"$and": [
        {"number_of_employees": {"$lt": 1000}},
        {"founded_year": {"$lt": 2005}}
    ]}
    ).limit(10).sort("number_of_employees", 1)
df5 = pd.DataFrame(ex5)
df5

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
0,52cdef7c4bab8bd675297d93,Fox Interactive Media,fox-interactive-media,http://www.crunchbase.com/company/fox-interact...,http://www.newscorp.com,,,twitterapi,web,0,...,"[{'funding_round': {'round_code': 'b', 'source...",,"[{'price_amount': 20000000, 'price_currency_co...","[{'description': '', 'address1': '407 N Maple ...",[],,"[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 94], 'assets/imag...",[],[]
1,52cdef7c4bab8bd675297e12,Ticketmaster,ticketmaster,http://www.crunchbase.com/company/ticketmaster,http://www.ticketmaster.com,,,Ticketmaster,web,0,...,"[{'funding_round': {'round_code': 'b', 'source...","{'price_amount': None, 'price_currency_code': ...","[{'price_amount': 265000000, 'price_currency_c...","[{'description': 'World Headquarters', 'addres...",[],,[],"[{'available_sizes': [[[102, 150], 'assets/ima...",[],[]
2,52cdef7c4bab8bd67529807b,Eurekster,eurekster,http://www.crunchbase.com/company/eurekster,http://www.eurekster.com,,,,web,0,...,[],,[],"[{'description': None, 'address1': '', 'addres...",[],,"[{'embed_code': '<div style=""width:320px;""><di...",[],[],[]
3,52cdef7c4bab8bd675297eba,MindTouch,mindtouch,http://www.crunchbase.com/company/mindtouch,http://www.mindtouch.com,http://www.mindtouch.com/blog/,http://feeds.feedburner.com/Mindtouch,MindTouch,enterprise,0,...,[],,[],"[{'description': '', 'address1': '401 West A S...",[],,"[{'embed_code': '<object width=""450"" height=""2...","[{'available_sizes': [[[150, 121], 'assets/ima...",[],[]
4,52cdef7c4bab8bd675297dbc,Skype,skype,http://www.crunchbase.com/company/skype,http://www.skype.com,http://blogs.skype.com,http://feeds.feedburner.com/shareskypeen,skype,software,0,...,"[{'funding_round': {'round_code': 'a', 'source...","{'price_amount': 2600000000, 'price_currency_c...","[{'price_amount': 150000000, 'price_currency_c...","[{'description': 'Global HQ', 'address1': '23-...","[{'id': 552, 'description': 'Skype released pu...",,"[{'embed_code': '<iframe src=""http://blip.tv/p...","[{'available_sizes': [[[150, 130], 'assets/ima...",[{'external_url': 'http://www.ewdn.com/2011/06...,[]
5,52cdef7c4bab8bd6752981bb,Monster,monster,http://www.crunchbase.com/company/monster,http://monster.com,http://monsterworking.com/?HPS=9_4C6MonsterBlog,,MonsterWW,search,0,...,[],,"[{'price_amount': 93700000, 'price_currency_co...","[{'description': 'HQ', 'address1': '622 Third ...","[{'id': 8469, 'description': '$225 million acq...",,[],"[{'available_sizes': [[[150, 124], 'assets/ima...",[{'external_url': 'http://ceoworld.biz/2013/09...,[]
6,52cdef7c4bab8bd675298196,EditGrid,editgrid,http://www.crunchbase.com/company/editgrid,http://www.editgrid.com,http://blog.editgrid.com,http://feeds.feedburner.com/EditGridBlog,EditGrid,public_relations,0,...,[],,[],"[{'description': None, 'address1': '', 'addres...","[{'id': 9141, 'description': 'Was acquired (Un...",,"[{'embed_code': '<object width=""425"" height=""3...",[],[],[]
7,52cdef7c4bab8bd675298063,Simpy,simpy,http://www.crunchbase.com/company/simpy,http://www.simpy.com,http://blog.simpy.com/,http://blog.simpy.com/blojsom/blog/?flavor=rss2,,web,0,...,[],,[],"[{'description': '', 'address1': '', 'address2...",[],,[],[],[{'external_url': 'http://www.hiking-unlimited...,[]
8,52cdef7c4bab8bd6752980af,Compete,compete,http://www.crunchbase.com/company/compete,http://www.compete.com/us,http://blog.compete.com/,http://feeds.feedburner.com/compete/RMiU,compete,consulting,0,...,[],"{'price_amount': 150000000, 'price_currency_co...",[],"[{'description': 'Compete', 'address1': '501 B...","[{'id': 25223, 'description': 'Da questo nuovo...",,"[{'embed_code': '<iframe src=""http://player.vi...",[],[],[]
9,52cdef7c4bab8bd675297e2e,stylediary,stylediary,http://www.crunchbase.com/company/stylediary,http://www.stylediary.net,http://www.industrygirlblog.com,http://www.industrygirlblog.com/?feed=rss2,,web,0,...,[],"{'price_amount': None, 'price_currency_code': ...",[],"[{'description': None, 'address1': '', 'addres...",[],,[],"[{'available_sizes': [[[150, 94], 'assets/imag...",[],[]


### 6. All the companies that don't include the `partners` field.

In [78]:
ex6 = collection.find({"parters": {"$exists": False}})
df6 = pd.DataFrame(ex6)
df6.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,offices,milestones,video_embeds,screenshots,external_links,partners,deadpooled_month,deadpooled_day,deadpooled_url,ipo
948,52cdef7c4bab8bd67529813e,Xindesk,xindesk,http://www.crunchbase.com/company/xindesk,http://www.xindesk.com,http://cloudodev.blogspot.com,http://cloudodev.blogspot.com/feeds/posts/defa...,,web,,...,"[{'description': None, 'address1': 'Majorsgata...",[],[],"[{'available_sizes': [[[150, 93], 'assets/imag...",[],[],,,,
10488,52cdef7e4bab8bd67529a682,Royal Bank of Scotland,royal-bank-of-scotland,http://www.crunchbase.com/company/royal-bank-o...,,,,,,,...,[],[],[],[],[],[],,,,
15577,52cdef7e4bab8bd67529ba63,Interface Medien,interface-medien,http://www.crunchbase.com/company/interface-me...,http://www.interface-medien.de,,,InterfaceMedien,ecommerce,25.0,...,[],[],"[{'embed_code': '<iframe width=""560"" height=""3...",[],[],[],,,,
18775,52cdef7f4bab8bd67529c6e1,Purfresh,purfresh,http://www.crunchbase.com/company/purfresh,http://www.purfresh.com,http://info.purfresh.com/PurTalk,,Purfresh_Inc,cleantech,,...,"[{'description': 'HQ', 'address1': '1350 Willo...",[],[],"[{'available_sizes': [[[150, 91], 'assets/imag...",[],[],,,,
1723,52cdef7c4bab8bd675298445,crowdSPRING,crowdspring,http://www.crunchbase.com/company/crowdspring,http://www.crowdspring.com,http://blog.crowdspring.com/,http://blog.crowdspring.com/feed/,crowdSPRING,ecommerce,14.0,...,"[{'description': None, 'address1': '1200 W. La...",[],[],[],[],[],,,,


### 7. All the companies that have a null type of value on the `category_code` field.

In [71]:
ex7 = collection.find({"category_code": null})
df7 = pd.DataFrame(ex7)
df7
#NO ME SALE

NameError: name 'null' is not defined

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [85]:
ex8 = collection.find(
    {"$and": [
        {"number_of_employees": {"$gt": 100}},
        {"number_of_employees": {"$lt": 1000}}
    ]},
    {"name":1, "number_of_employees":1, "_id":0})
df8 = pd.DataFrame(ex8)
df8.sample(5)

Unnamed: 0,name,number_of_employees
361,LivingLink,800
456,BlueKai,142
517,National Association of Professional Women,200
264,Avature,120
613,NaturalMotion,150


### 9. Order all the companies by their IPO price in a descending order.

In [106]:
ex9 = collection.find({"ipo.valuation_amount": {"$exists": True}}, {"name":1, "ipo":1, "_id":0}).sort("ipo", -1)
df9 = pd.DataFrame(ex9)
df9.head(5)

Unnamed: 0,name,ipo
0,GREE,"{'valuation_amount': 108960000000, 'valuation_..."
1,Facebook,"{'valuation_amount': 104000000000, 'valuation_..."
2,Amazon,"{'valuation_amount': 100000000000, 'valuation_..."
3,Twitter,"{'valuation_amount': 18100000000, 'valuation_c..."
4,Groupon,"{'valuation_amount': 12800000000, 'valuation_c..."


### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [109]:
ex10 = collection.find().sort("number_of_employees", -1)
df10 = pd.DataFrame(ex10)
df10.head(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
0,52cdef7d4bab8bd67529941a,Siemens,siemens,http://www.crunchbase.com/company/siemens,http://www.siemens.com,,,Siemens,hardware,405000.0,...,[],,"[{'price_amount': 418000000, 'price_currency_c...","[{'description': '', 'address1': 'Wittelsbache...","[{'id': 35852, 'description': 'Siemens Receive...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<iframe width=""420"" height=""3...","[{'available_sizes': [[[150, 92], 'assets/imag...",[],[]
1,52cdef7c4bab8bd67529856a,IBM,ibm,http://www.crunchbase.com/company/ibm,http://www.ibm.com,,,IBM,software,388000.0,...,"[{'funding_round': {'round_code': 'c', 'source...",,"[{'price_amount': None, 'price_currency_code':...","[{'description': 'Corporate Headquarters', 'ad...","[{'id': 10471, 'description': 'IBM Completes A...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 93], 'assets/imag...","[{'external_url': 'http://mashpedia.com/IBM', ...",[]
2,52cdef7d4bab8bd675299d33,Toyota,toyota,http://www.crunchbase.com/company/toyota,http://www.toyota-global.com,,,Toyota,enterprise,320000.0,...,"[{'funding_round': {'round_code': 'a', 'source...",,[],"[{'description': 'HQ', 'address1': 'Toyota Mot...",[],,"[{'embed_code': '<iframe width=""430"" height=""3...","[{'available_sizes': [[[150, 84], 'assets/imag...",[],[]
3,52cdef7c4bab8bd675297e89,PayPal,paypal,http://www.crunchbase.com/company/paypal,http://www.paypal.com,,,paypal,finance,300000.0,...,"[{'funding_round': {'round_code': 'c', 'source...","{'price_amount': 1500000000, 'price_currency_c...","[{'price_amount': None, 'price_currency_code':...","[{'description': '', 'address1': '2145 E Hamil...","[{'id': 11916, 'description': 'Reserve Bank Of...",,[],"[{'available_sizes': [[[150, 120], 'assets/ima...",[{'external_url': 'http://www.sociableblog.com...,[]
4,52cdef7e4bab8bd67529b0fe,Nippon Telegraph and Telephone Corporation,nippon-telegraph-and-telephone-corporation,http://www.crunchbase.com/company/nippon-teleg...,http://www.ntt.co.jp/index_e.html,,,,,227000.0,...,"[{'funding_round': {'round_code': 'e', 'source...",,[],"[{'description': 'NTT', 'address1': '3-1, Otem...",[],,[],"[{'available_sizes': [[[150, 42], 'assets/imag...",[],[]


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [110]:
ex11 = collection.find({"founded_month": {"$gt": 5}}).limit(1000)
df11 = pd.DataFrame(ex11)
df11.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,offices,milestones,video_embeds,screenshots,external_links,partners,deadpooled_month,deadpooled_day,deadpooled_url,ipo
786,52cdef7d4bab8bd6752988e5,YouBeats,youbeats,http://www.crunchbase.com/company/youbeats,http://www.youbeats.com,,,,games_video,3.0,...,[],[],[],"[{'available_sizes': [[[150, 96], 'assets/imag...",[],[],,,,
550,52cdef7c4bab8bd6752984ea,Panizon,panizon,http://www.crunchbase.com/company/panizon,http://www.panizon.com,http://www.panizon.com/blog,http://www.panizon.com/blog/feed/,,advertising,4.0,...,"[{'description': None, 'address1': '', 'addres...",[],[],"[{'available_sizes': [[[150, 94], 'assets/imag...",[],[],,,,
484,52cdef7c4bab8bd6752983bd,ooVoo,oovoo,http://www.crunchbase.com/company/oovoo,http://www.oovoo.com,http://oovoo.tumblr.com,,ooVoo,web,95.0,...,"[{'description': 'Headquarters', 'address1': '...","[{'id': 23407, 'description': 'Robert Jackman ...",[],"[{'available_sizes': [[[150, 129], 'assets/ima...",[{'external_url': 'http://www.forbes.com/sites...,[],,,,
775,52cdef7d4bab8bd6752988c0,Aardvark,aardvark,http://www.crunchbase.com/company/aardvark,http://vark.com,http://blog.vark.com/,http://blog.vark.com/?feed=rss2,vark,web,30.0,...,"[{'description': None, 'address1': '', 'addres...",[],"[{'embed_code': '<embed src=""http://blip.tv/pl...",[],[],[],9.0,,http://googleblog.blogspot.com/2011/09/fall-sp...,
854,52cdef7d4bab8bd675298a40,Picitup,picitup,http://www.crunchbase.com/company/picitup,http://www.picitup.com,,,picitupcorp,search,6.0,...,"[{'description': None, 'address1': '', 'addres...",[],[],"[{'available_sizes': [[[150, 94], 'assets/imag...",[],[],,,,


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [120]:
ex12 = collection.find(
    {"$and": [
        {"founded_year": {"$lt": 2001}},
        {"acquisition.price_amount": {"$gt": 10000}}
    ]})
df12 = pd.DataFrame(ex12)
df12.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
244,52cdef7f4bab8bd67529c555,Veeco Instruments,veeco-instruments,http://www.crunchbase.com/company/veeco-instru...,http://www.veeco.com,,,,hardware,,...,[],"{'price_amount': 30000000, 'price_currency_cod...","[{'price_amount': 1700000, 'price_currency_cod...","[{'description': '', 'address1': 'Terminal Dri...","[{'id': 10589, 'description': 'Veeco Completes...",,[],[],[],[]
269,52cdef7f4bab8bd67529c6fa,EnteGreat,entegreat,http://www.crunchbase.com/company/entegreat,http://www.entegreat.com,,,,enterprise,,...,[],"{'price_amount': 5120000, 'price_currency_code...",[],"[{'description': '', 'address1': '1900 Interna...",[],,[],[],[],[]
94,52cdef7d4bab8bd675299320,Foundry Networks,foundry-networks,http://www.crunchbase.com/company/foundry-netw...,http://www.foundrynet.com,,,,software,,...,[],"{'price_amount': 3000000000, 'price_currency_c...",[],"[{'description': 'Corporate Headquarters', 'ad...",[],,[],[],[],[]
186,52cdef7e4bab8bd67529b0b6,Applimation,applimation,http://www.crunchbase.com/company/applimation,http://www.applimation.com,,,,enterprise,,...,[],"{'price_amount': 40000000, 'price_currency_cod...","[{'price_amount': None, 'price_currency_code':...","[{'description': '', 'address1': '525 West Mon...",[],,[],[],[],[]
11,52cdef7c4bab8bd675297f29,Lastminute,lastminute,http://www.crunchbase.com/company/lastminute,http://lastminute.com,,,,web,,...,[],"{'price_amount': 1008000000, 'price_currency_c...",[],"[{'description': 'Corporate Headquarters', 'ad...",[],,[],"[{'available_sizes': [[[150, 94], 'assets/imag...",[],[]


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [126]:
ex13 = collection.find({"acquisition.acquired_year": {"$gt": 2010}}, {"name":1, "acquisition":1}).sort("acquisition.price_amount", -1)
df13 = pd.DataFrame(ex13)
df13.head(5)

Unnamed: 0,_id,name,acquisition
0,52cdef7c4bab8bd675298876,T-Mobile,"{'price_amount': 39000000000, 'price_currency_..."
1,52cdef7f4bab8bd67529c228,Goodrich Corporation,"{'price_amount': 18400000000, 'price_currency_..."
2,52cdef7d4bab8bd675298b89,LSI,"{'price_amount': 6600000000, 'price_currency_c..."
3,52cdef7e4bab8bd67529a2b5,National Semiconductor,"{'price_amount': 6500000000, 'price_currency_c..."
4,52cdef7d4bab8bd675298935,Ariba,"{'price_amount': 4300000000, 'price_currency_c..."


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [132]:
ex14 = collection.find({}, {"name":1, "founded_year":1}).sort("founded_year", -1)
df14 = pd.DataFrame(ex14)
df14.head(5)

Unnamed: 0,_id,name,founded_year
0,52cdef7c4bab8bd675297fec,Fixya,2013.0
1,52cdef7c4bab8bd67529801f,Wamba,2013.0
2,52cdef7c4bab8bd6752982d4,Advaliant,2013.0
3,52cdef7c4bab8bd67529830a,Fluc,2013.0
4,52cdef7d4bab8bd675298ea7,iBazar,2013.0


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [134]:
ex15 = collection.find({"founded_month": {"$lt": 8}}).sort("acquisition.price_amount", -1).limit(10)
df15 = pd.DataFrame(ex15)
df15

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
0,52cdef7c4bab8bd675297ee9,Sun Microsystems,sun-microsystems,http://www.crunchbase.com/company/sun-microsys...,http://www.sun.com,,,sunmicrosystems,enterprise,33350.0,...,"[{'funding_round': {'round_code': 'c', 'source...","{'price_amount': 7400000000, 'price_currency_c...","[{'price_amount': 1000000000, 'price_currency_...","[{'description': None, 'address1': '4150 Netwo...","[{'id': 196, 'description': 'NBC selects Sun t...",,"[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 92], 'assets/imag...",[{'external_url': 'http://mashpedia.com/Sun_Mi...,[]
1,52cdef7d4bab8bd675298934,Siebel Systems,siebel,http://www.crunchbase.com/company/siebel,http://www.siebel.com,,,,software,,...,[],"{'price_amount': 5850000000, 'price_currency_c...",[],"[{'description': '', 'address1': '500 Oracle P...",[],,[],"[{'available_sizes': [[[150, 94], 'assets/imag...",[],[]
2,52cdef7d4bab8bd6752989a1,Netscape,netscape,http://www.crunchbase.com/company/netscape,http://netscape.aol.com,,,,software,,...,[],"{'price_amount': 4200000000, 'price_currency_c...",[],"[{'description': '', 'address1': '466 Ellis St...",[],,[],[],[],[]
3,52cdef7c4bab8bd6752981d6,SuccessFactors,successfactors,http://www.crunchbase.com/company/successfactors,http://www.successfactors.com,http://www.successfactors.com/blogs/business-e...,http://feeds.feedburner.com/successfactors,successfactors,enterprise,1200.0,...,[],"{'price_amount': 3400000000, 'price_currency_c...","[{'price_amount': 50000000, 'price_currency_co...","[{'description': None, 'address1': '1500 Fashi...","[{'id': 7046, 'description': 'SuccessFactors t...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<object width=""520"" height=""3...",[],[],[]
4,52cdef7c4bab8bd675297dbd,YouTube,youtube,http://www.crunchbase.com/company/youtube,http://www.youtube.com,http://www.youtube.com/blog,http://youtube-global.blogspot.com/feeds/posts...,YouTube,games_video,0.0,...,[],"{'price_amount': 1650000000, 'price_currency_c...","[{'price_amount': None, 'price_currency_code':...","[{'description': 'Corporate Headquarters', 'ad...","[{'id': 989, 'description': 'YouTube Hits 100 ...",,"[{'embed_code': '<iframe width=""430"" height=""2...","[{'available_sizes': [[[150, 125], 'assets/ima...",[],[]
5,52cdef7c4bab8bd675297efe,Zappos,zappos,http://www.crunchbase.com/company/zappos,http://www.zappos.com,http://www.blogs.zappos.com/blogs,http://feeds.feedburner.com/ZapposBlogs,zappos,web,1500.0,...,[],"{'price_amount': 1200000000, 'price_currency_c...",[],"[{'description': 'Headquarters', 'address1': '...","[{'id': 864, 'description': 'My baby cousin ju...",,"[{'embed_code': '<object width=""425"" height=""3...",[],[{'external_url': 'http://blog.mixergy.com/cus...,[]
6,52cdef7c4bab8bd6752981b8,Meraki,meraki,http://www.crunchbase.com/company/meraki,http://meraki.com,http://meraki.com/blog/,http://meraki.com/news/feed/,meraki,network_hosting,,...,[],"{'price_amount': 1200000000, 'price_currency_c...",[],"[{'description': '', 'address1': '660 Alabama ...","[{'id': 1411, 'description': 'Meraki and One E...",,[],[],[{'external_url': 'http://www.meraki.com/compa...,[]
7,52cdef7c4bab8bd675298051,Tumblr,tumblr,http://www.crunchbase.com/company/tumblr,http://tumblr.com,http://staff.tumblr.com/,http://staff.tumblr.com/rss,tumblr,web,,...,[],"{'price_amount': 1100000000, 'price_currency_c...",[],"[{'description': None, 'address1': '419 Park A...","[{'id': 6858, 'description': 'Tumblr Is On Fir...",,"[{'embed_code': '<object classid=""clsid:D27CDB...","[{'available_sizes': [[[150, 111], 'assets/ima...",[{'external_url': 'http://www.alleyinsider.com...,[]
8,52cdef7c4bab8bd675297f0c,Alibaba,alibaba,http://www.crunchbase.com/company/alibaba,http://www.alibaba.com,,,AlibabaTalk,ecommerce,,...,[{'funding_round': {'round_code': 'private_equ...,"{'price_amount': 1000000000, 'price_currency_c...","[{'price_amount': None, 'price_currency_code':...","[{'description': '', 'address1': '6/F Chuangye...","[{'id': 1514, 'description': 'Alibaba gains ex...","{'valuation_amount': None, 'valuation_currency...",[],[],[{'external_url': 'http://www.alibaba.com/show...,"[{'partner_name': 'CTQuan', 'homepage_url': 'h..."
9,52cdef7d4bab8bd675299f11,Kiva Systems,kiva-systems,http://www.crunchbase.com/company/kiva-systems,http://www.kivasystems.com,,,KivaSystems,hardware,,...,[],"{'price_amount': 775000000, 'price_currency_co...",[],"[{'description': 'Headquarters', 'address1': '...",[],,[],"[{'available_sizes': [[[150, 100], 'assets/ima...",[{'external_url': 'http://www.youtube.com/watc...,[]


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [135]:
ex16 = collection.find(
    {"$and": [
        {"category_code": "web"},
        {"number_of_employees": {"$gt": 4000}}
    ]}).sort("number_of_employees")
df16 = pd.DataFrame(ex16)
df16.head(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
0,52cdef7c4bab8bd67529822a,Expedia,expedia,http://www.crunchbase.com/company/expedia,http://www.expedia.com,,,Expedia,web,4400,...,"[{'funding_round': {'round_code': 'c', 'source...","{'price_amount': None, 'price_currency_code': ...","[{'price_amount': None, 'price_currency_code':...","[{'description': 'Corporate Office', 'address1...","[{'id': 11691, 'description': 'US AIRWAYS AND ...","{'valuation_amount': None, 'valuation_currency...",[],"[{'available_sizes': [[[150, 93], 'assets/imag...",[{'external_url': 'http://www.urlaubs-rabatte....,[]
1,52cdef7c4bab8bd675297e96,AOL,aol,http://www.crunchbase.com/company/aol,http://www.aol.com,http://blog.aol.com/,http://feeds.feedburner.com/AolBlog?format=xml,aol,web,8000,...,"[{'funding_round': {'round_code': 'b', 'source...",,"[{'price_amount': 40000000, 'price_currency_co...","[{'description': 'HQ', 'address1': '770 Broadw...","[{'id': 2689, 'description': 'AOL relauches, c...","{'valuation_amount': None, 'valuation_currency...","[{'embed_code': '<embed src=""http://blip.tv/pl...","[{'available_sizes': [[[150, 91], 'assets/imag...","[{'external_url': 'http://mashpedia.com/AOL', ...",[]
2,52cdef7c4bab8bd675297ea4,Webkinz,webkinz,http://www.crunchbase.com/company/webkinz,http://www.webkinz.com,http://www.webkinz.com,http://piczo.com,webkinz,web,8657,...,[],,[],"[{'description': None, 'address1': 'One Pearce...",[],,[],[],"[{'external_url': 'http://webkinz-tips.com', '...",[]
3,52cdef7c4bab8bd675297fcb,Rakuten,rakuten,http://www.crunchbase.com/company/rakuten,http://global.rakuten.com/corp,,,RakutenGlobal,web,10000,...,[{'funding_round': {'round_code': 'unattribute...,,"[{'price_amount': 425000000, 'price_currency_c...","[{'description': 'Headquarter', 'address1': 'R...","[{'id': 7034, 'description': 'RS Empowerment a...",,[],"[{'available_sizes': [[[150, 111], 'assets/ima...",[{'external_url': 'http://www.brightwire.com/c...,[]
4,52cdef7c4bab8bd67529834c,Los Angeles Times Media Group,los-angeles-times-media-group,http://www.crunchbase.com/company/los-angeles-...,http://www.latimes.com,http://www.latimes.com/blogs,http://feeds.latimes.com/latimes/news,latimes,web,10000,...,"[{'funding_round': {'round_code': 'a', 'source...","{'price_amount': None, 'price_currency_code': ...",[],"[{'description': '', 'address1': '', 'address2...","[{'id': 1530, 'description': 'February 1, 1873...",,[],[],[],[]


### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [136]:
ex17 = collection.find(
    {"$and": [
        {"acquisition.price_currency_code": "EUR"},
        {"acquisition.price_amount": {"$gt": 10000000}}
    ]})
df17 = pd.DataFrame(ex17)
df17.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,investments,acquisition,acquisitions,offices,milestones,ipo,video_embeds,screenshots,external_links,partners
3,52cdef7e4bab8bd67529a536,Webedia,webedia,http://www.crunchbase.com/company/webedia,http://purepeople.com,,http://www.purepeople.com/rss,purepeople,web,50.0,...,[],"{'price_amount': 70000000, 'price_currency_cod...",[],"[{'description': '', 'address1': '', 'address2...",[],,[],"[{'available_sizes': [[[150, 93], 'assets/imag...",[],[]
5,52cdef7e4bab8bd67529b747,Tuenti Technologies,tuenti-technologies,http://www.crunchbase.com/company/tuenti-techn...,http://www.tuenti.com,http://blog.tuenti.com/,http://blog.tuenti.com/feed/,tuenti,social,170.0,...,[],"{'price_amount': 70000000, 'price_currency_cod...",[],"[{'description': '', 'address1': 'Plaza Cortes...",[],,[],"[{'available_sizes': [[[150, 71], 'assets/imag...",[],[]
1,52cdef7d4bab8bd675298bf3,Apertio,apertio,http://www.crunchbase.com/company/apertio,http://www.apertio.com,,,,mobile,,...,[],"{'price_amount': 140000000, 'price_currency_co...",[],"[{'description': None, 'address1': '', 'addres...",[],,[],[],[],[]
6,52cdef7f4bab8bd67529c0cf,BioMed Central,biomed-central,http://www.crunchbase.com/company/biomed-central,http://www.biomedcentral.com,,,,ecommerce,,...,[],"{'price_amount': 43400000, 'price_currency_cod...",[],"[{'description': '', 'address1': 'Floor 6, 236...",[],,[],"[{'available_sizes': [[[150, 80], 'assets/imag...",[],[]
4,52cdef7e4bab8bd67529a729,Wayfinder,wayfinder,http://www.crunchbase.com/company/wayfinder,http://www.wayfinder.com,http://www.wayfinder.com/?id=3848&lang=en-US,http://www.wayfinder.com/?tid=customrssfeeder&...,,,,...,[],"{'price_amount': 24000000, 'price_currency_cod...",[],"[{'description': '', 'address1': '', 'address2...",[],,[],[],[],[]


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [142]:
ex18 = collection.find({"acquisition.acquired_month": {"$lt": 4}}, {"name":1, "acquisition":1, "_id": 0}).limit(10)
df18 = pd.DataFrame(ex18)
df18

Unnamed: 0,name,acquisition
0,Kyte,"{'price_amount': None, 'price_currency_code': ..."
1,NetRatings,"{'price_amount': 327000000, 'price_currency_co..."
2,blogTV,"{'price_amount': None, 'price_currency_code': ..."
3,Livestream,"{'price_amount': None, 'price_currency_code': ..."
4,iContact,"{'price_amount': 169000000, 'price_currency_co..."
5,Coghead,"{'price_amount': None, 'price_currency_code': ..."
6,Dailymotion,"{'price_amount': 168000000, 'price_currency_co..."
7,Netvibes,"{'price_amount': None, 'price_currency_code': ..."
8,Flickr,"{'price_amount': None, 'price_currency_code': ..."
9,BabyCenter,"{'price_amount': None, 'price_currency_code': ..."


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [143]:
ex19 = collection.find(
    {"$and": [
        {"founded_year": {"$gt": 1999}},
        {"founded_year": {"$lt": 2011}},
        {"acquisition.acquired_year": {"$gt": 2011}}
    ]})
df19 = pd.DataFrame(ex19)
df19.sample(5)

Unnamed: 0,_id,name,permalink,crunchbase_url,homepage_url,blog_url,blog_feed_url,twitter_username,category_code,number_of_employees,...,offices,milestones,video_embeds,screenshots,external_links,partners,deadpooled_month,deadpooled_day,deadpooled_url,ipo
76,52cdef7c4bab8bd675298662,Weplay,weplay,http://www.crunchbase.com/company/weplay,http://www.weplay.com,http://www.weplay.com/blog,,weplay,web,10.0,...,"[{'description': 'New York Office', 'address1'...","[{'id': 17367, 'description': 'Lane Soelberg i...",[],[],"[{'external_url': 'http://www.weplay.com/', 't...",[],,,,
209,52cdef7e4bab8bd67529b183,Sourcefire,sourcefire,http://www.crunchbase.com/company/sourcefire,http://www.sourcefire.com,,,,software,,...,"[{'description': 'HQ', 'address1': '9770 Patux...","[{'id': 19500, 'description': 'Sourcefireâ€™s ...",[],"[{'available_sizes': [[[150, 126], 'assets/ima...",[],[],,,,"{'valuation_amount': None, 'valuation_currency..."
201,52cdef7e4bab8bd67529af1f,Shutl,shutl,http://www.crunchbase.com/company/shutl,http://shutl.co.uk,http://shutl.co.uk/news/blog,http://www.shutl.co.uk/?cat=9&feed=rss2,shutl,ecommerce,10.0,...,"[{'description': 'LaunchPad', 'address1': '11 ...",[],"[{'embed_code': '<iframe width=""430"" height=""2...","[{'available_sizes': [[[150, 116], 'assets/ima...",[{'external_url': 'http://eu.techcrunch.com/20...,[],,,,
130,52cdef7d4bab8bd67529990a,Likewise Software,likewise-software,http://www.crunchbase.com/company/likewise-sof...,http://www.likewise.com,http://likewise.com/blog,http://www.likewise.com/community/index.php/bl...,,software,50.0,...,"[{'description': 'Headquarters', 'address1': '...",[],[],[],[{'external_url': 'http://www.cio.com/article/...,[],,,,
90,52cdef7d4bab8bd6752989b8,PrimeSense,primesense,http://www.crunchbase.com/company/primesense,http://primesense.com,http://www.primesense.com/news/our-blog/,,GoPrimeSense,hardware,,...,"[{'description': 'HQ', 'address1': '30 Habarze...",[],[],"[{'available_sizes': [[[150, 106], 'assets/ima...",[],[],,,,


### 20. All the companies that have been 'deadpooled' after the third year.

In [145]:
ex20 = collection.find({(int("founded_year") - int("deadpooled_year")): {"$gt": 3}})

ValueError: invalid literal for int() with base 10: 'founded_year'