# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING

In [2]:
client = MongoClient("mongodb://localhost/companies")
db = client.get_database()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [82]:
# Your Code
cursor = db.companies.find({"name":"Babelgum"},{"name":1,"_id":0})

for name in cursor:
    print(name)  

{'name': 'Babelgum'}


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [110]:
# Your Code
cursor = db.companies.find({"number_of_employees": {"$gt":5000}}).sort([("number_of_employees", ASCENDING)]).limit(20)
len(list(cursor))

20

In [111]:
#En este caso no obtengo el mismo resultado, pero en el ejercicio 7 (que la query no contiene limit()) sí que obtengo lo mismo 
db.companies.find({"number_of_employees": {"$gt":5000}}).sort([("number_of_employees", ASCENDING)]).limit(20).count()

  


109

In [112]:
# y sobre cursor igual que sobre la query directamente
cursor.count()

  """Entry point for launching an IPython kernel.


109

In [3]:
# la función count parece no tener en cuenta el limit()
cursor = db.companies.find({"number_of_employees": {"$gt":5000}}).sort([("number_of_employees", ASCENDING)])
len(list(cursor))

109

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [84]:
# Your Code
def companiesYearF(year):
    query = db.companies.find({"founded_year": year},{"name":1,"founded_year":1,"_id":0})
    return query
    
for i in range(2000,2006):
    cursor = companiesYearF(i)
    for company in cursor:
        print(company)
        break
# Solo un break para que imprima el primero de cada año

{'name': 'AllofMP3', 'founded_year': 2000}
{'name': 'TechnologyGuide', 'founded_year': 2001}
{'name': 'StumbleUpon', 'founded_year': 2002}
{'name': 'Gizmoz', 'founded_year': 2003}
{'name': 'Digg', 'founded_year': 2004}
{'name': 'Wetpaint', 'founded_year': 2005}


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [146]:
# Your Code
cursor = db.companies.find({"$and":[{"ipo.valuation_amount": {"$gt":100000000}}, {"founded_year": {"$lt":2010}},
                                   {"founded_year":{"$not":{"$type":["null"]}}}]},{"name":1,"ipo":1})

#pruebo count sobre la query directamente
db.companies.find({"$and":[{"ipo.valuation_amount": {"$gt":100000000}}, {"founded_year": {"$lt":2010}},
                                   {"founded_year":{"$not":{"$type":["null"]}}}]},{"name":1,"ipo":1}).count()

  import sys


42

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [144]:
# Your Code
cursor = db.companies.find({"$and":[{"number_of_employees": {"$lt":1000}},{"founded_year": {"$lt":2005}},
                                   {"number_of_employees":{"$not":{"$type":["null"]}}},
                                   {"founded_year":{"$not":{"$type":["null"]}}}]}).sort([("number_of_employees", ASCENDING)]).limit(10)

len(list(cursor))

10

### 6. All the companies that don't include the `partners` field.

In [103]:
# Your Code
cursor = db.companies.find({"partners": {"$exists":False}})

len(list(cursor))

0

In [106]:
# Your Code
cursor = db.companies.find({"partners": {"$not":{"$exists":True}}})

len(list(cursor))

0

### 7. All the companies that have a null type of value on the `category_code` field.

In [107]:
# Your Code
cursor = db.companies.find({"category_code": {"$type":["null"]}})

len(list(cursor))

2751

In [108]:
# Aquí obtengo el mismo resultado entre len() y count()
cursor = db.companies.find({"category_code": {"$type":["null"]}})

db.companies.find({"category_code": {"$type":["null"]}}).count()

  after removing the cwd from sys.path.


2751

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [113]:
# Your Code
cursor = db.companies.find({"number_of_employees":{"$in":[i for i in range(100,1000)]}},{"name":1,"number_of_employees":1,"_id":0})

cursor.count()

  after removing the cwd from sys.path.


917

### 9. Order all the companies by their IPO price in a descending order.

In [139]:
# Your Code
cursor = db.companies.find({"ipo.valuation_amount":{"$not":{"$type":["null"]}}}).sort([("ipo.valuation_amount", DESCENDING)]).limit(100)

#lo he limitado a 100 porque me daba error por falta de memoria RAM
len(list(cursor))

100

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [137]:
# Your Code
cursor = db.companies.find({"number_of_employees":{"$not":{"$type":["null"]}}}).sort("number_of_employees",-1).limit(10)
#Me da fallo de memoria RAM
len(list(cursor))

10

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [129]:
# Your Code
cursor = db.companies.find({"founded_month":{"$gt": 6}}).limit(1000)

len(list(cursor))

1000

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [134]:
# Your Code
cursor = db.companies.find({"$and":[{"founded_year":{"$not":{"$type":["null"]}}},
                                    {"founded_year": {"$lt":2000}},{"acquisition.price_amount": {"$gt":10000000}}]})

cursor.count()

  """


205

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [143]:
# Your Code
cursor = db.companies.find({"acquisition.acquired_year": {"$gt":2010}},
                           {"name":1,"acquisition":1,"_id":0}).sort([("acquisition.price_amount",ASCENDING)])

cursor.count()

  """


736

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [142]:
# Your Code
cursor = db.companies.find({"founded_year":{"$not":{"$type":["null"]}}},
                           {"name":1,"founded_year":1,"_id":0}).sort("founded_year",ASCENDING).limit(10)

len(list(cursor)) #lo limito a 10 por error con la memoria RAM

10

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [148]:
# Your Code
cursor = db.companies.find({"$and":[{"founded_day":{"$in":[i for i in range(1,8)]}},
                                    {"acquisition.price_amount":{"$not":{"$type":["null"]}}}]}).sort([("acquisition.price_amount",DESCENDING)]).limit(10)

len(list(cursor))

10

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [149]:
# Your Code
cursor = db.companies.find({"$and": [{"category_code":{"$eq":"web"}},{"number_of_employees":{"$gt":4000}}]}).sort([("number_of_employees",ASCENDING)])

len(list(cursor))

9

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [150]:
# Your Code
cursor = db.companies.find({"$and": [{"acquisition.price_amount":{"$gt":10000000}},
                                     {"acquisition.price_currency_code":{"$eq":"EUR"}}]})

len(list(cursor))

7

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [152]:
# Your Code
cursor = db.companies.find({"acquisition.acquired_month":{"$in":[i for i in range(1,4)]}},
                           {"name":1,"acquisition":1,"_id":0}).limit(10)

len(list(cursor))

10

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [153]:
# Your Code
def companiesYearAcq(year):
    query = db.companies.find({"$and": [{"founded_year": year},{"acquisition.acquired_year":{"$gte":2011}}]})
    return query

count = 0
for i in range(2000,2011):
    cursor = companiesYearAcq(i)
    for company in cursor:
        count+=1
print(count)

486


### 20. All the companies that have been 'deadpooled' after the third year.

In [13]:
def compDeadpool(dictionary):
    for key,value in dictionary.items():
        if db.companies.find({"$and":[{"_id": dictionary["_id"]},{"deadpooled_year": dictionary["founded_year"]+4}]}):
            return db.companies.find({"$and":[{"_id": dictionary["_id"]},{"deadpooled_year": dictionary["founded_year"]+4}]})

cursor = db.companies.find({"$and":[{"founded_year":{"$not":{"$type":["null"]}}},{"deadpooled_year":{ "$not":{"$type":["null"]}}}]},{"founded_year":1})



for company in cursor:
    a = compDeadpool(company)
    for comp in a:
        print(comp)
        break

{'_id': ObjectId('52cdef7c4bab8bd675297dcc'), 'name': 'AllPeers', 'permalink': 'allpeers', 'crunchbase_url': 'http://www.crunchbase.com/company/allpeers', 'homepage_url': 'http://www.allpeers.com', 'blog_url': 'http://www.allpeers.com/blog/', 'blog_feed_url': 'http://www.allpeers.com/blog/feed/', 'twitter_username': None, 'category_code': 'web', 'number_of_employees': None, 'founded_year': 2004, 'founded_month': 8, 'founded_day': 1, 'deadpooled_year': 2008, 'deadpooled_month': 3, 'deadpooled_day': 3, 'deadpooled_url': 'http://www.techcrunch.com/2008/03/03/much-hyped-allpeers-to-deadpool/', 'tag_list': 'allpeers', 'alias_list': None, 'email_address': None, 'phone_number': None, 'description': None, 'created_at': 'Thu Jun 28 07:47:43 UTC 2007', 'updated_at': 'Sat Mar 06 00:28:43 UTC 2010', 'overview': '<p>AllPeers is a simple, persistent buddy list in the browser. Initially, interaction with those buddies will be limited to discovering and sharing files - If you choose to, you can share 