# Advanced Querying Mongo

Importing libraries and setting up connection

In [9]:
from pymongo import MongoClient
dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [209]:
cursor = db.list_collections()
for collection in cursor:
    print(collection["name"])
    
query = {
    "name":{"$eq":"Babelgum"}
}

cur = db.companies.find(query).limit(2)
data = list(cur)


companies


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [32]:
employees = {
    "number_of_employees":{"$gte":5000}
}
cur = db.companies.find(employees).limit(20)
emplo = list(cur)
len(emplo)

20

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [68]:
cb05= {"$and":[
    {"founded_year":{"$gte":2000}},
    {"founded_year":{"$lte":2005},}
              ]
    }
cur = db.companies.find(cb05)
sol = list(cur)

proyection ={"name":1,"founded_year":1,"_id":0}#el id aparece por defecto por lo que veo no?
list(db.companies.find(cb05,proyection).limit(5))

[{'name': 'Zoho', 'founded_year': 2005},
 {'name': 'Omnidrive', 'founded_year': 2005},
 {'name': 'Wetpaint', 'founded_year': 2005},
 {'name': 'Digg', 'founded_year': 2004},
 {'name': 'StumbleUpon', 'founded_year': 2002}]

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [80]:
va= {"$and":[
    {"ipo.valuation_amount":{"$gt":100000000}},
    {"founded_year":{"$lt":2010},}
              ]
    }
cur = db.companies.find(va)
vasol = list(cur)

proyection ={"name":1,"ipo":1,"_id":0}
list(db.companies.find(va,proyection).limit(5))

[{'name': 'Twitter',
  'ipo': {'valuation_amount': 18100000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2013,
   'pub_month': 11,
   'pub_day': 7,
   'stock_symbol': 'NYSE:TWTR'}},
 {'name': 'Facebook',
  'ipo': {'valuation_amount': 104000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 5,
   'pub_day': 18,
   'stock_symbol': 'NASDAQ:FB'}},
 {'name': 'Yelp',
  'ipo': {'valuation_amount': 1300000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2012,
   'pub_month': 3,
   'pub_day': 2,
   'stock_symbol': 'NYSE:YELP'}},
 {'name': 'LinkedIn',
  'ipo': {'valuation_amount': 9310000000,
   'valuation_currency_code': 'USD',
   'pub_year': 2011,
   'pub_month': 7,
   'pub_day': 20,
   'stock_symbol': 'NYSE:LNKD'}},
 {'name': 'Amazon',
  'ipo': {'valuation_amount': 100000000000,
   'valuation_currency_code': 'USD',
   'pub_year': 1997,
   'pub_month': 5,
   'pub_day': None,
   'stock_symbol': 'NASDAQ:AMZN'}}]

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [97]:
empl= {"$and":[
    {"number_of_employees":{"$lt":1000}},
    {"founded_year":{"$lt":2005},}
              ]
    }
cur = db.companies.find(empl).sort([("number_of_employees",-1)])
sol = list(cur)

proyection ={"name":1,"founded_year":1,"_id":0,"number_of_employees":1}
list(db.companies.find(empl,proyection).limit(5))


[{'name': 'AdventNet', 'number_of_employees': 600, 'founded_year': 1996},
 {'name': 'Digg', 'number_of_employees': 60, 'founded_year': 2004},
 {'name': 'Fox Interactive Media',
  'number_of_employees': 0,
  'founded_year': 1979},
 {'name': 'Plaxo', 'number_of_employees': 50, 'founded_year': 2002},
 {'name': 'Technorati', 'number_of_employees': 35, 'founded_year': 2002}]

### 6. All the companies that don't include the `partners` field.

In [106]:
partners = {
    "partners":{
        "$exists": "false"
    }
}
cur = db.companies.find(partners)
solp = list(cur)
print(len(solp))
print(len(list(db.companies.find(partners))))

#cual es la diferencia entre poner .find(...) o no poner nada y como hago mas arriba?

18801
18801


### 7. All the companies that have a null type of value on the `category_code` field.

In [109]:
category_code = {
    "category_code":{
        "$type": "null"
    }
}
cur = db.companies.find(category_code)
solc = list(cur)
len(solc)

2751

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [114]:
empl8= {"$and":[
    {"number_of_employees":{"$gte":100}},
    {"number_of_employees":{"$lt":1000},}
              ]
    }
cur = db.companies.find(empl8)
sol8 = list(cur)
print(len(sol8))
proyection ={"name":1,"number_of_employees":1,"_id":0}
list(db.companies.find(empl8,proyection).limit(5))

917


[{'name': 'AdventNet', 'number_of_employees': 600},
 {'name': 'AddThis', 'number_of_employees': 120},
 {'name': 'OpenX', 'number_of_employees': 305},
 {'name': 'LifeLock', 'number_of_employees': 644},
 {'name': 'Jajah', 'number_of_employees': 110}]

### 9. Order all the companies by their IPO price in a descending order.

In [188]:
partners = {
    "ipo.valuation_amount":{
        "$exists": "true"
    }
}
cur = db.companies.find(partners).sort([("number_of_employees",-1)])
sol9 = list(cur)
print(len(sol9))
proyection ={"name":1,"ipo.valuation_amount":1,"_id":0}
list(db.companies.find(sol9,proyection))
#filter must be an instance of dict, bson.son.SON, or any other type that inherits from collections.Mapping
        #Me da este error... 
    
#alternative={
#    "ipo.valuation_amount" : {"$gt": 0}
#}
#curalt = db.companies.find(alternative).sort([("number_of_employees",-1)])
#sol9_2 = list(curalt)
#print(len(sol9_2))   
#funciona!
#proyection ={"_id":0,"name":1}
#list(db.companies.find(sol9_2,{"_id":0,"name":1}))

390


TypeError: filter must be an instance of dict, bson.son.SON, or any other type that inherits from collections.Mapping

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [189]:
empl10= {
    "number_of_employees":{"$gt":0},           
    }
cur10 = db.companies.find(empl10).limit(10)
sol10 = list(cur10)
print(len(sol10))

#proyection ={"name":1,"number_of_employees":1,"_id":0}
#list(db.companies.find(sol10,proyection).limit(5))
#ME VUELVE A DAR EL ERROR DE ARRIBA!

10


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [195]:
comp11= {
    "founded_month":{"$gte":6}
}
cur11= db.companies.find(comp11).limit(1000)
                                      
sol11=list(cur11)


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [219]:
ac12= {"$and":[
    {"founded_year":{"$lt":2000}},
    {"acquisition.price_amount":{"$gt":1000000},}
              ]
    }
cur12 = db.companies.find(ac12)
sol12 = list(cur12)
print(len(sol12))



224


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [227]:
ac13 = {"acquisition.acquired_year":
         {"$lt":2010}   
}
cur13 = db.companies.find(query)
proyection ={"name":1,"number_of_employees":1,"_id":0}
list(db.companies.find(cur13,proyection))
#porque en el 8 funciona!

TypeError: filter must be an instance of dict, bson.son.SON, or any other type that inherits from collections.Mapping

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [229]:
founded_14 ={"founded_year":
        {"$ne":None}
}
cur14 = db.companies.find(founded_14).sort("founded_year").limit(5)

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [230]:
ac15 = {"founded_day":
         {"$lte":7}   
}
cur15 = db.companies.find(ac15).sort([("acquisition.price_amount", -1)]).limit(10)

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [232]:
web16 = {"$and":
         [
        { "category_code": "web" },
        {"number_of_employees":{"$gt":4000}}
]}

cur16 = db.companies.find(web16).sort([("number_of_employees", -1)])

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [233]:
curr17 = {"$and":[
    
        {"acquisition.price_amount":{"$gt":10000000}},
        {"acquisition.price_currency_code": "EUR"}
]}

cur17 = db.companies.find(curr17)

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [236]:
ac18 = {
    "acquisition.acquired_month":{"$lte":4}
}

projection = {"name": 1, "acquisition": 1, "_id": 0}

cur18 = db.companies.find(ac18,projection)
len(list(cur18))
list(db.companies.find(cur18,proyection))
# Me pego un tiro con el error. 

TypeError: filter must be an instance of dict, bson.son.SON, or any other type that inherits from collections.Mapping

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [None]:
# Your Code

### 20. All the companies that have been 'deadpooled' after the third year.

In [239]:
imdead = {"deadpooled_year":{"$gt":3}}


cur20 = db.companies.find(imdead)
len(list(cur20))

926