# Advanced Querying Mongo

Importing libraries and setting up connection

In [8]:
from pymongo import MongoClient
dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database()
cursor = db.list_collections()

### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [17]:
import pandas as pd

In [24]:
name = { "name": "Babelgum" }

cur = db.companies.find(name).limit(1)
data = list(cur)

df = pd.DataFrame(data)
df.T

Unnamed: 0,0
_id,52cdef7c4bab8bd675297da0
name,Babelgum
permalink,babelgum
crunchbase_url,http://www.crunchbase.com/company/babelgum
homepage_url,http://babelgum.com
blog_url,http://babelgum.com/blog
blog_feed_url,http://feeds.feedburner.com/Babelgum
twitter_username,Babelgum
category_code,games_video
number_of_employees,


### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [29]:
employees = db.companies.find({"number_of_employees": {"$gte": 5000}}).sort(
    [("number_of_employees",-1)]).limit(20)

sol = list(employees)
len(sol)

20

### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [36]:
query = {
    "$and":[
        {"founded_year":{"$gte":2000}},
        {"founded_year":{"$lte":2005}}
    ]
}

projection = {"name":1, "founded_year":1}
 
cur = db.companies.find(query, projection)
data = list(cur)
len (data)

3734

### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [41]:
query = {
    "$and":[
        {"ipo.valuation_amount":{"$gt":100000000}},
        {"founded_year":{"$lte":2010}}
    ]
}

projection = {"name":1, "ipo":1}
 
cur = db.companies.find(query, projection)
data = list(cur)
len (data)

42

### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [45]:
query = {
    "$and":[
        {"number_of_employees":{"$lt":1000}},
        {"founded_year":{"$lte":2005}}
    ]
}

projection = {"name":1, "number_of_employees":1}
 
cur = db.companies.find(query, projection).sort("number_of_employees",-1).limit(10)
data = list(cur)
len (data)

10

### 6. All the companies that don't include the `partners` field.

In [47]:
query={"partners":{"$nin":[]}}

cur = db.companies.find(query)
data = list(cur)
len (data)

18801

### 7. All the companies that have a null type of value on the `category_code` field.

In [48]:
query={ "category_code" : { "$type": 10 } }

cur = db.companies.find(query)
data = list(cur)
len (data)

2751

### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [49]:
query = {
    "$and":[
        {"number_of_employees":{"$gte":100}},
        {"number_of_employees":{"$lte":1000}}
    ]
}

projection = {"name":1,"number_of_employees":1}

cur = db.companies.find(query, projection)
data = list(cur)
len (data)

942

### 9. Order all the companies by their IPO price in a descending order.

In [60]:
query = {}

projection = {"name":1,"ipo.valuation_amount":1}

cur = db.companies.find(query,projection).sort("ipo.valuation_amount",-1)
data = list(cur)
len (data)

OperationFailure: Executor error during find command :: caused by :: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.

### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [53]:
query = {}

projection = {"name":1}

cur = db.companies.find(query, projection).sort([("number_of_employees", -1)]).limit(10)
data = list(cur)
len (data)

10

### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [62]:
query = {"founded_month":{"$gte":7,"$lt":12}}
cur=list(db.companies.find(query).limit(1000))
len(cur)

1000

### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [65]:
query = {"$and":[{"founded_year":{"$lt":2000}},
                 {"acquisition.price_amount":{"$gt" :10000000}}]}

cur=list(db.companies.find(query))
len(cur)

205

### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [67]:
query = {"acquisition.acquired_year":{"$gt":2010}}

cur=list(db.companies.find(query).sort([("acquisition.price_amount", -1)]))
len(cur)

736

### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [72]:
query = {}
cur=list(db.companies.find(query,{"name":1,"founded_year":1}
                          ).sort([("founded_year")]))
len(cur)

ValueError: too many values to unpack (expected 2)

### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [74]:
query = {"founded_day":{"$lte":7}}
cur=list(db.companies.find(query).sort([("acquisition.price_amount", -1)]).limit(10))
len(cur)

10

### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [73]:
query = {"$and":[{"category_code":{"$eq":"web"}},{"number_of_employees":{"$gt":4000}}]}
cur=list(db.companies.find(query).sort([("number_of_employees", 1)])
len(cur)

SyntaxError: invalid syntax (<ipython-input-73-29302d0b88c6>, line 3)

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [None]:
# Your Code

### 20. All the companies that have been 'deadpooled' after the third year.

In [None]:
# Your Code