# Advanced Querying Mongo

Importing libraries and setting up connection

In [1]:
from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING
import pandas as pd
dbName = "companies"
mongodbURL = f"mongodb://localhost/{dbName}"
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database()
cursor=db.list_collections()


### 1. All the companies whose name match 'Babelgum'. Retrieve only their `name` field.

In [2]:
queryname= db.companies.find({"name":"Babelgum"},{"name":1, "_id":0}).limit(1)
df=pd.DataFrame(list(queryname))
df

Unnamed: 0,name
0,Babelgum


In [3]:
queryname= db.companies.find().limit(1)
df=pd.DataFrame(list(queryname))
df.columns

Index(['_id', 'name', 'permalink', 'crunchbase_url', 'homepage_url',
       'blog_url', 'blog_feed_url', 'twitter_username', 'category_code',
       'number_of_employees', 'founded_year', 'deadpooled_year', 'tag_list',
       'alias_list', 'email_address', 'phone_number', 'description',
       'created_at', 'updated_at', 'overview', 'image', 'products',
       'relationships', 'competitions', 'providerships', 'total_money_raised',
       'funding_rounds', 'investments', 'acquisition', 'acquisitions',
       'offices', 'milestones', 'video_embeds', 'screenshots',
       'external_links', 'partners'],
      dtype='object')

### 2. All the companies that have more than 5000 employees. Limit the search to 20 companies and sort them by **number of employees**.

In [4]:
query = {
    "$and":[
        {"number_of_employees":{"$gte":5000}}
    ]
}

cur = db.companies.find(query).limit(20)
df =pd.DataFrame(list(cur))
ok=df[['name', 'number_of_employees']]
ok.sort_values(by='number_of_employees', ascending=False)

Unnamed: 0,name,number_of_employees
10,PayPal,300000
7,Sony,180500
15,Nokia,125000
16,Microsoft,90000
4,Intel,86300
17,Apple,80000
3,Cisco,63000
19,Motorola Solutions,51000
13,Sun Microsystems,33350
5,Google,28000


### 3. All the companies founded between 2000 and 2005, both years included. Retrieve only the `name` and `founded_year` fields.

In [5]:
query = {
    "$and":[
        {"founded_year":{"$gte":2000}},
        {"founded_year":{"$lte":2005}}
        
        
    ]
}

cur = db.companies.find(query).limit(20)
df =pd.DataFrame(list(cur))
df1=df[['name', 'founded_year']]
df1

Unnamed: 0,name,founded_year
0,Zoho,2005
1,Omnidrive,2005
2,Digg,2004
3,Wetpaint,2005
4,StumbleUpon,2002
5,Gizmoz,2003
6,Facebook,2004
7,Helio,2005
8,Plaxo,2002
9,Technorati,2002


### 4. All the companies that had a Valuation Amount of more than 100.000.000 and have been founded before 2010. Retrieve only the `name` and `ipo` fields.

In [6]:
query = {
    "$and":[
        {"ipo.valuation_amount":{"$gte":100000000 }},
        {"founded_year":{"$lte":2010}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name', 'ipo']]
df1

Unnamed: 0,name,ipo
0,Facebook,"{'valuation_amount': 104000000000, 'valuation_..."
1,Twitter,"{'valuation_amount': 18100000000, 'valuation_c..."
2,Yelp,"{'valuation_amount': 1300000000, 'valuation_cu..."
3,LinkedIn,"{'valuation_amount': 9310000000, 'valuation_cu..."
4,Brightcove,"{'valuation_amount': 290000000, 'valuation_cur..."


### 5. All the companies that have less than 1000 employees and have been founded before 2005. Order them by the number of employees and limit the search to 10 companies.

In [7]:
query = {
    "$and":[
        {"number_of_employees":{"$gte":1000 }},
        {"founded_year":{"$lte":2005}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','founded_year', 'number_of_employees']]
df1.sort_values(by='number_of_employees', ascending=False)

Unnamed: 0,name,founded_year,number_of_employees
4,Cisco,1984,63000
2,eBay,1995,15000
3,Yahoo!,1994,13600
1,Facebook,2004,5299
0,Zoho,2005,1600


### 6. All the companies that don't include the `partners` field.

In [8]:
cur = db.companies.find({'partners':{'$exists':'false'}}).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','partners']]
df1

Unnamed: 0,name,partners
0,AdventNet,[]
1,Zoho,[]
2,Omnidrive,[]
3,Postini,[]
4,Geni,[]


### 7. All the companies that have a null type of value on the `category_code` field.

In [9]:
cur = db.companies.find({'category_code':{'$type':'null'}}).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','category_code']]
df1

Unnamed: 0,name,category_code
0,Collective,
1,Snimmer,
2,KoolIM,
3,Level9 Media,
4,VidKing,


### 8. All the companies that have at least 100 employees but less than 1000. Retrieve only the `name` and `number of employees` fields.

In [10]:
query = {
    "$and":[
        {"number_of_employees":{"$gte":100 }},
        {"number_of_employees":{"$lte":1000}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name', 'number_of_employees']]
df1.sort_values(by='number_of_employees', ascending=False)

Unnamed: 0,name,number_of_employees
3,LifeLock,644
0,AdventNet,600
2,OpenX,305
1,AddThis,120
4,Jajah,110


### 9. Order all the companies by their IPO price in a descending order.

In [11]:
cur = db.companies.find({'ipo.valuation_amount': {'$exists': 'true'}}).sort([('ipo.valuation_amount',DESCENDING)]).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name', 'ipo']]
df1

Unnamed: 0,name,ipo
0,GREE,"{'valuation_amount': 108960000000, 'valuation_..."
1,Facebook,"{'valuation_amount': 104000000000, 'valuation_..."
2,Amazon,"{'valuation_amount': 100000000000, 'valuation_..."
3,Twitter,"{'valuation_amount': 18100000000, 'valuation_c..."
4,Groupon,"{'valuation_amount': 12800000000, 'valuation_c..."


### 10. Retrieve the 10 companies with more employees, order by the `number of employees`

In [12]:
cur = db.companies.find().sort([('number_of_employees', DESCENDING)]).limit(10)
df =pd.DataFrame(list(cur))
df1=df[['name', 'number_of_employees']]
df1


Unnamed: 0,name,number_of_employees
0,Siemens,405000
1,IBM,388000
2,Toyota,320000
3,PayPal,300000
4,Nippon Telegraph and Telephone Corporation,227000
5,Samsung Electronics,221726
6,Accenture,205000
7,Tata Consultancy Services,200300
8,Flextronics International,200000
9,Safeway,186000


### 11. All the companies founded on the second semester of the year. Limit your search to 1000 companies.

In [13]:
query = {
    "$and":[
        {"founded_month":{"$gte":7 }},
         ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','founded_month']]
df1.sort_values(by='founded_month', ascending=True)


Unnamed: 0,name,founded_month
0,Zoho,9
2,Digg,10
3,Wetpaint,10
4,Joost,10
1,Omnidrive,11


### 12. All the companies founded before 2000 that have an acquisition amount of more than 10.000.00

In [14]:
query = {
    "$and":[
        {"founded_year":{"$lte":2000 }},
        {"acquisition.price_amount":{"$gte":10000}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name', 'founded_year','acquisition']]
df1

Unnamed: 0,name,founded_year,acquisition
0,Postini,1999,"{'price_amount': 625000000, 'price_currency_co..."
1,SideStep,1999,"{'price_amount': 180000000, 'price_currency_co..."
2,Recipezaar,1999,"{'price_amount': 25000000, 'price_currency_cod..."
3,Cyworld,1999,"{'price_amount': 7140000, 'price_currency_code..."
4,PayPal,1998,"{'price_amount': 1500000000, 'price_currency_c..."


### 13. All the companies that have been acquired after 2010, order by the acquisition amount, and retrieve only their `name` and `acquisition` field.

In [15]:
query = {
    "$and":[
        {"acquisition.acquired_year":{"$lte":2010}},
        
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','acquisition']]
df1

Unnamed: 0,name,acquisition
0,Postini,"{'price_amount': 625000000, 'price_currency_co..."
1,Flektor,"{'price_amount': 20000000, 'price_currency_cod..."
2,StumbleUpon,"{'price_amount': 29000000, 'price_currency_cod..."
3,Gizmoz,"{'price_amount': None, 'price_currency_code': ..."
4,Helio,"{'price_amount': 39000000, 'price_currency_cod..."


### 14. Order the companies by their `founded year`, retrieving only their `name` and `founded year`.

In [29]:
cur = db.companies.find().sort([('funded_year', DESCENDING)]).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','founded_year']]
df1

Unnamed: 0,name,founded_year
0,AdventNet,1996
1,Zoho,2005
2,Omnidrive,2005
3,Postini,1999
4,Geni,2006


### 15. All the companies that have been founded on the first seven days of the month, including the seventh. Sort them by their `acquisition price` in a descending order. Limit the search to 10 documents.

In [40]:
cur = db.companies.find({'founded_day':{"$lte":7}}).sort([("acquisition.price_amount", DESCENDING)]).limit(5)
df =pd.DataFrame(list(cur))
df1=df[["name" ]]
df1

Unnamed: 0,name
0,Netscape
1,PayPal
2,Zappos
3,Alibaba
4,Postini


### 16. All the companies on the 'web' `category` that have more than 4000 employees. Sort them by the amount of employees in ascending order.

In [18]:
cur = db.companies.find({'founded_month': {'$lte': 7}}).sort([('acquisition.price_amount', DESCENDING)]).limit(10)
for e in cur:
    print(e['name'])

### 17. All the companies whose acquisition amount is more than 10.000.000, and currency is 'EUR'.

In [43]:
query = {
    "$and":[
        {"acquisition.price_amount":{"$lte":10000000}},
        {"acquisition.price_currency_code":{"$eq":"EUR"}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','acquisition']]
df1

Unnamed: 0,name,acquisition
0,Mobile Trend,"{'price_amount': 7000000, 'price_currency_code..."
1,Citizenside,"{'price_amount': 30000, 'price_currency_code':..."
2,Excite Europe,"{'price_amount': 2500000, 'price_currency_code..."
3,YelloYello,"{'price_amount': 2500000, 'price_currency_code..."
4,Branded Payment Solutions,"{'price_amount': 3100000, 'price_currency_code..."


### 18. All the companies that have been acquired on the first trimester of the year. Limit the search to 10 companies, and retrieve only their `name` and `acquisition` fields.

In [45]:
query = {
    "$and":[
        {"founded_month":{"$lte":3}}
        
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name','acquisition']]
df1

Unnamed: 0,name,acquisition
0,StumbleUpon,"{'price_amount': 29000000, 'price_currency_cod..."
1,Gizmoz,"{'price_amount': None, 'price_currency_code': ..."
2,Facebook,
3,Helio,"{'price_amount': 39000000, 'price_currency_cod..."
4,Twitter,


# Bonus
### 19. All the companies that have been founded between 2000 and 2010, but have not been acquired before 2011.

In [59]:
query = {
    "$and":[
        {"founded_year":{"$lte":2010}},
        {"founded_month":{"$gte":2010}},
        {"acquisition.acquired_year":{"$eq":-2011}}
      
        
    ]
}

cur = db.companies.find(query).limit(5)
df =pd.DataFrame(list(cur))
df1=df[['name']]
df1

KeyError: "None of [Index(['name', 'founded_year'], dtype='object')] are in the [columns]"

### 20. All the companies that have been 'deadpooled' after the third year.

In [22]:
# Your Code