In [1]:
# We import the libraries required
from pymongo import MongoClient
from bson.json_util import dumps
import pandas as pd

from companies_collection import *

## Goal of this notebook 
### Apply some queries to companies' collection to find those that meet the following criteria:

- Tech companies that do design and other business activities that might be related to videogames
- Tech startups that raised 1M

In [2]:
# We set the connection to the ironhack database in MongoDB, from where we get the collection companies.
client = MongoClient('localhost:27017')
db = client.get_database('ironhack')
companies = db.get_collection('companies')

In [3]:
# To follow the boss' instructions, we check what information available we have for other similar companies in the collection
companies.find_one().keys()

dict_keys(['_id', 'name', 'permalink', 'crunchbase_url', 'homepage_url', 'blog_url', 'blog_feed_url', 'twitter_username', 'category_code', 'number_of_employees', 'founded_year', 'founded_month', 'founded_day', 'deadpooled_year', 'tag_list', 'alias_list', 'email_address', 'phone_number', 'description', 'created_at', 'updated_at', 'overview', 'image', 'products', 'relationships', 'competitions', 'providerships', 'total_money_raised', 'funding_rounds', 'investments', 'acquisition', 'acquisitions', 'offices', 'milestones', 'video_embeds', 'screenshots', 'external_links', 'partners'])

The most interesting keys to filter by are category code, which stands for the business field of the company, and the total money raised, that can help us identify what startups have been the most successful.

## Company code

In [4]:
# We check for tech companies whose field of business is related to ours.
business_activities = companies.distinct('category_code')
print(business_activities)

[None, 'advertising', 'analytics', 'automotive', 'biotech', 'cleantech', 'consulting', 'design', 'ecommerce', 'education', 'enterprise', 'fashion', 'finance', 'games_video', 'government', 'hardware', 'health', 'hospitality', 'legal', 'local', 'manufacturing', 'medical', 'messaging', 'mobile', 'music', 'nanotech', 'network_hosting', 'news', 'nonprofit', 'other', 'photo_video', 'public_relations', 'real_estate', 'search', 'security', 'semiconductor', 'social', 'software', 'sports', 'transportation', 'travel', 'web']


In [5]:
# We'll maintain those companies working on design, videogames, software and web.
query_category = {'category_code': {'$regex': '(design|games_video|software|web)'}}
queried_db = list(companies.find(query_category))

## Total money raised

In [6]:
query_scale = {'total_money_raised': {'$regex': '(?i)m'}} # This query accepts M amounts and avoids K quantities.
query_amount = {'total_money_raised': {'$gte': '1M'}} # Money raised greater than or equal to 1M.
amount_scale = {'$and': [query_amount, query_scale]} # Two queries together.
projection = {'name': 1,'total_money_raised': 1, '_id': 0}

# This query gives us companies having achieved at least a million in funding rounds. However, order should be fixed.
list(companies.find(amount_scale, projection).sort('total_money_raised', -1).limit(10))

[{'name': 'Tuenti Technologies', 'total_money_raised': '€9M'},
 {'name': 'Quaero', 'total_money_raised': '€99M'},
 {'name': 'Proximic', 'total_money_raised': '€9.46M'},
 {'name': 'Blyk', 'total_money_raised': '€87M'},
 {'name': 'Biometric Security', 'total_money_raised': '€8.97M'},
 {'name': 'Goojet', 'total_money_raised': '€8.3M'},
 {'name': 'KeyNeurotek Pharmaceuticals', 'total_money_raised': '€8.2M'},
 {'name': 'quietrevolution', 'total_money_raised': '€7M'},
 {'name': 'MyFab', 'total_money_raised': '€7M'},
 {'name': 'Internet Mall', 'total_money_raised': '€73M'}]

In [7]:
# We apply all queries at once and export the filtered collection.
query_category = {'category_code': {'$regex': '(design|games_video|software|web)'}}
amount_scale = {'$and': [query_amount, query_scale]}

total_query = {'$and': [amount_scale, query_category]} # we add the first query (about categories), to the previously joined queries (scale and amount).
queried_db = list(companies.find(total_query))

## Query and export with functions

In [8]:
companies = mongo('ironhack', 'companies')

In [10]:
collection_queried (companies)

'The collection has been exported, with a total remaining of 98 companies!'