# Data Fetcher

Fetch data from the Crunchbase API and save it to a CSV file.

In [15]:
import logging
import os

import pandas as pd
import requests

In [17]:
# Configure the API key and the request parameters.
#
# The key is read from the CRUNCHBASE_API_KEY environment variable so it can
# be supplied without editing this file.
# SECURITY NOTE(review): the literal fallback is kept only so existing runs
# keep working; a credential committed to source should be rotated and the
# fallback removed.
api_key = os.environ.get("CRUNCHBASE_API_KEY", "cb073f156ecb76c73a287f4ca7cb3e8a")

# Set the base URL for the Crunchbase API
base_url = "https://api.crunchbase.com/api/v4/"

# Set the endpoint for searching organizations
endpoint = "searches/organizations"

# Full search URL, derived from the two parts above so they cannot drift apart.
url = base_url + endpoint

# Query-string style parameters that carry the key as "user_key".
query = {
    "cards": "funding_rounds,facts",
    "locations": "China",
    "categories": "enterprise software",
    "limit": 1000,  # number of results returned per page; adjust as needed
    "user_key": api_key,
}

# Set the headers with your API key
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "X-cb-user-key": api_key,
}



In [20]:
# Send the search request and parse the result.
# DEBUG level makes urllib3 log every connection and request line.
logging.basicConfig(level=logging.DEBUG)

# Upper bound, in seconds, on how long the request may wait for the server.
# Without a timeout, requests can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT_SECONDS = 30

# Set the query payload: return four fields per organization, ordered by
# ascending rank, restricted by the three predicates below, 50 results max.
payload = {
    "field_ids": [
        "identifier",
        "location_identifiers",
        "short_description",
        "rank_org",
    ],
    "order": [
        {
            "field_id": "rank_org",
            "sort": "asc",
        },
    ],
    "query": [
        # Total funding between 25M and 100M USD.
        # NOTE: the recorded run of this cell was rejected with HTTP 400 /
        # MD403 "insufficient permissions to search field funding_total".
        {
            "type": "predicate",
            "field_id": "funding_total",
            "operator_id": "between",
            "values": [
                {
                    "value": 25000000,
                    "currency": "usd",
                },
                {
                    "value": 100000000,
                    "currency": "usd",
                },
            ],
        },
        # Location filter by UUID.
        # NOTE(review): presumably the identifier for China -- confirm.
        {
            "type": "predicate",
            "field_id": "location_identifiers",
            "operator_id": "includes",
            "values": [
                "6106f5dc-823e-5da8-40d7-51612c0b2c4e",
            ],
        },
        # Only organizations carrying the "company" facet.
        {
            "type": "predicate",
            "field_id": "facet_ids",
            "operator_id": "includes",
            "values": [
                "company",
            ],
        },
    ],
    "limit": 50,
}

# Reset the result first so that a failed request cannot leave a stale `data`
# from an earlier run of this cell for later cells to pick up silently.
data = None

# Make the API request
response = requests.post(
    base_url + endpoint,
    json=payload,
    headers=headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
print(response.status_code)
print(response.headers)

try:
    # Turn 4xx/5xx answers into an exception, then decode the JSON body.
    response.raise_for_status()
    data = response.json()
except requests.exceptions.RequestException as e:
    # Show both the error and the raw body, which holds the API's explanation.
    print(e)
    print(f'response: {response.text}')


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.crunchbase.com:443
DEBUG:urllib3.connectionpool:https://api.crunchbase.com:443 "POST /api/v4/searches/organizations HTTP/1.1" 400 164


400
{'Content-Type': 'application/json', 'Date': 'Sat, 30 Mar 2024 01:41:18 GMT', 'Server': 'openresty', 'x-cb-request-took': '0', 'Content-Length': '164', 'Connection': 'keep-alive'}
400 Client Error: Bad Request for url: https://api.crunchbase.com/api/v4/searches/organizations
response: [{"message":"insufficient permissions to search field funding_total","code":"MD403","entity_def_id":"organization","field_id":"funding_total","entitlement_ids":[]}]


In [None]:


# Extract company data: build one summary record per entity in the response.
companies = []
for item in data["entities"]:
    # Treat a missing or empty "cards" section as empty instead of raising
    # KeyError; the record then simply has no funding or operating data.
    cards = item.get("cards") or {}

    company = {
        "name": item["properties"]["identifier"]["value"],
        "funding_rounds": [],
        "operating_data": {},
    }

    # Extract funding data. The loop variable is deliberately not called
    # `round`, which would shadow the builtin. Fields a round does not carry
    # are recorded as None rather than aborting the whole extraction.
    for funding_round in cards.get("funding_rounds") or []:
        money_raised = funding_round.get("money_raised") or {}
        investors = funding_round.get("investors")
        company["funding_rounds"].append({
            "amount": money_raised.get("value_usd"),
            "announced_on": funding_round.get("announced_on"),
            "investor_count": len(investors) if investors is not None else None,
        })

    # Extract operating data.
    facts = cards.get("facts") or {}
    if "company_size" in facts:
        company["operating_data"]["employee_count"] = facts["company_size"]["value_int"]
    if "ipo_status" in facts:
        company["operating_data"]["ipo_status"] = facts["ipo_status"]["value"]
    # Add other operating data such as revenue or profit here, if available.

    companies.append(company)
