    Matrix Creation

In [4]:
import pandas as pd

filters = {
    "Job Title": ["Title1", "Title2", "Title3"],
    "Location": ["Location1"],
    "Last Update": ["LastUpdate1"],
    "Keyword1": ["Keyword1.1", "Keyword1.2", "Keyword1.3"],
    "Keyword2": ["Keyword2.1", "Keyword2.2", "Keyword2.3"],
}

matrix = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in filters.items()]))
matrix = matrix.fillna("")

print(matrix)

  Job Title   Location  Last Update    Keyword1    Keyword2
0    Title1  Location1  LastUpdate1  Keyword1.1  Keyword2.1
1    Title2                          Keyword1.2  Keyword2.2
2    Title3                          Keyword1.3  Keyword2.3


In [1]:
import pandas as pd
from datetime import datetime, timedelta

filters = {
    "Job Title": ["Product Manager", "Product Owner", "Technical Product Manager"],
    "Location": ["United States"],
    "Last Update": [(datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")],
    "Keyword1": ["Agile", "Scrum", "Kanban"],
    "Keyword2": ["Roadmap", "Backlog", "Feature Prioritization"],
}

matrix = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in filters.items()]))
matrix = matrix.fillna("")

In [159]:
matrix.head()

Unnamed: 0,Job Title,Location,Last Update,Keyword1,Keyword2
0,Product Manager,United States,2024-12-30 07:00:53,Agile,Roadmap
1,Product Owner,,,Scrum,Backlog
2,Technical Product Manager,,,Kanban,Feature Prioritization


- Since __Location__ and __Last Update__ have no synonyms or variability, they act as "global filters" that must be applied in every search query. So I'll use them at the start of the tree to reduce candidates right away.

---

    API Search function

In [189]:
import requests
import json

In [3]:
def run_api_search(filters):
    base_url = "https://api.coresignal.com/cdapi/v1/professional_network/employee/search/filter"
    
    payload = {
        "experience_deleted": False,
        "active_experience": True,
        "country": filters.get("Location", ""),
        "last_updated_gte": (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    }

    if "Job Title" in filters:
        payload["title"] = filters["Job Title"]
    
    if "Keyword" in filters:
        payload["keyword"] = filters["Keyword"]

    headers = {
        'Content-Type': 'application/json',
        'Authorization': bearer
    }

    all_ids = []
    next_page_after = None

    while True:
        # Modify URL for pagination if needed
        request_url = base_url if not next_page_after else f"{base_url}?after={next_page_after}"

        response = requests.post(request_url, headers=headers, data=json.dumps(payload))

        if response.status_code != 200:
            print(f"Error: API request failed with status {response.status_code}")
            print(f"Response Text: {response.text}")
            break
        
        try:
            response_json = response.json()
            all_ids.extend(response_json)

            # Extract pagination info
            next_page_after = response.headers.get("x-next-page-after")

            # Ensure next_page_after is a valid integer
            if next_page_after and next_page_after.isdigit():
                next_page_after = int(next_page_after)  # Convert to integer
            else:
                break  # Stop if no more pages

        except json.JSONDecodeError:
            print("Error: Invalid JSON response")
            break

    return all_ids, len(all_ids)


In [None]:
filters = {
    "Location": "United States",
}

list, count = run_api_search(filters)

In [179]:
count

2921

---


    Tree Creation

In [205]:
def create_level_zero(matrix):
    root = {
        "filters": {
            "Location": matrix["Location"].iloc[0],
            "Last Update": matrix["Last Update"].iloc[0]
        },
        "children": []
    }

    return root

In [206]:
root = create_level_zero(matrix)

In [207]:
root

{'filters': {'Location': 'United States',
  'Last Update': '2024-12-30 07:00:53'},
 'children': []}

In [208]:
def create_level_one(root, matrix):
    job_titles = matrix["Job Title"].dropna().tolist()
    keyword1_values = matrix["Keyword1"].dropna().tolist()
    keyword2_values = matrix["Keyword2"].dropna().tolist()

    branch1 = {
        "filters": {
            "Location": root["filters"]["Location"],
            "Last Update": root["filters"]["Last Update"],
            "Job Title": f'"{job_titles[0]}"' if job_titles else "",
            "Keyword": f'"{keyword1_values[0]}"' if keyword1_values else ""
        },
        "children": []
    }

    branch2 = {
        "filters": {
            "Location": root["filters"]["Location"],
            "Last Update": root["filters"]["Last Update"],
            "Job Title": f'"{job_titles[0]}"' if job_titles else "",
            "Keyword": f'"{keyword2_values[0]}"' if keyword2_values else ""
        },
        "children": []
    }

    root["children"].extend([branch1, branch2])

In [209]:
create_level_one(root, matrix)

In [210]:
root

{'filters': {'Location': 'United States',
  'Last Update': '2024-12-30 07:00:53'},
 'children': [{'filters': {'Location': 'United States',
    'Last Update': '2024-12-30 07:00:53',
    'Job Title': '"Product Manager"',
    'Keyword': '"Agile"'},
   'children': []},
  {'filters': {'Location': 'United States',
    'Last Update': '2024-12-30 07:00:53',
    'Job Title': '"Product Manager"',
    'Keyword': '"Roadmap"'},
   'children': []}]}

In [190]:
for idx, child in enumerate(root["children"], start=1):
    filters = child['filters']  
    ids, num_ids = run_api_search(filters)
    print(f"Level 1 - Child {idx}")
    print(f"Filters Sent to API: {json.dumps(filters, indent=2)}")
    print(f"Num IDs Returned: {num_ids}\n")


Level 1 - Child 1
Filters Sent to API: {
  "Location": "United States",
  "Last Update": "2024-12-30 07:00:53",
  "Job Title": "\"Product Manager\"",
  "Keyword": "\"Agile\""
}
Num IDs Returned: 13315

Level 1 - Child 2
Filters Sent to API: {
  "Location": "United States",
  "Last Update": "2024-12-30 07:00:53",
  "Job Title": "\"Product Manager\"",
  "Keyword": "\"Roadmap\""
}
Num IDs Returned: 8576



In [211]:
def create_level_two(root, matrix):
    job_titles = matrix["Job Title"].dropna().tolist()

    for child in root["children"]:  # Iterate through Level 1 branches
        if len(job_titles) > 1:  # Ensure there are at least 2 job titles
            new_branch = {
                "filters": {
                    "Location": child["filters"]["Location"],
                    "Last Update": child["filters"]["Last Update"],
                    "Job Title": f'"{job_titles[0]}" AND "{job_titles[1]}"',
                    "Keyword": child["filters"]["Keyword"]  # Keep existing keyword filter
                },
                "children": []
            }
            child["children"].append(new_branch)  # Add the new branch to Level 1 child

In [212]:
create_level_two(root, matrix)

In [213]:
root

{'filters': {'Location': 'United States',
  'Last Update': '2024-12-30 07:00:53'},
 'children': [{'filters': {'Location': 'United States',
    'Last Update': '2024-12-30 07:00:53',
    'Job Title': '"Product Manager"',
    'Keyword': '"Agile"'},
   'children': [{'filters': {'Location': 'United States',
      'Last Update': '2024-12-30 07:00:53',
      'Job Title': '"Product Manager" AND "Product Owner"',
      'Keyword': '"Agile"'},
     'children': []}]},
  {'filters': {'Location': 'United States',
    'Last Update': '2024-12-30 07:00:53',
    'Job Title': '"Product Manager"',
    'Keyword': '"Roadmap"'},
   'children': [{'filters': {'Location': 'United States',
      'Last Update': '2024-12-30 07:00:53',
      'Job Title': '"Product Manager" AND "Product Owner"',
      'Keyword': '"Roadmap"'},
     'children': []}]}]}

In [203]:
for idx, child in enumerate(root["children"], start=1):
    for sub_idx, sub_child in enumerate(child["children"], start=1):
        ids, num_ids = run_api_search(sub_child["filters"])
        print(f"Level 2 - Child {idx}.{sub_idx}")
        print(f"Filters: {json.dumps(sub_child['filters'], indent=2)}")
        print(f"Num IDs Returned: {num_ids}\n")


Level 2 - Child 1.1
Filters: {
  "Location": "United States",
  "Last Update": "2024-12-30 07:00:53",
  "Job Title": "\"Product Manager\" AND \"Product Owner\"",
  "Keyword": "\"Agile\""
}
Num IDs Returned: 782

Level 2 - Child 2.1
Filters: {
  "Location": "United States",
  "Last Update": "2024-12-30 07:00:53",
  "Job Title": "\"Product Manager\" AND \"Product Owner\"",
  "Keyword": "\"Roadmap\""
}
Num IDs Returned: 325



----

In [22]:
import json
import requests

In [None]:
url = "https://api.coresignal.com/cdapi/v1/professional_network/employee/search/filter"

payload = json.dumps({
    "experience_deleted": False,
    "keyword": '\"performance marketing manager\" OR (Digital Marketer) OR (Paid Media Manager)" AND "(Google Analytics) OR (Google) OR (Attribution Marketing) OR (Attribution)”',
    "location": "Virginia"
})

headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '
}

response = requests.request("POST", url, headers=headers, data=payload)

In [24]:
len(response.json())

1000

In [None]:
url = "https://api.coresignal.com/cdapi/v1/professional_network/employee/search/filter"

payload = json.dumps({
    "experience_deleted": False,
    "keyword": '\"performance marketing manager\" OR (Digital Marketer) OR (Paid Media Manager)" AND "(Google Analytics) OR (Google) OR (Attribution Marketing) OR (Attribution)”',
    "location": "Virginia"
})

headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '
}

all_ids = []
next_page_after = None

while True:
    # Modify URL for pagination if needed
    request_url = url if not next_page_after else f"{url}?after={next_page_after}"

    response = requests.post(request_url, headers=headers, data=json.dumps(payload))

    if response.status_code != 200:
        print(f"Error: API request failed with status {response.status_code}")
        print(f"Response Text: {response.text}")
        break
    
    try:
        response_json = response.json()
        all_ids.extend(response_json)

        # Extract pagination info
        next_page_after = response.headers.get("x-next-page-after")

        # Ensure next_page_after is a valid integer
        if next_page_after and next_page_after.isdigit():
            next_page_after = int(next_page_after)  # Convert to integer
        else:
            break  # Stop if no more pages

    except json.JSONDecodeError:
        print("Error: Invalid JSON response")
        break

print(f"Total IDs Retrieved: {len(all_ids)}")


Error: API request failed with status 422
Response Text: {"detail":[{"type":"model_attributes_type","loc":["body"],"msg":"Input should be a valid dictionary or object to extract fields from","input":"{\"experience_deleted\": false, \"keyword\": \"\\\"performance marketing manager\\\" OR (Digital Marketer) OR (Paid Media Manager)\\\" AND \\\"(Google Analytics) OR (Google) OR (Attribution Marketing) OR (Attribution)\\u201d\", \"location\": \"Virginia\"}"}]}
Total IDs Retrieved: 0


In [None]:
from datetime import datetime, timedelta
import requests
import json

url = "https://api.coresignal.com/cdapi/v1/professional_network/employee/search/filter"

payload = {
    #"last_updated_gte": (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S"),
    #"experience_deleted": False,
    #"active_experience": True,
    "location": "New York",
    #"experience_title": '((Product Manager) OR (Product Owner) OR (Technical Product Manager))',
    "keyword": '((Agile) OR (Scrum) OR (Kanban)) AND ((Roadmap) OR (Backlog) OR (Feature Prioritization))'
}
# keywords: cap it at 10 total -> 7 synonyms AND 3 seniority

headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '
}

all_ids = []
next_page_after = None

while True:
    # Modify URL for pagination if needed
    request_url = url if not next_page_after else f"{url}?after={next_page_after}"

    response = requests.post(request_url, headers=headers, data=json.dumps(payload))

    if response.status_code != 200:
        print(f"Error: API request failed with status {response.status_code}")
        print(f"Response Text: {response.text}")
        break
    
    try:
        response_json = response.json()
        all_ids.extend(response_json)

        # Extract pagination info
        next_page_after = response.headers.get("x-next-page-after")

        # Ensure next_page_after is a valid integer
        if next_page_after and next_page_after.isdigit():
            next_page_after = int(next_page_after)  # Convert to integer
        else:
            break  # Stop if no more pages

    except json.JSONDecodeError:
        print("Error: Invalid JSON response")
        break

print(f"Total IDs Retrieved: {len(all_ids)}")


Total IDs Retrieved: 11788


In [43]:
all_ids

[3226282,
 9836166,
 52724263,
 56420983,
 57406051,
 83449255,
 85607911,
 90041848,
 99013345,
 99717712,
 109277262,
 110883456,
 119275034,
 139162117,
 166236073,
 190948396,
 191451426,
 205300155,
 212024014,
 221007616,
 222326232,
 247139288,
 269583625,
 272329835,
 273300408,
 276626210,
 278076375,
 280424590,
 281258824,
 285482396,
 287309283,
 288410079,
 351782518,
 352329504,
 399951145,
 401292335,
 422679749,
 434125833,
 448452349,
 457160283,
 460384340,
 467001896,
 469968920,
 475855688,
 504984175,
 529216240,
 573635424,
 574418712,
 577268817,
 585130624,
 626686525,
 634372057,
 792873049,
 801426361]

In [None]:
def collect_employees(ids):
    base_url = 'https://api.coresignal.com/cdapi/v1/professional_network/employee/collect'
    
    headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer '
    }

    profiles = {}

    for id in ids:
        url = f"{base_url}/{id}"
        response = requests.get(url, headers=headers)

        if response.status_code == 200:
            profiles[id] = response.json()
        else:
            print("Error", f"\nResponse:{response.text}")
    
    return profiles
            

In [44]:
test_ids = [all_ids[0], all_ids[1]]
profiles = collect_employees(test_ids)

In [45]:
test_ids

[3226282, 9836166]

In [46]:
print(json.dumps(profiles, indent=2))

{
  "3226282": {
    "id": 3226282,
    "name": "Jessica Campomanes",
    "first_name": "Jessica",
    "last_name": "Campomanes",
    "title": "Performance Marketing Manager at Guardian Life",
    "user_generated_headline": "Performance Marketing Manager at Guardian Life",
    "url": "https://www.linkedin.com/in/jcampomanes",
    "hash": "3441edd8b09a8219437a663a383967fd",
    "canonical_url": "https://www.linkedin.com/in/jcampomanes",
    "canonical_hash": "3441edd8b09a8219437a663a383967fd",
    "member_shorthand_name": "jcampomanes",
    "member_shorthand_name_hash": "09fd10c875acbe547569203456280e25",
    "canonical_shorthand_name": "jcampomanes",
    "canonical_shorthand_name_hash": "09fd10c875acbe547569203456280e25",
    "created": "2016-07-23 02:34:11",
    "last_updated": "2024-11-15 22:47:29",
    "last_updated_ux": 1731710849,
    "deleted": 0,
    "last_response_code": 200,
    "logo_url": "https://static.licdn.com/aero-v1/sc/h/9c8pery4andzj6ohjkjp54ma2",
    "summary": "Thou