In [14]:
# | default_exp integrations.ProxyCurl

# ProxyCurl

> classes and functions for using GPT to interact with ProxyCurl

In [15]:
# | exporti
from dataclasses import dataclass, field
import httpx
import udemy_langchain.client as gd
import inspect

In [16]:
# | hide
from nbdev.showdoc import show_doc

# Auth

In [17]:
# | export


@dataclass
class ProxyCurlAuth(gd.Auth):
    """Credentials holder for ProxyCurl, the service this library uses to reach LinkedIn APIs.

    Set up an account at https://proxycurl.com/ to obtain an API key.
    """

    # ProxyCurl API key; kept out of the generated dataclass repr
    api_key: str = field(repr=False)
    # header mapping, a fresh empty dict per instance by default; kept out of the repr
    headers: dict = field(repr=False, default_factory=dict)

    def get_auth_headers(self):
        """Internal helper: build the bearer-token Authorization header from ``api_key``."""
        bearer_token = f"Bearer {self.api_key}"
        return {"Authorization": bearer_token}

In [18]:
show_doc(ProxyCurlAuth)

---

[source](https://github.com/jaewilson07/udemy-langchain/blob/main/udemy_langchain/integrations/ProxyCurl.py#L14){target="_blank" style="float:right; font-size:smaller"}

### ProxyCurlAuth

>      ProxyCurlAuth (api_key:str, headers:dict=<factory>)

this library uses proxycurl to interact with LinkedIn APIs
setup an account at https://proxycurl.com/

In [19]:
# "hello_world" is a placeholder key; get_auth_headers only formats it into a header dict
pc_auth = ProxyCurlAuth(api_key="hello_world")

pc_auth.get_auth_headers()

{'Authorization': 'Bearer hello_world'}

# Routes

In [20]:
# | export


async def get_linked_in_profile_route(
    auth: ProxyCurlAuth,
    linkedin_profile_url: str = None,
    client: httpx.AsyncClient = None,
    json_cache_path: str = None,
    is_ignore_cache: bool = False,
    parent_class: str = None,
    debug_api: bool = False,
):
    """gets a linkedin profile via the proxycurl API

    Args:
        auth: ProxyCurl credentials, forwarded to ``gd.get_data``.
        linkedin_profile_url: public profile URL, sent as the
            ``linkedin_profile_url`` query parameter.
        client: optional ``httpx.AsyncClient``, forwarded to ``gd.get_data``.
        json_cache_path: cache file path forwarded to ``gd.get_data``.  When
            falsy, a per-profile default of
            ``CACHE/linkedin_profile_<profile id>.json`` is used.
        is_ignore_cache: forwarded to ``gd.get_data``.
        parent_class: forwarded to ``gd.get_data``.
        debug_api: forwarded to ``gd.get_data``.

    Returns:
        Whatever ``gd.get_data`` returns for the request.
    """

    url = "https://nubela.co/proxycurl/api/v2/linkedin"

    if not json_cache_path:
        # Derive the cache key from the *profile* URL (not the API endpoint) so
        # each profile gets its own cache file.
        url_id = (linkedin_profile_url or "").replace(
            "https://www.linkedin.com/in/", ""
        )
        url_id = url_id.split("/")[0]
        # f-string so url_id is actually interpolated; "/" avoids the invalid
        # "\l" escape sequence the previous literal contained.
        json_cache_path = f"CACHE/linkedin_profile_{url_id}.json"

    params = {
        "linkedin_profile_url": linkedin_profile_url,
        # optional parameters, currently disabled:
        #     "twitter_profile_url": "https://twitter.com/johnrmarty/",
        #     "facebook_profile_url": "https://facebook.com/johnrmarty/",
        #     "extra": "include",
        #     "github_profile_id": "include",
        #     "facebook_profile_id": "include",
        #     "twitter_profile_id": "include",
        #     "personal_contact_number": "include",
        #     "personal_email": "include",
        #     "inferred_salary": "include",
        #     "skills": "include",
        #     "use_cache": "if-present",
        #     "fallback_to_cache": "on-error",
    }

    res = await gd.get_data(
        url=url,
        params=params,
        auth=auth,
        method="GET",
        client=client,
        json_cache_path=json_cache_path,
        is_ignore_cache=is_ignore_cache,
        parent_class=parent_class,
        debug_api=debug_api,
    )

    return res

In [21]:
show_doc(get_linked_in_profile_route)

---

[source](https://github.com/jaewilson07/udemy-langchain/blob/main/udemy_langchain/integrations/ProxyCurl.py#L27){target="_blank" style="float:right; font-size:smaller"}

### get_linked_in_profile_route

>      get_linked_in_profile_route (auth:__main__.ProxyCurlAuth,
>                                   linkedin_profile_url:str=None,
>                                   client:httpx.AsyncClient=None,
>                                   json_cache_path:str=None,
>                                   is_ignore_cache:bool=False,
>                                   parent_class:str=None, debug_api:bool=False)

gets a linkedin profile via the proxycurl API

In [22]:
# "hello_world" is a placeholder key; the recorded output below reports
# is_from_cache=True for the explicit json_cache_path passed here
pc_auth = ProxyCurlAuth(api_key="hello_world")


await get_linked_in_profile_route(
    auth=pc_auth,
    linkedin_profile_url="https://www.linkedin.com/in/eden-marco",
    debug_api=False,
    json_cache_path="../TEST/linkedin_profile_eden_marco.json",
)

ResponseGetData(is_from_cache=True, is_success=True, status=200, response={'public_identifier': 'eden-marco', 'profile_pic_url': 'https://s3.us-west-000.backblazeb2.com/proxycurl/person/eden-marco/profile?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230601%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230601T061559Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=7cc4fce4ad1ef7d8ab1ffc76c9e9ed89e6de6ae593d315b6e7297145457c7174', 'background_cover_image_url': None, 'first_name': 'Eden', 'last_name': 'Marco', 'full_name': 'Eden Marco', 'follower_count': None, 'occupation': 'Customer Engineer at Google', 'headline': 'Customer Engineer @ Google Cloud | Best-selling Udemy Instructor', 'summary': 'Backend developer, Udemy.com best seller instructor\n', 'country': 'IL', 'country_full_name': 'Israel', 'city': None, 'state': None, 'experiences': [{'starts_at': {'day': 1, 'month': 6, 'year': 2022}, 'ends_at': None, 'company': 'Google', 'company_lin

# Classes and Implementations

In [23]:
# | export
async def scrape_linkedin_profile(
    auth: ProxyCurlAuth,
    linkedin_profile_url: str = None,
    client: httpx.AsyncClient = None,
    json_cache_path: str = None,
    is_ignore_cache: bool = False,
    debug_api: bool = False,
):
    """scrape information from LinkedIn profiles via proxy curl

    Fetches the profile through ``get_linked_in_profile_route`` and returns a
    trimmed copy of the response.

    Args:
        auth: ProxyCurl credentials.
        linkedin_profile_url: public profile URL to look up.
        client: optional ``httpx.AsyncClient`` passed to the route.
        json_cache_path: cache file path passed to the route.
        is_ignore_cache: passed to the route.
        debug_api: passed to the route.

    Returns:
        dict: the top-level response entries, minus every entry whose value is
        falsy (``None``, empty list, empty string, ...) and minus the
        ``people_also_viewed`` and ``certifications`` keys.  Entries under
        ``groups`` have their ``profile_pic_url`` key removed.
    """

    res = await get_linked_in_profile_route(
        auth=auth,
        linkedin_profile_url=linkedin_profile_url,
        client=client,
        json_cache_path=json_cache_path,
        is_ignore_cache=is_ignore_cache,
        debug_api=debug_api,
    )

    excluded_keys = ("people_also_viewed", "certifications")

    profile_obj = {
        key: value
        for key, value in res.response.items()
        if value and key not in excluded_keys
    }

    if profile_obj.get("groups"):
        # Rebuild each group without its picture URL rather than pop()-ing it:
        # a group lacking the key no longer raises KeyError, and the dicts
        # nested inside res.response are left unmodified.
        profile_obj["groups"] = [
            {
                key: value
                for key, value in group_obj.items()
                if key != "profile_pic_url"
            }
            for group_obj in profile_obj["groups"]
        ]

    return profile_obj

In [24]:
show_doc(scrape_linkedin_profile)

---

[source](https://github.com/jaewilson07/udemy-langchain/blob/main/udemy_langchain/integrations/ProxyCurl.py#L76){target="_blank" style="float:right; font-size:smaller"}

### scrape_linkedin_profile

>      scrape_linkedin_profile (auth:__main__.ProxyCurlAuth,
>                               linkedin_profile_url:str=None,
>                               client:httpx.AsyncClient=None,
>                               json_cache_path:str=None,
>                               is_ignore_cache:bool=False,
>                               debug_api:bool=False)

scrape information from LinkedIn profiles

In [25]:
# "hello_world" is a placeholder key; same profile URL and cache path as the route demo above
pc_auth = ProxyCurlAuth(api_key="hello_world")

await scrape_linkedin_profile(
    auth=pc_auth,
    linkedin_profile_url="https://www.linkedin.com/in/eden-marco",
    debug_api=False,
    json_cache_path="../TEST/linkedin_profile_eden_marco.json",
)

{'public_identifier': 'eden-marco',
 'profile_pic_url': 'https://s3.us-west-000.backblazeb2.com/proxycurl/person/eden-marco/profile?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=0004d7f56a0400b0000000001%2F20230601%2Fus-west-000%2Fs3%2Faws4_request&X-Amz-Date=20230601T061559Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=7cc4fce4ad1ef7d8ab1ffc76c9e9ed89e6de6ae593d315b6e7297145457c7174',
 'first_name': 'Eden',
 'last_name': 'Marco',
 'full_name': 'Eden Marco',
 'occupation': 'Customer Engineer at Google',
 'headline': 'Customer Engineer @ Google Cloud | Best-selling Udemy Instructor',
 'summary': 'Backend developer, Udemy.com best seller instructor\n',
 'country': 'IL',
 'country_full_name': 'Israel',
 'experiences': [{'starts_at': {'day': 1, 'month': 6, 'year': 2022},
   'ends_at': None,
   'company': 'Google',
   'company_linkedin_profile_url': 'https://www.linkedin.com/company/google/',
   'title': 'Customer Engineer',
   'description': None,
   'location': 'Tel Avi

In [26]:
# | hide
from nbdev.showdoc import *