## Libraries
    Importing all the necessary libraries

In [1]:
import os
import re
import urllib.parse
from getpass import getpass
from urllib.parse import quote,unquote

import pandas as pd
import requests
import unidecode


## Text Normalization 
    Removing emojis and other Unicode symbols from the text and normalizing it to plain ASCII

    link to the original thread >> https://gist.github.com/slowkow/7a7f61f495e3dbb7e3d767f97bd7304b

In [2]:
def clean_data(data):
    """Return ``data`` as plain, CSV-friendly text.

    The steps run in this order: remove emojis/pictographs, spell out German
    umlauts, drop commas and turn semicolons into commas, remove diacritics
    with ``unidecode`` and trim surrounding whitespace.
    """
    # Character class of everything treated as an emoji or decorative symbol.
    # Note: the wide ranges below (e.g. U+24C2-U+1F251) also match many
    # code points that are not emojis.
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
        "\U00002500-\U00002BEF"  # box drawing, geometric shapes, dingbats and other symbols
        "\U00002702-\U000027B0"  # dingbats
        "\U00002702-\U000027B0"  # (same range repeated, kept as in the original pattern)
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"  # every supplementary-plane code point
        "\u2640-\u2642"          # gender symbols
        "\u2600-\u2B55"
        "\u200d"                 # zero-width joiner
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\ufe0f"                 # variation selector-16
        "\u3030"
        "]+",
        re.UNICODE,
    )

    # Strip the emojis and trim what is left.
    text = emoji_pattern.sub('', data).strip()

    # Spell out German umlauts before the diacritics are removed.
    umlaut_spellings = (
        ('Ü', 'Ue'), ('Ä', 'Ae'), ('Ö', 'Oe'),
        ('ü', 'ue'), ('ä', 'ae'), ('ö', 'oe'),
    )
    for umlaut, spelled_out in umlaut_spellings:
        text = text.replace(umlaut, spelled_out)

    # Existing commas are dropped first; afterwards every semicolon becomes a comma.
    text = text.replace(',', '')
    text = text.replace(';', ',')

    # Remove diacritics and return the result without surrounding whitespace.
    return unidecode.unidecode(text).strip()

## Function that builds one DataFrame record (ID, user code, name, position, connection degree, location and profile link) from a LinkedIn search-result entry; it is applied to every contact of the target_user

In [3]:
def employee_information(employee):
    """Flatten one LinkedIn search-result entity into a contact record.

    Every field is extracted independently and best-effort: when a field is
    missing or malformed it falls back to ``"N/A"`` (``0`` for the connection
    degree) instead of aborting the whole record.

    Parameters
    ----------
    employee : dict
        Entity as passed by ``contacts`` (the ``item["entityResult"]`` of a
        search hit).

    Returns
    -------
    dict
        Record with the keys ``userID``, ``userCode``, ``Nombre``, ``Puesto``,
        ``Grado``, ``Ubicacion`` and ``Link``.
    """
    # Titles/tokens dropped from the display name when they appear as standalone words.
    badwords = ['Prof.', 'Dr.', 'M.A.', ',', 'LL.M.']

    # Name: cleaned title text, split on spaces, without the tokens above.
    # e.g. 'Dr. Filippos Dorezi' >> 'Filippos Dorezi'
    try:
        name_parts = clean_data(employee["title"]["text"]).split(" ")
        account_name = " ".join(part for part in name_parts if part not in badwords)
    except Exception:
        # This branch used to `pass`, leaving the name unassigned so that the
        # final return raised UnboundLocalError. Use the same placeholder as
        # the other fields instead.
        account_name = "N/A"

    # Position (headline shown under the name).
    try:
        position = clean_data(employee["primarySubtitle"]["text"])
    except Exception:
        position = "N/A"

    # Location, kept exactly as delivered (not passed through clean_data).
    try:
        location = employee["secondarySubtitle"]["text"]
    except Exception:
        location = "N/A"

    # Profile link and user code both come from the navigation URL without its
    # query string. The user code is the last path segment and is the value
    # that can be passed to `contacts` as `user`.
    # e.g. https://www.linkedin.com/in/XXXX?foo=bar >> link .../in/XXXX, code XXXX
    try:
        base_url = employee["navigationUrl"].split("?")[0]
        profile_link = unquote(base_url)
        contact_userCode = unquote(base_url.split("/")[-1])
    except Exception:
        profile_link = "N/A"
        contact_userCode = "N/A"

    # Connection degree: 'DISTANCE_2' >> 2. Anything that cannot be parsed
    # (including a missing value) is reported as degree 0.
    try:
        degree_str = employee["entityCustomTrackingInfo"]["memberDistance"]
        degree = int(degree_str.split("DISTANCE_")[-1])
    except Exception:
        degree = 0

    # Unique ID: last ':'-separated part of the profile entity URN. It is used
    # as primary key and for follow-up requests.
    try:
        profile = employee['image']['attributes'][0]['detailData']['nonEntityProfilePicture']['profile']
        contact_id_urn = profile['entityUrn'].split(":")[-1]
    except Exception:
        contact_id_urn = "N/A"

    return {
        "userID": contact_id_urn,
        "userCode": contact_userCode,
        "Nombre": account_name,
        "Puesto": position,
        "Grado": degree,
        "Ubicacion": location,
        "Link": profile_link,
    }


## Information from the contacts of the target_user
    We search and extract a list with the information for each of the contacts of our target user.
    In order to do so we need 2 cookies from our browser, li_at and JSESSIONID.

In [2]:
# li_at <- Cookie information about the active session
# JSESSIONID <- Cookie information about the active session
# user <- Vanity name of the target LinkedIn account (last path segment of the profile URL).
# e.g. https://www.linkedin.com/in/XXXX/ >> contacts("XXXX", li_at, JSESSIONID)

def contacts(user, li_at, JSESSIONID, max_results=500, page_size=3, timeout=30):
    """Return a DataFrame with the target user followed by their connections.

    The first row describes the target (connection degree 0); the remaining
    rows are the de-duplicated search results for "connections of" the target,
    without anonymous "LinkedIn Member" entries.

    Parameters
    ----------
    user : str
        Vanity name of the target account (last path segment of the profile URL).
    li_at, JSESSIONID : str
        Cookies of an active LinkedIn session. ``JSESSIONID`` is also sent as
        the ``Csrf-Token`` header.
    max_results : int, optional
        Exclusive upper bound for the search ``start`` offset (default 500).
    page_size : int, optional
        Step between consecutive ``start`` offsets (default 3).
    timeout : float, optional
        Timeout in seconds applied to every HTTP request (default 30).

    Returns
    -------
    pandas.DataFrame
        Columns ``userID``, ``userCode``, ``Nombre``, ``Puesto``, ``Grado``,
        ``Ubicacion`` and ``Link``.

    Raises
    ------
    requests.HTTPError
        If the profile request for the target returns an error status.
    KeyError, IndexError
        If the profile response does not contain the expected profile element.
    """
    headers = {
        # Simulate a request coming from a browser (Edge on Windows 10).
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
        # The request body content type is declared as JSON.
        'Content-type': 'application/json',
        # CSRF token; it carries the same value as the JSESSIONID cookie.
        'Csrf-Token': JSESSIONID,
        # Ask for revalidation instead of a cached response.
        'Cache-Control': 'max-age=0',
        # Keep the connection open across requests.
        'Connection': 'keep-alive'
    }
    cookies_dict = {"li_at": li_at, "JSESSIONID": JSESSIONID}

    # URL-encode the vanity name so it is safe inside the query string.
    cod_user = quote(user)

    # --- Target profile -----------------------------------------------------
    profile_url = f"https://www.linkedin.com/voyager/api/graphql?variables=(vanityName:{cod_user})&queryId=voyagerIdentityDashProfiles.d8946426aeb23ab919d181f179a67a4d"
    response = requests.get(profile_url, headers=headers, cookies=cookies_dict, timeout=timeout)
    # Fail with a clear HTTP error (e.g. expired cookies) rather than a KeyError below.
    response.raise_for_status()
    profile = response.json()['data']['identityDashProfilesByMemberIdentity']['elements'][0]

    # Unique ID: last ':'-separated part of the entity URN.
    id_urn = profile['entityUrn'].split(":")[-1]
    name = profile['firstName'] + " " + profile['lastName']
    position = profile['headline']
    location = profile['geoLocation']['geo']['defaultLocalizedName']

    # The raw (unquoted) vanity name is stored as userCode so the target row
    # matches the contact rows, whose user codes are unquoted as well.
    target_row = pd.DataFrame({
        "userID": [id_urn],
        "userCode": [user],
        "Nombre": [name],
        "Puesto": [position],
        "Grado": [0],
        "Ubicacion": [location],
        "Link": ["https://www.linkedin.com/in/" + user],
    })

    # --- Contacts -------------------------------------------------------------
    list_contacts = []
    for start in range(0, max_results, page_size):
        # Search for people who are connections of `id_urn`, starting at offset `start`.
        search_url = f"https://www.linkedin.com/voyager/api/graphql?variables=(start:{start},origin:MEMBER_PROFILE_CANNED_SEARCH,query:(flagshipSearchIntent:SEARCH_SRP,queryParameters:List((key:connectionOf,value:List({id_urn})),(key:network,value:List(F,S,O)),(key:resultType,value:List(PEOPLE))),includeFiltersInResponse:false))&queryId=voyagerSearchDashClusters.cc5c3924cc0402d1d8838b15bc96aa0b"
        response = requests.get(search_url, headers=headers, cookies=cookies_dict, timeout=timeout)
        page = response.json()

        try:
            items = page['data']['searchDashClustersByAll']['elements'][0]['items']
        except (KeyError, IndexError, TypeError):
            # No result cluster on this page: nothing to add.
            continue
        if not isinstance(items, list):
            continue

        for item in items:
            try:
                list_contacts.append(employee_information(item["item"]['entityResult']))
            except Exception:
                # Skip only the malformed entry; the rest of the page is still processed.
                continue

    # Without any contact there is no 'Nombre' column to filter on, so return
    # the target row alone.
    if not list_contacts:
        return target_row

    contacts_df = pd.DataFrame(list_contacts).drop_duplicates()
    contacts_df = contacts_df[contacts_df['Nombre'] != "LinkedIn Member"]

    # Target first (degree 0), then the contacts, with a fresh 0..n-1 index.
    return pd.concat([target_row, contacts_df], ignore_index=True)

## Variables that we need in order to run the Script 
    Assign the values to all of the variables that are needed to run the script.
    Run the script, which returns a structured DataFrame with the information of the target_user's contacts.

In [7]:
# The session cookies are secrets, so they are read from the environment
# (LINKEDIN_LI_AT / LINKEDIN_JSESSIONID) with a hidden prompt as fallback,
# instead of being stored in the notebook.
# NOTE(review): the cookie values that used to be hard-coded here should be
# treated as leaked; the corresponding LinkedIn session should be signed out.
li_at = os.environ.get("LINKEDIN_LI_AT") or getpass("li_at cookie: ")
JSESSIONID = os.environ.get("LINKEDIN_JSESSIONID") or getpass("JSESSIONID cookie: ")

# Vanity name of the target account (last path segment of the profile URL).
user = "nathalie-heijkoop"

In [8]:
# Build the contact table for the configured target. As the last expression of
# the cell, the resulting DataFrame is displayed as the cell output.
contacts(user=user, li_at=li_at, JSESSIONID=JSESSIONID)

Unnamed: 0,userID,userCode,Nombre,Puesto,Grado,Ubicacion,Link
0,ACoAACIhobQB1CAIHrk1QnCnz0RiZuRynUSSO2A,nathalie-heijkoop,Nathalie Heijkoop,Directora Hotel en SmartRental Collection,0,"Madrid, Community of Madrid, Spain",https://www.linkedin.com/in/nathalie-heijkoop
1,ACoAAAyy8eoBLyweCBfzMrNwE7NbFiDk1q58tO4,melisa-luca-1224275b,Melisa Luca,Expansion & Investment Manager,2,Madrid,https://www.linkedin.com/in/melisa-luca-1224275b
2,ACoAADqaW9QBX6JQMUnh80d60zBG4KNkOqqjJWI,melissa-elvir-41832a235,Melissa Elvir,Gobernanta | Quality Manager |Tecnico superior...,2,Spain,https://www.linkedin.com/in/melissa-elvir-4183...
3,ACoAACh6t5oB8BRB-9Y_ewfdbV9j5LvOYh9eeeg,marta-carrion-recio-78a53316b,Marta Carrion Recio,HRBP & Payroll,2,Greater Madrid Metropolitan Area,https://www.linkedin.com/in/marta-carrion-reci...
4,ACoAADRRp1QBh51BQoU2wrorSoPvkWeL1qbsex0,carlos-ocampo-ospina,Carlos Ocampo,Software Developer en aspaNETCONOMY,2,Madrid,https://www.linkedin.com/in/carlos-ocampo-ospina
...,...,...,...,...,...,...,...
491,ACoAAAMX5jcBb_GWQ9Bg8WkjoBLNacBk4OissIE,juan-carlos-ugerman-61b46915,Juan Carlos Ugerman,Titular en Ugerman Editor,2,Argentina,https://www.linkedin.com/in/juan-carlos-ugerma...
492,ACoAACj8-RMBR0L1TJK2aMN2W3qkXRhVG-_H94A,jose-enrique-bous-serrano-86b36b172,Jose Enrique Bous Serrano,Chef ejecutivo,2,Greater Madrid Metropolitan Area,https://www.linkedin.com/in/jose-enrique-bous-...
493,ACoAAAZCkAIBGp2KzkjfAmwiyjYZeDOLnvLcXkU,emmanuellegris,Emmanuel LEGRIS,Travel | Hospitality | Tech,2,Sigean,https://www.linkedin.com/in/emmanuellegris
494,ACoAACY0WGMBO_Q6TkABw-FaBM0SXC35V--X2VA,jamal-khan-jr🇪🇸-7977a815a,Jamal Khan Jr,Guest Experience & Front Executive at Front Of...,2,Greater Barcelona Metropolitan Area,https://www.linkedin.com/in/jamal-khan-jr🇪🇸-79...
