In [1]:
import csv
import json
import os
import requests
from dotenv import load_dotenv
from pathlib import Path

env_path = Path('../..') / '.env'
load_dotenv(dotenv_path=env_path)

ISCRAPER_API_KEY = os.environ.get('ISCRAPER_API_KEY')
PROFILE_DETAILS_URL = "https://api.iscraper.io/v2/profile-details"
        
        

In [2]:
def call_iscraper(linkedin_id: str):
    payload = json.dumps(
        {
            "profile_id": linkedin_id,
            "profile_type": "personal",
            "network_info": True,
        }
    )
    headers = {"X-API-KEY": ISCRAPER_API_KEY, "Content-Type": "application/json"}

    response = requests.request(
        "POST", PROFILE_DETAILS_URL, headers=headers, data=payload
    )
    data = json.loads(response.text)

    return data

In [11]:
# FOR CURATIVE

with open('FILLMEIN.csv', 'r') as rf:
    reader = csv.reader(rf, delimiter=',')
    column_names = next(reader)
    print(column_names)

    linkedin_url_index = column_names.index('LinkedIn Profile')

    with open('written_curative.csv', 'w') as wf:
        writer = csv.writer(wf, delimiter=',')
        writer.writerow(column_names)
        for row in reader:
            linkedin_url = row[linkedin_url_index]
            linkedin_id = linkedin_url.strip('/').split('/')[-1]
            if linkedin_id == "":
                continue
            print(linkedin_id)

            # Get iScraper data
            data = call_iscraper(linkedin_id)

            # Get follower count
            if 'network_info' not in data:
                connections_count = -1
                followers_count = -1
            else:
                if 'connections_count' not in data['network_info']:
                    connections_count = -1
                else:
                    connections_count = data['network_info']['connections_count']
                if 'followers_count' not in data['network_info']:
                    followers_count = -1
                else:
                    followers_count = data['network_info']['followers_count']

            # Get bio
            if 'summary' not in data:
                summary = ''
            else:
                summary = data['summary']
            li_bio_present = True if summary != '' else False

            print(row[:-2] + [followers_count, li_bio_present])
            
            writer.writerow(row[:-2] + [followers_count, li_bio_present])



['LinkedIn Profile', 'Definitive Executive ID', 'Executive Name', 'Title', 'Standardized Title', 'Email', 'Direct Phone', 'Location Phone', 'Definitive ID', 'Physician Group Name', 'Physician Group Type', 'Address', 'City', 'State', '# of Physicians', 'Follower Count', 'LI Bio Present? (T/F)']
anna-fisher-929993162
['https://www.linkedin.com/in/anna-fisher-929993162/', '1765906', 'Anna Fisher', 'Manager of Human Resources', 'Human Resources Manager', 'afisher@iddc.net', '', '515.288.6097', '659348', 'Iowa Digestive Disease Center', 'Single/Multi-Specialty Physician Group', '1378 Nw 124th St', 'Clive', 'IA', '14', 1, True]
chrysta-bosley-38bb70a5
['https://www.linkedin.com/in/chrysta-bosley-38bb70a5/', '2579198', 'Chrysta Bosley', 'Practice Administrator', 'Practice Administrator', 'cbosley@dsmcapitalortho.com', '', '515.200.3594', '648259', 'Capital Orthopaedics ', 'Single/Multi-Specialty Physician Group', '12499 University Ave', 'Clive', 'IA', '11', 140, True]
chelsea-greene-5352ab54&

In [None]:
## FOR RAMP

with open('FILLMEIN.csv', 'r') as rf:
    reader = csv.reader(rf, delimiter=',')
    column_names = next(reader)
    # print(column_names)

    linkedin_url_index = column_names.index('CONTACT_LINKEDIN_PROFILE_URL')
    connection_count_index = column_names.index('Connection count')
    linkedin_bio_index = column_names.index('Linkedin Bio?')

    with open('written_ramp.csv', 'w') as wf:
        writer = csv.writer(wf, delimiter=',')
        writer.writerow(['CONTACT_LINKEDIN_PROFILE_URL', 'Connection Count', 'Follower Count', 'LinkedIn Bio'])

        for row in reader:
            linkedin_url = row[linkedin_url_index]
            # print(linkedin_url)
            linkedin_id = linkedin_url.strip('/').split('/')[-1]
            if linkedin_id == "":
                continue
            # print(linkedin_id)

            data = call_iscraper(linkedin_id)
            if 'network_info' not in data:
                connections_count = -1
                followers_count = -1
            else:
                if 'connections_count' not in data['network_info']:
                    connections_count = -1
                else:
                    connections_count = data['network_info']['connections_count']
                if 'followers_count' not in data['network_info']:
                    followers_count = -1
                else:
                    followers_count = data['network_info']['followers_count']

            if 'summary' not in data:
                summary = ''
            else:
                summary = data['summary']
            
            writer.writerow([linkedin_url, connections_count, followers_count, summary])
            print(f'Wrote {linkedin_url}')
