<a href="https://colab.research.google.com/github/itseriqq/linkedin-reachout-automation-scrapping/blob/main/linkedin_profiles_reachout_automationl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import json
import gspread
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
from pydantic import BaseModel
import os
from typing import List, Dict
from google import genai
import time
import requests

from google.colab import userdata

# Configuration
# Bright Data API key, fetched through google.colab's `userdata` helper.
BRIGHT_DATA_API_KEY = userdata.get('brightDataKey')
# NOTE(review): not referenced by any of the visible cells — confirm it is still needed.
BRIGHT_DATA_API_URL = 'https://api.brightdata.com/scrape/linkedin'
# Service-account file loaded by salvar_no_sheets to authenticate against Google Sheets.
GOOGLE_SHEETS_CREDENTIALS_FILE = 'credentials.json'
# Spreadsheet that receives the generated outreach rows.
SPREADSHEET_ID = '1XgLaKmZpfcr5GMw1qDfSJw5rJVeE-a-KR4ReNixUW1o'
# Résumé PDF handed to extrair_curriculo_gemini.
CURRICULO_PATH = '/content/curriculo_erick.pdf'
# Shared Gemini client used by the functions that call `client.models.generate_content`.
client = genai.Client(api_key=userdata.get('keyGemini'))


In [None]:
# Persist a list of records to disk as JSON
def salvar_json(dados: List[Dict], caminho_arquivo: str):
    """Write *dados* to *caminho_arquivo* as indented UTF-8 JSON.

    Non-ASCII characters are written verbatim (no ``\\u`` escapes) and the
    destination path is reported on stdout once the file has been written.
    """
    with open(caminho_arquivo, mode='w', encoding='utf-8') as destino:
        json.dump(dados, destino, indent=4, ensure_ascii=False)
    print(f'Dados salvos em {caminho_arquivo}')

# Load a list of records back from a JSON file
def carregar_json(caminho_arquivo: str) -> List[Dict]:
    """Return the parsed JSON content of *caminho_arquivo*.

    A missing file is not an error: a notice is printed and an empty list
    is returned. Malformed JSON still raises ``json.JSONDecodeError``.
    """
    try:
        origem = open(caminho_arquivo, 'r', encoding='utf-8')
    except FileNotFoundError:
        print(f'Arquivo {caminho_arquivo} n√£o encontrado.')
        return []
    with origem:
        return json.load(origem)

In [None]:
import pathlib
from google.genai import types

def _remover_cercas_markdown(texto: str) -> str:
    """Return *texto* without surrounding whitespace or a Markdown code fence."""
    limpo = texto.strip()
    if limpo.startswith("```json"):
        limpo = limpo[7:]
    if limpo.startswith("```"):
        limpo = limpo[3:]
    if limpo.endswith("```"):
        limpo = limpo[:-3]
    return limpo.strip()


def extrair_curriculo_gemini(path: str, cache_path: str = "/content/curriculo.json"):
    """Extract structured résumé data from a PDF using Gemini, with a JSON cache.

    If *cache_path* exists and holds valid JSON, that content is returned
    without calling the model. Otherwise the PDF at *path* is sent to Gemini
    together with a prompt asking for a fixed JSON schema; the answer is
    parsed, written to *cache_path* and returned.

    Args:
        path: Location of the résumé PDF.
        cache_path: JSON file used to cache the parsed résumé. Defaults to
            the location the notebook has always used.

    Returns:
        The parsed résumé as a dict, or ``{}`` when the PDF is missing, the
        model returns nothing, the answer is not valid JSON, or any other
        error occurs while calling the model or saving the result.
    """
    cache = pathlib.Path(cache_path)
    if cache.exists():
        print("Carregando curr√≠culo do arquivo salvo.")
        try:
            with open(cache, 'r', encoding='utf-8') as f:
                return json.load(f)
        except json.JSONDecodeError as e:
            # A truncated or hand-edited cache must not abort the run;
            # fall through and rebuild it from the PDF.
            print(f"Arquivo salvo corrompido ({e}); processando o PDF novamente.")

    print("Processando PDF com Gemini...")
    filepath = pathlib.Path(path)

    if not filepath.exists():
        print(f"Erro: Arquivo {path} n√£o encontrado.")
        return {}

    print(f"Lendo PDF: {filepath}")
    with open(filepath, 'rb') as f:
        pdf_data = f.read()

    prompt = """
    Analise este curr√≠culo e retorne APENAS um objeto JSON v√°lido (sem marcadores de c√≥digo, sem texto adicional) com os seguintes campos:
    {
        "firstName": "string",
        "lastName": "string",
        "location": "string",
        "emailAddress": "string",
        "telephoneNumber": "string",
        "introduction": "string",
        "experience": [
            {
                "company": "string",
                "jobTitle": "string",
                "description": "string"
            }
        ],
        "skills": ["string"]
    }
    """

    # Kept outside the try so the JSON error handler can always report it.
    texto_resposta = None
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[
                types.Part.from_bytes(
                    data=pdf_data,
                    mime_type='application/pdf',
                ),
                prompt
            ]
        )
        texto_resposta = response.text

        print("Resposta do Gemini recebida.")
        print("Texto da resposta:", texto_resposta)

        if not texto_resposta:
            print("Erro: Resposta vazia do Gemini.")
            return {}

        # The model sometimes wraps the JSON in a code fence despite the prompt.
        texto_limpo = _remover_cercas_markdown(texto_resposta)
        print("Texto limpo:", texto_limpo)

        # Convert to a dictionary
        curriculo_dict = json.loads(texto_limpo)

        # Save the JSON so later runs can skip the model call
        with open(cache, 'w', encoding='utf-8') as f:
            json.dump(curriculo_dict, f, ensure_ascii=False, indent=4)
        print(f"Curr√≠culo salvo em {cache_path}")

        return curriculo_dict

    except json.JSONDecodeError as e:
        print(f"Erro ao parsear JSON: {e}")
        print(f"Texto recebido: {texto_resposta}")
        return {}
    except Exception as e:
        # Best-effort by design: any API or I/O failure yields an empty résumé.
        print(f"Erro ao processar curr√≠culo: {e}")
        return {}


In [None]:
def gerar_mensagem_gemini(nome: str, empresa: str, cargo: str, curriculo: Dict) -> str:
    """Ask Gemini to write a personalized outreach message for one contact.

    The prompt combines the contact's name, company and role with the
    sender's experience descriptions and skills taken from *curriculo*, and
    asks the model to wrap its answer in ``<texto>``/``</texto>`` tags.

    Args:
        nome: Contact's name.
        empresa: Contact's company.
        cargo: Contact's job title.
        curriculo: Parsed résumé; the keys ``firstName``, ``lastName``,
            ``experience`` (list of dicts with ``description``) and
            ``skills`` (list of strings) are read, each with a fallback.

    Returns:
        The text between the ``<texto>`` tags, the whole stripped response
        when the tags are absent, or a string starting with
        ``"Erro ao gerar mensagem:"`` when the call fails or returns no text.
    """
    print(f"\n=== Gerando mensagem para {nome} ===")

    firstName = curriculo.get("firstName", "Nome")
    lastName = curriculo.get("lastName", "Sobrenome")

    experiencias = curriculo.get('experience', [])
    if experiencias:
        experiencia = ' '.join(exp.get('description', '') for exp in experiencias if exp.get('description'))
    else:
        experiencia = "Desenvolvedor de software com experi√™ncia em projetos diversos"

    skills_list = curriculo.get('skills', [])
    if skills_list:
        skills = ', '.join(skills_list)
    else:
        skills = "desenvolvimento de software"

    print(f"Nome completo: {firstName} {lastName}")
    print(f"Skills: {skills[:100]}...")

    prompt = f"""
    Escreva uma mensagem de outreach personalizada para {nome}, que trabalha na {empresa} como {cargo}.
    Minha experi√™ncia: {experiencia}
    Minhas habilidades: {skills}
    Quero saber se h√° oportunidades de colabora√ß√£o ou indica√ß√£o de vagas relevantes.
    Seja profissional e direto, mas n√£o soe como spam e, como esperamos vagas em ingl√™s, preciso que seja em ingl√™s a mensagem, de forma direta e cordial.

    A mensagem deve seguir a estrutura:

    Dear {nome},

    I hope this message finds you well.

    My name is {firstName} {lastName}, and I'm a Software Engineer with experience in building full-stack applications and developing solutions. I came across your profile on LinkedIn and was particularly interested in getting a job to work with you at {empresa}, if there's any chance or role available.

    Given {empresa}'s work, I'm especially interested in exploring how my skills in {skills} might align with projects and opportunities within your teams. Although I have this tech formation and knowledge I'm really interested in working with you.

    I'm reaching out to you as an experienced professional at {empresa} to inquire if there might be any opportunities for collaboration, or if you would be open to providing an indication of relevant open positions where my background could be a good fit.

    I would be happy to share my CV or portfolio for your review. Would you be open to a brief 15-minute chat sometime this week to discuss this further?

    Thank you for your time and consideration.

    Best regards,
    {firstName} {lastName}

    Coloque a mensagem gerada entre as tags <texto> e </texto>.
    """

    print("Enviando prompt para Gemini...")
    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=prompt
        )

        # Read the text once; it may be None when the model returns no text.
        texto = response.text
        print("Resposta recebida do Gemini.")

        if not texto:
            print("Erro: Resposta vazia do Gemini.")
            return "Erro ao gerar mensagem: resposta vazia do Gemini"

        print("Texto completo da resposta:")
        print(texto[:500] + "..." if len(texto) > 500 else texto)

        # Look for the closing tag only after the opening one, so a stray
        # "</texto>" earlier in the answer cannot produce an empty slice.
        _, abertura, restante = texto.partition("<texto>")
        corpo, fechamento, _ = restante.partition("</texto>")
        if abertura and fechamento:
            print("‚úì Mensagem extra√≠da com sucesso")
            return corpo.strip()

        print("‚ö† Tags <texto> n√£o encontradas. Retornando resposta completa.")
        return texto.strip()

    except Exception as e:
        # Best-effort by design: report the failure and hand back an error string.
        print(f"‚úó Erro ao chamar Gemini: {e}")
        import traceback
        traceback.print_exc()
        return f"Erro ao gerar mensagem: {str(e)}"


In [None]:
from typing import Dict

def gerar_mensagem_template(nome: str, empresa: str, cargo: str, curriculo: Dict) -> str:
    """Build the fixed-template outreach message for one contact.

    Args:
        nome: Contact's name, used in the greeting.
        empresa: Contact's company, mentioned in the body.
        cargo: Contact's job title. Accepted so the signature matches
            ``gerar_mensagem_gemini``; the template itself does not use it.
        curriculo: Parsed résumé; ``firstName``, ``lastName`` and ``skills``
            (list of strings) are read, each with a fallback.

    Returns:
        The message as paragraphs separated by blank lines, with no leading
        indentation on any line and no trailing whitespace.
    """
    print(f"\n=== Gerando mensagem para {nome} ===")

    firstName = curriculo.get("firstName", "Nome")
    lastName = curriculo.get("lastName", "Sobrenome")

    skills_list = curriculo.get('skills', [])
    skills = ', '.join(skills_list) if skills_list else "desenvolvimento de software"

    print(f"Nome completo: {firstName} {lastName}")
    print(f"Skills: {skills[:100]}...")

    # Paragraphs are assembled explicitly instead of using an indented
    # triple-quoted literal, which leaked the source indentation into the
    # text that ends up in the spreadsheet.
    paragrafos = (
        f"Dear {nome},",
        "I hope this message finds you well.",
        f"My name is {firstName} {lastName}, and I'm a Software Engineer with experience in building full-stack applications and developing solutions. I came across your profile on LinkedIn and was particularly interested in getting a job to work with you at {empresa}, if there's any chance or role available.",
        f"Given {empresa}'s work, I'm especially interested in exploring how my skills in {skills} might align with projects and opportunities within your teams. Although I have this tech formation and knowledge I'm really interested in working with you.",
        f"I'm reaching out to you as an experienced professional at {empresa} to inquire if there might be any opportunities for collaboration, or if you would be open to providing an indication of relevant open positions where my background could be a good fit.",
        "I would be happy to share my CV or portfolio for your review. Would you be open to a brief 15-minute chat sometime this week to discuss this further?",
        "Thank you for your time and consideration.",
        f"Best regards,\n{firstName} {lastName}",
    )

    return "\n\n".join(paragrafos)


In [None]:
import requests
DATASET_ID = "gd_mfz5x93lmsjjjylob"

# Network guard rails: without them a stalled request or a snapshot that
# never becomes ready would hang the notebook cell forever.
REQUEST_TIMEOUT_SECONDS = 60
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 15 * 60

auth_headers = {"Authorization": f"Bearer {BRIGHT_DATA_API_KEY}"}

# Step 1: Trigger the collection (a Google search restricted to LinkedIn profiles)
trigger_url = f"https://api.brightdata.com/datasets/v3/trigger?dataset_id={DATASET_ID}&include_errors=true"
trigger_response = requests.post(
    trigger_url,
    headers={
        "Content-Type": "application/json",
        **auth_headers,
    },
    json=[
            {
              "url": "https://www.google.com/",
              "keyword": 'site:linkedin.com/in ("Software Engineer" OR "Technical Recruiter" OR "Talent Acquisition" OR "TypeScript/JavaScript" OR "React.js" OR "Next.js")',
              "language": "en",
              "country": "US",
              "start_page": 1,
              "end_page": 10,
            }
    ],
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Fail here with the HTTP error instead of a confusing KeyError below.
trigger_response.raise_for_status()

snapshot_id = trigger_response.json()["snapshot_id"]

# Step 2: Poll until the snapshot is ready
progress_url = f"https://api.brightdata.com/datasets/v3/progress/{snapshot_id}"
deadline = time.monotonic() + MAX_WAIT_SECONDS
progress = None
while progress is None or progress.get("status") != "ready":
    if time.monotonic() > deadline:
        raise TimeoutError(
            f"Snapshot {snapshot_id} not ready after {MAX_WAIT_SECONDS} seconds"
        )
    time.sleep(POLL_INTERVAL_SECONDS)
    progress_response = requests.get(
        progress_url,
        headers=auth_headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    progress_response.raise_for_status()
    progress = progress_response.json()
    # NOTE(review): assumes the progress endpoint reports "failed" for
    # aborted snapshots — confirm against the Bright Data API docs.
    if progress.get("status") == "failed":
        raise RuntimeError(f"Snapshot {snapshot_id} failed: {progress}")

# Step 3: Download results
download_url = f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}?format=json"
download_response = requests.get(
    download_url,
    headers=auth_headers,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
download_response.raise_for_status()

results = download_response.json()
print(results)

[{'url': 'https://www.google.com/', 'keyword': 'site:linkedin.com/in ("Software Engineer" OR "Technical Recruiter" OR "Talent Acquisition" OR "TypeScript/JavaScript" OR "React.js" OR "Next.js")', 'general': {'search_engine': 'google', 'language': 'en', 'location': 'Trinity, Florida', 'search_type': 'text', 'page_title': 'site:linkedin.com/in ("Software Engineer" OR "Technical Recruiter" OR "Talent Acquisition" OR "TypeScript/JavaScript" OR "React.js" OR "Next.js") - Google Search', 'datetime': '2025-12-09T03:42:10.141Z', 'query': 'site:linkedin.com/in ("Software Engineer" OR "Technical Recruiter" OR "Talent Acquisition" OR "TypeScript/JavaScript" OR "React.js" OR "Next.js")'}, 'related': [], 'pagination': [{'page': '2', 'link': 'https://www.google.com/search?q=site:linkedin.com/in+(%22Software+Engineer%22+OR+%22Technical+Recruiter%22+OR+%22Talent+Acquisition%22+OR+%22TypeScript/JavaScript%22+OR+%22React.js%22+OR+%22Next.js%22)&sca_esv=ad1c47de9cd56ce0&gl=US&hl=en&ei=iJo3aengIqqLwbkPtpi

In [None]:
def extrair_perfis_linkedin(resultados):
    """Collect LinkedIn profile hits from search-result pages.

    Every entry under a page's ``"organic"`` list whose ``"link"`` contains
    ``linkedin.com/in/`` becomes a dict with ``profile_url``, ``title`` and
    ``snippet`` (missing fields default to an empty string). Pages without
    an ``"organic"`` key contribute nothing; input order is preserved.
    """
    return [
        {
            "profile_url": entrada.get("link", ""),
            "title": entrada.get("title", ""),
            "snippet": entrada.get("snippet", ""),
        }
        for pagina in resultados
        for entrada in pagina.get("organic", [])
        if "linkedin.com/in/" in entrada.get("link", "")
    ]

In [None]:
def parse_nome_cargo_empresa(title: str):
    """Split a search-result title into ``(nome, cargo, empresa)``.

    The text before the first ``" - "`` is the name. Whatever follows is the
    role, and if it contains ``" at "`` the part after the first occurrence
    is the company. Pieces that are absent come back as empty strings; all
    three values are stripped of surrounding whitespace.
    """
    # partition() leaves the remainder empty when the separator is missing,
    # which gives the "name only" and "no company" cases for free.
    nome, _, resto = title.partition(" - ")
    cargo, _, empresa = resto.partition(" at ")
    return tuple(parte.strip() for parte in (nome, cargo, empresa))

def normalizar_perfis(perfis_raw):
    """Turn raw profile hits into uniform contact dicts.

    Each input dict's ``title`` is parsed with ``parse_nome_cargo_empresa``.
    Entries whose parsed name is empty are dropped. The rest become dicts
    with ``nome``, ``cargo`` (default ``"Professional"``), ``empresa``
    (default ``"the company"``) and ``link`` (taken from ``link`` or, failing
    that, ``profile_url``).
    """
    normalizados = []

    for bruto in perfis_raw:
        nome, cargo, empresa = parse_nome_cargo_empresa(bruto.get("title", ""))

        # Safety fallback: a hit without a usable name cannot be addressed.
        if nome:
            normalizados.append(dict(
                nome=nome,
                cargo=cargo if cargo else "Professional",
                empresa=empresa if empresa else "the company",
                link=bruto.get("link") or bruto.get("profile_url", ""),
            ))

    return normalizados


In [None]:
def salvar_no_sheets(dados: List[Dict], spreadsheet_id: str, sheet_name: str = 'P√°gina1'):
    """Append outreach records as rows to a Google Sheets spreadsheet.

    Each record becomes one row with the columns ``name``, ``company``,
    ``title``, ``message`` and ``profile_url`` (missing keys become empty
    cells). No header row is written. Rows are appended with the ``RAW``
    input option and ``INSERT_ROWS``, authenticating with the
    service-account file named by ``GOOGLE_SHEETS_CREDENTIALS_FILE``.

    Args:
        dados: Records to append.
        spreadsheet_id: ID of the target spreadsheet.
        sheet_name: Name of the tab to append to.

    Returns:
        None. When *dados* is empty nothing is sent and no credentials are
        loaded.
    """
    # data only, no header row
    rows = [
        [
            d.get('name', ''),
            d.get('company', ''),
            d.get('title', ''),
            d.get('message', ''),
            d.get('profile_url', '')
        ]
        for d in dados
    ]

    if not rows:
        # Avoid authenticating and issuing an append request with no rows.
        print('Nenhum dado para salvar na planilha.')
        return

    scopes = ['https://www.googleapis.com/auth/spreadsheets']
    credentials = Credentials.from_service_account_file(
        GOOGLE_SHEETS_CREDENTIALS_FILE,
        scopes=scopes
    )
    service = build('sheets', 'v4', credentials=credentials)

    service.spreadsheets().values().append(
        spreadsheetId=spreadsheet_id,
        range=f'{sheet_name}!A1',
        valueInputOption='RAW',
        insertDataOption='INSERT_ROWS',
        body={'values': rows}
    ).execute()

    print('‚úÖ Dados adicionados √† planilha com sucesso')


In [None]:
if __name__ == "__main__":
    # Résumé data supplies the sender fields of every message.
    curriculo = extrair_curriculo_gemini(CURRICULO_PATH)

    # Reduce the raw search results to normalized contact dicts.
    perfis_raw = extrair_perfis_linkedin(results)
    perfis = normalizar_perfis(perfis_raw)

    # One spreadsheet-ready record per contact, in search-result order.
    mensagens = []
    for perfil in perfis:
        mensagem = gerar_mensagem_template(
            perfil["nome"],
            perfil["empresa"],
            perfil["cargo"],
            curriculo,
        )
        mensagens.append(dict(
            name=perfil["nome"],
            company=perfil["empresa"],
            title=perfil["cargo"],
            message=mensagem,
            profile_url=perfil["link"],
        ))


Carregando curr√≠culo do arquivo salvo.

=== Gerando mensagem para Chris Graziani ===
Nome completo: Erick Augusto
Skills: JavaScript, TypeScript, C, Python, Java, React.js, Next.js, React Native, TailwindCSS, Vite, Expo, L...

=== Gerando mensagem para Brad Fuellenbach ===
Nome completo: Erick Augusto
Skills: JavaScript, TypeScript, C, Python, Java, React.js, Next.js, React Native, TailwindCSS, Vite, Expo, L...

=== Gerando mensagem para Matt Tompkins ===
Nome completo: Erick Augusto
Skills: JavaScript, TypeScript, C, Python, Java, React.js, Next.js, React Native, TailwindCSS, Vite, Expo, L...

=== Gerando mensagem para Steven Sill ===
Nome completo: Erick Augusto
Skills: JavaScript, TypeScript, C, Python, Java, React.js, Next.js, React Native, TailwindCSS, Vite, Expo, L...

=== Gerando mensagem para Susanna Kwon ===
Nome completo: Erick Augusto
Skills: JavaScript, TypeScript, C, Python, Java, React.js, Next.js, React Native, TailwindCSS, Vite, Expo, L...

=== Gerando mensagem para El

In [None]:
    # Print every normalized profile dict, one per line, to inspect the parsing.
    for perfil in perfis:
        print(perfil)


{'nome': 'Chris Graziani', 'cargo': 'Senior Technical Recruiter', 'empresa': 'the company', 'link': 'https://www.linkedin.com/in/christophergraziani'}
{'nome': 'Brad Fuellenbach', 'cargo': 'Technical Recruiter', 'empresa': 'Apple', 'link': 'https://www.linkedin.com/in/fuellenbach'}
{'nome': 'Matt Tompkins', 'cargo': 'Fullstack Engineering with TypeScript, ...', 'empresa': 'the company', 'link': 'https://www.linkedin.com/in/matt-tompkins-06271539'}
{'nome': 'Steven Sill', 'cargo': 'Principal Technical Recruiter', 'empresa': 'the company', 'link': 'https://www.linkedin.com/in/stevesill'}
{'nome': 'Susanna Kwon', 'cargo': 'Senior Technical Recruiter', 'empresa': 'the company', 'link': 'https://www.linkedin.com/in/susannato'}
{'nome': 'Eliot Sanford', 'cargo': 'üêô Front-End Software Engineer ...', 'empresa': 'the company', 'link': 'https://www.linkedin.com/in/techieeliot'}
{'nome': 'Andrew Morlin', 'cargo': 'Sr Technical Recruiter - Product and AI/ML', 'empresa': 'the company', 'link': '

In [None]:
# Append the generated outreach records to the configured spreadsheet.
salvar_no_sheets(mensagens, SPREADSHEET_ID)

‚úÖ Dados adicionados √† planilha com sucesso
