In [1]:
from playwright.async_api import async_playwright
from urllib import parse

# Update python path to include the parent directory
import os
import sys
import datetime
from datetime import timedelta

# Make the project package (one directory up) importable from this notebook.
sys.path.append("..")

# Project configuration is passed through environment variables.
# The values below are only fallbacks matching the original author's checkout:
# export ROOT_PATH / GOOGLE_APPLICATION_CREDENTIALS before starting the kernel
# to run the notebook from another machine without editing this cell.
os.environ.setdefault("ROOT_PATH", "/Users/aennassiri/projects/fubloo/app")
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    os.path.join(
        os.environ["ROOT_PATH"], "configuration", "fubloo-app-1f213ca274de.json"
    ),
)

In [2]:
import requests

import pandas as pd
from src.services.leads import get_leads, get_last_lead, patch_lead

## Define Variables


In [3]:
# BeenVerified endpoints used by this notebook.
# Not referenced by any visible cell.
LOGIN_PAGE_URL = "https://www.beenverified.com/app/login"
# Opened right after the browser context is created.
HOME_PAGE_URL = "https://www.beenverified.com/rf/dashboard"
# Not referenced by any visible cell.
REPORT_PAGE_URL = "https://www.beenverified.com/api/v5/reports"
# Base URL to which build_search_url appends its query string.
SEARCH_PAGE_URL = "https://www.beenverified.com/rf/search/person"

In [4]:
# Start the Playwright driver and launch a visible (non-headless) Chromium window.
# Both handles are notebook globals; a later cell closes the browser and stops the driver.
pw = await async_playwright().start()
browser = await pw.chromium.launch(headless=False)



In [5]:
# Create a browser context from the stored state file and open the dashboard in a new tab.
# NOTE(review): the state file presumably holds an authenticated BeenVerified session,
# since the notebook never visits LOGIN_PAGE_URL — confirm.
# `context` and `page` are read as globals by search_person further down.
context = await browser.new_context(storage_state="playwright/.auth/state.json")
page =  await context.new_page()
await page.goto(HOME_PAGE_URL)

<Response url='https://www.beenverified.com/rf/dashboard' request=<Request url='https://www.beenverified.com/rf/dashboard' method='GET'>>

## Search for lead 

In [6]:
def build_search_url(
    first_name,
    last_name,
    middle_name: str | None,
    city: str | None = None,
    state: str | None = None,
    age: int | None = None,
):
    """Build the person-search URL for the given criteria.

    Only truthy criteria are added to the query string, in this order:
    ``city``, ``state``, ``age``, ``fname``, ``ln``, ``mn``.

    Args:
        first_name: Sent as the ``fname`` query parameter.
        last_name: Sent as the ``ln`` query parameter.
        middle_name: Sent as the ``mn`` query parameter.
        city: Sent as the ``city`` query parameter.
        state: Sent as the ``state`` query parameter.
        age: Sent as the ``age`` query parameter.

    Returns:
        ``SEARCH_PAGE_URL`` followed by ``?`` and the URL-encoded criteria.
    """
    # Tuple order fixes the order of the parameters in the final query string.
    candidates = (
        ("city", city),
        ("state", state),
        ("age", age),
        ("fname", first_name),
        ("ln", last_name),
        ("mn", middle_name),
    )
    query_string = parse.urlencode(
        {key: value for key, value in candidates if value}
    )
    return f"{SEARCH_PAGE_URL}?{query_string}"

# get top 5 users

In [7]:
def score_of_leads(result, search_query):
    """Score how well one search-result card matches the searched person.

    Args:
        result: Text content of a single result card (``None`` is treated as
            empty text).
        search_query: Mapping that may contain ``first_name``, ``last_name``,
            ``city``, ``state``, ``age`` and ``middle_name``. Values are
            converted to text; ``None`` and blank values are ignored.

    Returns:
        ``-1`` when the card does not contain both names or mentions
        "deceased"; otherwise the number of occurrences of "alias",
        "relatives" and each non-empty optional criterion in the card text.
        Matching is case-insensitive.
    """

    def _term(key):
        # Normalise a criterion to lower-case text; missing values become "".
        value = search_query.get(key)
        return "" if value is None else str(value).strip().lower()

    text = (result or "").lower()

    if (
        _term("first_name") not in text
        or _term("last_name") not in text
        or "deceased" in text
    ):
        return -1

    score = text.count("alias") + text.count("relatives")

    for key in ("city", "state", "age", "middle_name"):
        term = _term(key)
        # str.count("") returns len(text) + 1, so an empty criterion must be
        # skipped or it would inflate the score of longer cards.
        if term:
            score += text.count(term)

    return score


## Get lead info

In [8]:
async def open_report_popup(page, container_user):
    """Open the person report for a result card and return the popup page.

    This function was a line-for-line duplicate of ``open_report_page``; it is
    kept as a thin alias so both names keep working while sharing one
    implementation.

    Args:
        page: Page on which the popup event is expected.
        container_user: Result-card element containing the report button.

    Returns:
        The page object of the popup that opens after the click.
    """
    return await open_report_page(page, container_user)


In [9]:
async def open_report_page(page, container_user):
    """Click a result card's report button and return the popup it opens.

    Args:
        page: Page on which the popup event is expected.
        container_user: Result-card element that is searched for the
            ``.css-gnnc3j`` button.

    Returns:
        The page object of the popup opened by the click.

    Raises:
        LookupError: If the card contains no ``.css-gnnc3j`` element.
    """
    # Resolve the button before arming the popup waiter, so a missing button
    # fails with a clear error instead of ``None.click()`` while a waiter is
    # still pending.
    view_report = await container_user.query_selector(".css-gnnc3j")
    if view_report is None:
        raise LookupError(
            "View-report button (.css-gnnc3j) not found in result card"
        )

    async with page.expect_popup() as popup_info:
        await view_report.click()

    # The popup is read once the context manager has finished waiting for it.
    return await popup_info.value



# Extra information of the user

In [10]:
async def extract_phone_numbers(page):
    """Collect the phone-number texts shown on a report page.

    Args:
        page: Report page to read from.

    Returns:
        The text of every ``.css-1vugsqn`` element inside
        ``#phone-numbers-section``, in document order. Elements without text
        are skipped. An empty list is returned when the section is absent.
    """
    container_phone = await page.query_selector("#phone-numbers-section")
    if container_phone is None:
        # query_selector yields None for a missing section; treat that as
        # "no phone numbers" rather than failing the whole report.
        return []

    phone_numbers = []
    for phone_element in await container_phone.query_selector_all(".css-1vugsqn"):
        text = await phone_element.text_content()
        if text:
            phone_numbers.append(text)
    return phone_numbers



In [11]:
async def extract_addresses(page):
    """Collect the addresses listed on a report page.

    Each ``.css-1q4wjho`` element inside ``#address-history-section`` is one
    address; the texts of its ``.css-zv7ju9`` fields are joined with single
    spaces.

    Args:
        page: Report page to read from.

    Returns:
        One string per address, in document order. Fields without text and
        addresses with no text at all are skipped. An empty list is returned
        when the section is absent.
    """
    container_address = await page.query_selector("#address-history-section")
    if container_address is None:
        # Missing section: nothing to extract, but not an error.
        return []

    addresses_list = []
    for address in await container_address.query_selector_all(".css-1q4wjho"):
        parts = []
        for address_field in await address.query_selector_all(".css-zv7ju9"):
            text = await address_field.text_content()
            # text_content may be None, which str.join would reject.
            if text:
                parts.append(text)
        if parts:
            addresses_list.append(" ".join(parts))

    return addresses_list



In [12]:
async def extract_email_list(page):
    """Collect the e-mail texts shown on a report page.

    Args:
        page: Report page to read from.

    Returns:
        The text of every ``.css-1vugsqn`` element inside ``#email-section``,
        in document order. Elements without text are skipped. An empty list
        is returned when the section is absent.
    """
    email_container = await page.query_selector("#email-section")
    if email_container is None:
        # Missing section: report simply lists no e-mails.
        return []

    email_list = []
    for email_element in await email_container.query_selector_all(".css-1vugsqn"):
        text = await email_element.text_content()
        if text:
            email_list.append(text)
    return email_list



In [13]:
async def get_lead_info(new_page):
    """Scrape phone numbers, addresses and e-mails from an open report page.

    The three extractors are awaited one after another, in that order.

    Args:
        new_page: Report page to read from.

    Returns:
        A 3-tuple ``(phone_numbers, addresses, emails)``.
    """
    collected = []
    for extractor in (extract_phone_numbers, extract_addresses, extract_email_list):
        collected.append(await extractor(new_page))
    return tuple(collected)

# extra_phone_numbers, extra_addresses, extra_emails = await get_lead_info(new_page)
# extra_phone_numbers, extra_addresses, extra_emails

    
    
    

## Get info of the user

In [14]:
def format_phone_number(phone_number):
    """Normalise a US phone number to ``+1XXXXXXXXXX`` form.

    Example: ``"(314) 691-4319"`` becomes ``"+13146914319"``.

    Args:
        phone_number: Phone number text in any common notation.

    Returns:
        ``+1`` followed by the ten digits when the input contains exactly ten
        digits, or ``+`` followed by the digits when it contains eleven digits
        starting with the country code ``1``. Any other input is returned with
        only ``(``, ``)``, spaces and ``-`` removed.
    """
    digits = "".join(ch for ch in phone_number if ch.isascii() and ch.isdigit())

    if len(digits) == 10:
        return f"+1{digits}"
    if len(digits) == 11 and digits.startswith("1"):
        # Country code already present (e.g. "1-314-691-4319" or "+1314...").
        return f"+{digits}"

    # Not recognisable as a US number: strip the common separators only and
    # leave everything else (including any leading "+") untouched.
    return (
        phone_number.replace("(", "")
        .replace(")", "")
        .replace(" ", "")
        .replace("-", "")
    )

In [15]:
# Number of searches in a row whose result selectors timed out; reset to 0 as
# soon as a search reaches the result cards.
consecutive_timeouts = 0


async def search_person(
    first_name,
    last_name,
    middle_name,
    age,
    city="",
    state="",
    zip="",
    addressLine1="",
    addressLine2="",
):
    """Search for a person and scrape the report of the best-scoring result.

    Relies on the notebook globals ``page`` (the search tab) and ``context``
    (used to close the tabs opened for the report), and updates the global
    ``consecutive_timeouts`` counter.

    Args:
        first_name: First name; used in the search URL and for scoring.
        last_name: Last name; used in the search URL and for scoring.
        middle_name: Middle name; used in the search URL and for scoring.
        age: Age; used in the search URL and for scoring.
        city: City; used in the search URL and for scoring.
        state: State; used in the search URL and for scoring.
        zip: Only recorded in the scoring query dict.
        addressLine1: Only recorded in the scoring query dict.
        addressLine2: Only recorded in the scoring query dict.

    Returns:
        A dict with the keys ``"phone_numbers"``, ``"addresses"`` and
        ``"emails"``, or ``None`` when the results did not load or no result
        scored 0 or higher.
    """
    global consecutive_timeouts

    def _text(value):
        # str(None) is the truthy string "None", which the scorer would then
        # count as a real search term; map missing values to "" instead.
        return "" if value is None else str(value)

    def _record_timeout(limit):
        # Shared bookkeeping for both selector waits.
        global consecutive_timeouts
        print("Timeout")
        consecutive_timeouts += 1
        if consecutive_timeouts > limit:
            print("Too many timeouts")
        else:
            print("No leads found")

    search_query = {
        "first_name": _text(first_name),
        "last_name": _text(last_name),
        "middle_name": _text(middle_name),
        "age": _text(age),
        "city": _text(city),
        "state": _text(state),
        "zip": _text(zip),
        "addressLine1": _text(addressLine1),
        "addressLine2": _text(addressLine2),
    }

    # Triggering the search
    url = build_search_url(first_name, last_name, middle_name, city, state, age)
    await page.goto(url)

    # Getting results
    # NOTE(review): the two waits use different limits (20 and 10) before
    # reporting "Too many timeouts"; kept as found — confirm which is intended.
    try:
        await page.wait_for_selector(".css-ts1zsd")
    except Exception:
        _record_timeout(20)
        return None

    container_results = await page.query_selector(".css-ts1zsd")
    if container_results is None:
        print("No leads found")
        return None

    try:
        await container_results.wait_for_selector(".css-1mvdt3q")
    except Exception:
        _record_timeout(10)
        return None

    container_users = await container_results.query_selector_all(".css-1mvdt3q")
    consecutive_timeouts = 0

    # Measure the score for each lead; the first card with the highest score wins.
    max_score = -1
    max_score_id = -1
    for i, lead in enumerate(container_users):
        card_text = await lead.text_content()
        score = score_of_leads(card_text, search_query)
        print(f"Score of lead {i} is {score}")
        if score > max_score:
            max_score = score
            max_score_id = i

    print(f"Max score is {max_score} for lead {max_score_id}")

    if max_score < 0:
        print("No leads found")
        return None

    # Extracting the leads
    selected_lead = container_users[max_score_id]
    print(await selected_lead.text_content())

    try:
        # Opening the report page
        new_page = await open_report_page(page, selected_lead)
        await new_page.wait_for_selector("#person-overview", timeout=60000)

        # Extracting the extra info
        extra_phone_numbers, extra_addresses, extra_emails = await get_lead_info(
            new_page
        )
    finally:
        # Close every tab except the first one even when scraping fails, so
        # failed reports do not pile up as open tabs.
        for extra_page in context.pages[1:]:
            await extra_page.close()

    print(extra_phone_numbers, extra_addresses, extra_emails)

    return {
        "phone_numbers": extra_phone_numbers,
        "addresses": extra_addresses,
        "emails": extra_emails,
    }

In [16]:
# Naive local timestamp captured once when this cell runs; later cells build
# their lead-query date windows relative to it.
today = datetime.datetime.now()

In [17]:
from time import sleep

sleep(1000)

while True:  
    try:
        lead = get_last_lead(
            start_date=today - timedelta(days=30),  
            end_date=today + timedelta(days=1),
            status="new",
            limit=1,
        )

        patch_lead(case_id=lead.case_id, status="processing")

        if lead is None:
            print("No leads found. Waiting for new leads")
            page.wait_for_timeout(100000)

        lead.state = lead.state or "MO"

        results = await search_person(
            first_name=lead.first_name,
            last_name=lead.last_name,
            middle_name=lead.middle_name,
            age=lead.age,
            city=lead.city,
            state=lead.state,
        )

        if results:
            details = {
                "phones": [format_phone_number(p) for p in results["phone_numbers"]],
                "phone": {
                    str(k): {
                        "phone": format_phone_number(p),
                    }
                    for k, p in enumerate(results["phone_numbers"])
                },
                "emails": results["emails"],
                "report": {
                    "addresses": results["addresses"],
                },
                "lead_source": "beenverified",
                "status": "not_contacted",
            }
            patch_lead(case_id=lead.case_id, **details)
            await page.wait_for_timeout(60000)
        else:
            patch_lead(case_id=lead.case_id, status="not_found", lead_source="beenverified")

    except Exception as e:
        await page.wait_for_timeout(100000)

  leads_list = leads_list.where("case_date", ">=", start_date)
  leads_list = leads_list.where("case_date", "<=", end_date)
  leads_list = leads_list.where("status", "==", status)


Score of lead 0 is 7
Max score is 7 for lead 0
Name matchCatherine R Munson, 34Kansas City, MOBornDec 1989AliasesCatherine R EuwerRelativesJerod R Euwer, Kelsey Nicole Euwer, Peggy A Euwer, +2 MoreLocationsKansas City, MO • Riverside, MOView Person Report
['(816) 769-2536'] ['1304 123rd St Kansas City, MO 64165', '10043 Lister Ave Kansas City, MO 64156', '4937 High Dr Riverside, MO 64150', '600 73rd Ter Kansas City, MO 64118', '4936 High Dr Riverside, MO 64150'] []
Score of lead 0 is 16
Max score is 16 for lead 0
Name matchMegan N Rice, 22Excelsior Springs, MOBornJun 2001RelativesJames Anthony Rice, Kelli Jo Rice, Sydney A RiceLocationsExcelsior Springs, MOView Person Report
['(816) 503-1721'] ['940 Wornall Rd Excelsior Springs, MO 64024'] []
Score of lead 0 is 5
Score of lead 1 is 6
Max score is 6 for lead 1
Name matchJames Vernon Coe, 68Kansas City, MOBorn1956RelativesDennis Delbert Coe, Terri Lynn Coe, Lisa Marie SmithLocationsKansas City, MOView Person Report
['(816) 452-3562'] ['3

In [None]:
# Scratch query: fetch a single lead with status "new" for the window from
# 14 days before `today` to one day after it. Requires `today` from an earlier cell.
lead = get_last_lead(
    start_date=today - timedelta(days=14),
    end_date=today + timedelta(days=1),
    status="new",
    limit=1,
)

  leads_list = leads_list.where("case_date", ">=", start_date)
  leads_list = leads_list.where("case_date", "<=", end_date)
  leads_list = leads_list.where("status", "==", status)


In [None]:
lead

In [None]:
lead.case_id

'SOLIS-MARTINEZ__DAVID_39_02_18_2024'

In [None]:
details

{'phones': ['+12096060589', '+12098921814', '+15104819427', '+13034130557'],
 'phone': {'0': {'phone': '+12096060589'},
  '1': {'phone': '+12098921814'},
  '2': {'phone': '+15104819427'},
  '3': {'phone': '+13034130557'}},
 'emails': ['karlasol1979@yahoo.com',
  'larthatta@gmail.com',
  'gulchera124000@yahoo.com'],
 'report': {'addresses': ['1346 Pinto Way Patterson, CA 95363',
   '1201 Yellowhammer Dr Patterson, CA 95363',
   '3563 Franklin Ave Fremont, CA 94538',
   '1674 E St Hayward, CA 94541',
   '132 Paramatta Dr Patterson, CA 95363',
   '15610 Via Granada San Lorenzo, CA 94580',
   '606 Trout Creek Ln Patterson, CA 95363',
   '15861 Hesperian Blvd San Lorenzo, CA 94580']},
 'lead_source': 'beenverified',
 'status': 'not_contacted'}

In [None]:
# Scratch query: same lookup with a 7-day window. Requires `today` from an
# earlier cell; the recorded output below is the NameError raised when that
# cell had not been run in the current kernel.
lead = get_last_lead(
    start_date=today - timedelta(days=7),
    end_date=today + timedelta(days=1),
    status="new",
    limit=1,
)

NameError: name 'today' is not defined

In [None]:
# Cleanup: close the Chromium browser, then stop the Playwright driver
# started near the top of the notebook.
await browser.close()
await pw.stop()

In [None]:
lead

Lead(id=None, case_id='WATKINS__VINCENT_M_64_02_18_2024', first_name='VINCENT', last_name='WATKINS', middle_name='M', court_code='temp', age=64, year_of_birth=1960, email=None, phone=None, phones=None, address=None, city=None, state=None, zip_code=None, county=None, creation_date=datetime.datetime(2024, 2, 18, 23, 17, 56, 652086), last_updated=DatetimeWithNanoseconds(2024, 2, 18, 10, 1, 10, 749549, tzinfo=datetime.timezone.utc), case_date=datetime.datetime(2024, 2, 18, 0, 0, tzinfo=datetime.timezone.utc), status='new', source='mo_mshp', charges_description='1) FELONY DWI - HABITUAL 2) EXCEEDED POSTED SPEED LIMIT BY 16-19 MPH 3) OPERATE VEH ON HWY WITHOUT VALID LICENSE - 1ST OFFENSE', disposed=False, carrier=None, notes=None, violation=None, court=None, accidentCheckbox=False, commercialDriverLicence=False, ticket_img=None, user_id=None, report=None, details=None, lead_source=None, cloudtalk_upload=False)

In [None]:
lead.state