In [1]:
from playwright.async_api import async_playwright

# Update python path to include the parent directory
import os
import sys
import re

# Update python path to include the parent directory
import os
import sys
import datetime
from datetime import timedelta

from rich.console import Console

# Shared rich console; the cells below write their progress messages through console.log.
console = Console()

# Update python path to include the parent directory (only once, so that
# re-running this cell does not keep growing sys.path).
if ".." not in sys.path:
    sys.path.append("..")

# Respect values that are already present in the environment and only fall
# back to the original author's local paths when nothing is configured.
os.environ.setdefault(
    "ROOT_PATH", "/Users/aennassiri/Projects/Personal/ticket-washer"
)
# The credentials file is resolved relative to ROOT_PATH, so overriding
# ROOT_PATH alone is enough to relocate both settings.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    os.path.join(
        os.environ["ROOT_PATH"],
        "configuration",
        "fubloo-app-1f213ca274de.json",
    ),
)

In [2]:
import requests

import pandas as pd
from src.services.leads import get_leads, get_last_lead, patch_lead
from src.services.leads import (
    get_leads,
    get_last_lead,
    patch_lead,
    LeadsService,
)
from src.services.cases import get_single_case
from src.models import Lead, Case

# Service object used by later cells to query stored leads via get_items(...).
leads_service = LeadsService()

## Define Variables


In [3]:
# Page on riskmanagement.lexisnexis.com that search_person navigates to before each lookup.
HOME_PAGE_URL = "https://riskmanagement.lexisnexis.com/app/bps/misc#"

In [4]:
pw = await async_playwright().start()
browser = await pw.chromium.launch(headless=False, args=["--proxy-server=socks5://localhost:9090"])
context = await browser.new_context(
        storage_state="playwright/.auth/lexis.json"
    )


In [5]:
async def close_tabs(page):
    """Close every "close tab" button on ``page`` except the last one.

    All elements with the ``button`` role are inspected; those whose
    ``aria-label`` contains "close tab" (case-insensitive) are treated as tab
    close buttons. Each of them except the final one is clicked, with a
    one-second pause after every click.

    Closing is best-effort: a button that cannot be clicked is logged and
    skipped so the remaining buttons are still processed.

    Args:
        page: Playwright page to operate on.
    """
    buttons = await page.get_by_role("button").all()

    async def get_aria_label(elem):
        # Buttons without an aria-label return None; normalise that to "".
        label = await elem.get_attribute("aria-label")
        return (label or "").lower()

    tabs_to_close = [
        elem for elem in buttons if "close tab" in await get_aria_label(elem)
    ]

    # The slice drops the last button; with zero or one match it is empty.
    for elem in tabs_to_close[:-1]:
        try:
            console.log("Closing tab")
            await elem.click()
            await page.wait_for_timeout(1000)
        except Exception as exc:
            # Only ordinary errors are tolerated here. A bare ``except`` would
            # also swallow asyncio.CancelledError / KeyboardInterrupt and make
            # the notebook impossible to interrupt cleanly.
            console.log(f"Could not close tab: {exc}")

In [6]:
async def search_person(
    first_name,
    last_name,
    middle_name="",
    dob="",
    city="",
    state="",
    zip="",
    address_line1="",
    address_line2="",
):
    """Run a Phone Finder search for one person and collect the phone numbers.

    Opens a new page in the shared browser ``context``, fills in the Phone
    Finder form, downloads the result report as HTML into ``temp/`` and
    extracts every number written as ``123-456-7890`` from that report.

    Args:
        first_name: Value typed into the FIRST_NAME field.
        last_name: Value typed into the LAST_NAME field.
        middle_name: Value typed into the MI field.
        dob: Accepted for call compatibility; not used by the form.
        city: Value typed into the CITY field.
        state: Option selected in the STATE dropdown.
        zip: Value typed into the ZIP field.
        address_line1: Value typed into the STREET_ADDRESS field.
        address_line2: Accepted for call compatibility; not used by the form.

    Returns:
        ``None`` when the page shows "No documents were found for your search
        terms.". Otherwise a dict with the keys ``phones`` (unique numbers
        formatted as ``+1XXXXXXXXXX``, in the order they first appear in the
        report), ``phone`` (the same numbers keyed by their index as a
        string), ``email`` (always ``None``), ``report`` (the raw report text
        under ``data``) and ``lead_source``.

    Raises:
        Any Playwright or file error raised by the steps below. The page
        opened by this call is closed in every case.
    """
    page = await context.new_page()
    try:
        await page.goto(HOME_PAGE_URL)

        # Open the search form through the "Phone Finder" link.
        phone_finder_link = page.get_by_role("link").and_(
            page.get_by_text("Phone Finder")
        )
        await phone_finder_link.click()

        # Wait 5 seconds for the form to load.
        await page.wait_for_timeout(5000)

        # Close all the opened result tabs except the last one.
        await close_tabs(page)

        # Fill the form: LAST_NAME, FIRST_NAME, MI, STREET_ADDRESS, CITY, STATE, ZIP.
        await page.fill("input[name=LAST_NAME]", last_name)
        await page.fill("input[name=FIRST_NAME]", first_name)
        await page.fill("input[name=MI]", middle_name)
        await page.fill("input[name=STREET_ADDRESS]", address_line1)
        await page.fill("input[name=CITY]", city)
        # State is a dropdown, not a text input.
        await page.select_option("select[name=STATE]", state)
        await page.fill("input[name=ZIP]", zip)

        # Select the radio button whose id is PHONE_FINDER_TYPE_B (if present).
        for radio in await page.get_by_role("radio").all():
            if await radio.get_attribute("id") == "PHONE_FINDER_TYPE_B":
                await radio.click()

        # Click the "Search" button inside #portal-search-buttons.
        search_buttons = page.locator("#portal-search-buttons")
        await search_buttons.get_by_text("Search").click()

        # Wait 5 seconds for the results to render.
        await page.wait_for_timeout(5000)

        no_results = await page.get_by_text(
            "No documents were found for your search terms."
        ).all()
        if len(no_results) > 0:
            console.log("No documents were found for your search terms.")
            return None

        # Click the first "Download Results" button (button[data-type=download]).
        download_buttons = await page.locator("button[data-type=download]").all()
        if download_buttons:
            await download_buttons[0].click()

        # Wait 5 seconds for the download dialog.
        await page.wait_for_timeout(5000)

        # Choose HTML in the DOWNLOAD_FORMAT dropdown.
        await page.locator("select#DOWNLOAD_FORMAT").first.select_option("HTML")

        async with page.expect_download() as download_info:
            await page.locator("button.start-verify-print-download").first.click()

        download = await download_info.value
        report_path = "temp/" + download.suggested_filename
        await download.save_as(report_path)

        # Read the downloaded report back as text.
        with open(report_path, "r") as file:
            data = file.read()

        # Extract phone numbers written as 123-456-7890. dict.fromkeys drops
        # duplicates while keeping first-seen order, so the index keys of the
        # "phone" mapping are stable between runs (a set would not be).
        raw_numbers = re.findall(r"\d{3}-\d{3}-\d{4}", data)
        phones = [
            f"+1{number.replace('-', '')}" for number in dict.fromkeys(raw_numbers)
        ]

        details = {
            "phones": phones,
            "phone": {
                str(k): {
                    "phone": p,
                }
                for k, p in enumerate(phones)
            },
            "email": None,
            "report": {"data": data},
            "lead_source": "lexis_nexis_phone_finder",
        }

        return details
    finally:
        # Always release the page, including when a step above raised.
        await page.close()

In [7]:
from src.commands.leads import filter_leads

# Search window and fetch limits for the lead query below.
LOOKBACK_DAYS = 33
MAX_LEADS = 3000

today = datetime.datetime.now()

# Get the leads with status "rpr" between LOOKBACK_DAYS days ago and tomorrow.
leads_not_found = get_last_lead(
    start_date=today - timedelta(days=LOOKBACK_DAYS),
    end_date=today + timedelta(days=1),
    status="rpr",
    limit=MAX_LEADS,
    search_limit=MAX_LEADS,
)

if leads_not_found is None:
    console.log("No leads found")
    # Stop executing the cell
    raise SystemExit

df = pd.DataFrame(
    [lead.model_dump() for lead in leads_not_found if filter_leads(lead)]
)

# An empty frame has no columns, so the column accesses below would raise
# KeyError; stop here the same way as when nothing was fetched at all.
if df.empty:
    console.log("No leads left after filtering")
    raise SystemExit

# Replace missing name parts with empty strings so they can be typed into forms.
df["middle_name"] = df["middle_name"].fillna("")
df["first_name"] = df["first_name"].fillna("")
df["last_name"] = df["last_name"].fillna("")

console.log(f"Total leads: {len(df)}")

# Leads without a state default to "MO".
df["state"] = df["state"].fillna("MO")

# Drop the leads whose lead_source is already "lexis_nexis".
df = df[df["lead_source"] != "lexis_nexis"]

  leads_list = leads_list.where("case_date", ">=", start_date)
  leads_list = leads_list.where("case_date", "<=", end_date)
  leads_list = leads_list.where("status", "==", status)


In [None]:
cases_outputs = {}

for case, case_details in df.iterrows():
    # Get the case details
    first_name = case_details["first_name"]
    last_name = case_details["last_name"]
    middle_name = case_details["middle_name"]
    dob = case_details["year_of_birth"]
    key = case_details["case_id"]

    # Get the case details from casenet
    case_info = get_single_case(case_id=key)
    city = case_details["city"] or case_info.address_city or ""
    state = case_details["state"] or case_info.address_state_code or "MO"
    zip = case_details["zip_code"] or case_info.address_zip or ""
    address_line1 = case_details["address"] or case_info.address_line_1 or ""
    address_line2 = ""

    if address_line1 == "":
        console.log(f"Case {key} has no address")
        continue

    # Search if a case with first name, last name, middle name, dob, exists
    console.log(
        f"Searching for {first_name} {last_name} {dob} {city} {state} {zip} {address_line1} {address_line2}"
    )

    # Check if the case already exists in the database
    try:
        cases_search = leads_service.get_items(
            first_name=first_name,
            last_name=last_name,
            middle_name=middle_name,
            year_of_birth=dob,
        )
    except Exception as e:
        console.log(f"Error searching for {first_name} {last_name} {dob}")
        console.log(e)
        cases_search = None

    if cases_search is not None:
        stop_search = False
        for case_found in cases_search:
            if (
                case_found.status != "not_found"
                and case_found.status != "mailed"
                and case_found.status != "not_contacted_prioritized"
                and case_found.status != "not_valid"
                and case_found.status != "stop"
                and case_found.status != "contacted"
                and case_found.status != "wait"
                and case_found.phone is not None
            ):
                console.log(
                    f"Case {case_found.case_id} already exists with status {case_found.status}"
                )
                console.log(
                    f"Case details {case_found.first_name} {case_found.last_name} {case_found.year_of_birth} {case_found.charges_description}"
                )
                details = {
                    "status": "not_contacted_prioritized",
                    "phones": case_found.phones,
                    "email": case_found.email,
                    "phone": case_found.phone,
                    "report": case_found.report,
                    "lead_source": case_found.lead_source,
                }
                stop_search = True
        if stop_search:
            cases_outputs[key] = "Found in the database"
            continue

    # Search for the person in Lexis Nexis
    try:
        details = await search_person(
            first_name,
            last_name,
            middle_name,
            dob,
            city,
            state=state,
            zip=zip,
            address_line1=address_line1,
            address_line2=address_line2,
        )
    except Exception as e:
        console.log(f"Error searching for {first_name} {last_name} {dob}")
        console.log(e)
        for page in context.pages:
            await page.close()
        details = None

    # Update the case with the details
    if details is not None:
        if (
            details.get("phones") is not None
            and len(details.get("phones")) > 0
        ):
            console.log(
                f"Found a good record for {first_name} {last_name} {dob} - in Lexis Nexis"
            )
            details["status"] = "not_contacted_prioritized"
            cases_outputs[key] = "Found in Lexis Nexis"
        else:
            details["status"] = "not_found"
            console.log(
                f"No records found for {first_name} {last_name} {dob}. Found similar records"
            )
            cases_outputs[key] = "Not Found in Lexis Nexis"
        patch_lead(case_id=key, **details)

    else:
        console.log(f"No details found for {first_name} {last_name} {dob}")
        details = {
            "status": "not_found",
            "lead_source": "lexis_nexis_phone_finder",
        }
        cases_outputs[key] = "Not Found in Lexis Nexis"
        patch_lead(case_id=key, **details)

In [None]:
# Count how many cases ended with each outcome stored in cases_outputs.
# NOTE: this rebinds `df`, which earlier cells used for the per-lead frame.
df = (
    pd.DataFrame(list(cases_outputs.items()), columns=["case_id", "status"])
    .groupby("status")
    .count()
)

In [None]:
# Display the per-status case counts held in df.
df

Unnamed: 0_level_0,case_id
status,Unnamed: 1_level_1
Found in Lexis Nexis,109
Found in the database,63
Not Found in Lexis Nexis,106


In [None]:
# Load every stored lead whose lead_source is "lexis_nexis_phone_finder"
# and put one row per lead into a frame.
lexis_nexis_phone_finder_leads = leads_service.get_items(
    lead_source="lexis_nexis_phone_finder",
)
lead_records = [lead.model_dump() for lead in lexis_nexis_phone_finder_leads]
results = pd.DataFrame(lead_records)
    

In [None]:
# For each lead status, count the non-null values in every column.
results.groupby("status").count()

Unnamed: 0_level_0,id,case_id,first_name,last_name,middle_name,court_code,age,year_of_birth,phone,phones,...,violation,court,accidentCheckbox,commercialDriverLicence,ticket_img,user_id,report,details,lead_source,cloudtalk_upload
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
not_contacted_prioritized,88,88,88,88,80,88,88,88,88,88,...,0,0,88,88,0,0,88,88,88,88
not_found,17,17,17,17,15,17,16,16,17,17,...,0,0,17,17,0,0,17,11,17,17
