In [None]:
# Add the parent directory to the import path so modules located there can be
# imported (assumes the notebook is run from a folder one level below them).
import sys

# Guarded so that re-running this cell does not keep appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")

In [None]:
import pandas as pd
from src.services.leads import get_leads, get_last_lead
from src.services.cases import get_many_cases
from collections.abc import MutableMapping
import datetime

## Data Loading

In [None]:
# Query leads whose status is "not_found" (limit=500 presumably caps the
# number of results).
# NOTE(review): start_date and end_date are the same instant (2024-01-01 00:00).
# Whether this covers the whole day or a zero-width window depends on how
# get_last_lead treats its bounds — TODO confirm.
leads_not_found = get_last_lead(
    start_date=datetime.datetime(2024, 1, 1),
    end_date=datetime.datetime(2024, 1, 1),
    status="not_found",
    limit=500,
)

In [None]:
# Show the fetched leads for a quick visual check.
leads_not_found

In [None]:
# How many leads the query returned.
len(leads_not_found)

In [None]:
# Load the cases behind the fetched leads, requesting 30 case ids at a time.
# NOTE(review): the imported `lead` name is not used in this cell; the
# comprehension below binds its own loop variable.
from src.components.leads import lead


cases = []
for i in range(0, len(leads_not_found), 30):
    cases.extend(
        get_many_cases(
            [found.case_id for found in leads_not_found[i : i + 30]]
        )
    )

# Discard cases that have no state code.
cases = [case for case in cases if case.address_state_code is not None]

In [None]:
# Keep at most the first 500 cases collected so far.
cases = cases[:500]

In [None]:
# Query leads whose status is "contacted" for 2023-10-15 .. 2023-11-01
# (limit=500 presumably caps the number of results).
# NOTE(review): this rebinds `leads_not_found` although the leads are
# "contacted" ones; the name is kept because the following cell reads it.
leads_not_found = get_last_lead(
    start_date=datetime.datetime(2023, 10, 15),
    end_date=datetime.datetime(2023, 11, 1),
    status="contacted",
    limit=500,
)

In [None]:
# Append the cases for the current leads to `cases`, 30 case ids per request.
for i in range(0, len(leads_not_found), 30):
    cases.extend(
        get_many_cases(
            [current.case_id for current in leads_not_found[i : i + 30]]
        )
    )

In [None]:
# Drop cases without a state code (re-applied because more cases were appended).
cases = [c for c in cases if c.address_state_code is not None]

In [None]:
# Keep at most the first 1001 cases.
# NOTE(review): 1001 may be an off-by-one for 1000 (500 kept earlier plus a
# second query limited to 500) — TODO confirm the intended cap.
cases = cases[:1001]

In [None]:
# One row per case, built from each case's model_dump() output
# (presumably a dict of its fields — verify against the case model).
df = pd.DataFrame([case.model_dump() for case in cases])

In [None]:
# List the columns available on the cases frame.
df.columns

In [None]:
# Fields: reference list of column names (appears to be a pasted copy of the
# `df.columns` output). The bare string is this cell's last expression, so it
# is echoed as the cell output; it is not assigned or used by any code.
"""['case_id', 'court_id', 'participants', 'related_cases',
    'protection_order', 'parties', 'disposed', 'legal_fileaccepted',
    'paper_accepted', 'confidential', 'display_judgenotes',
    'case_notecount', 'display_legalfileviewer', 'display_fileviewer',
    'can_userseepublicdocuments', 'can_userseecasedocuments',
    'can_userseeenoticehistory', 'can_selectdocket', 'can_seeecflinks',
    'can_seelegalfilelinks', 'is_ticket', 'address_a_type', 'address_city',
    'address_line_1', 'address_seq_no', 'address_state_code', 'address_zip',
    'birth_date', 'birth_date_code', 'criminal_case', 'criminal_ind',
    'description', 'description_code', 'first_name', 'year_of_birth',
    'formatted_party_address', 'formatted_party_name',
    'formatted_telephone', 'last_name', 'lit_ind', 'middle_name',
    'party_type', 'pidm', 'pred_code', 'prosecuting_atty', 'pty_seq_no',
    'sort_seq', 'age', 'case_desc', 'court_desc', 'location', 'filing_date',
    'case_date', 'formatted_filingdate', 'case_type', 'case_security',
    'case_typecode', 'vine_code', 'locn_code', 'court_code', 'vine_display',
    'vine_id', 'dockets', 'documents', 'charges', 'judge', 'court_type',
    'ticket_searchresult', 'fine', 'plea_andpayind', 'ticket', 'ticket_img',
    'status', 'events', 'court_date', 'court_time', 'court_link',
    'arrest_date', 'arrest_time', 'where_held', 'gender', 'release_info',
    'source', 'custom']
"""

In [None]:
# Select the name and address columns and rename the address ones to the
# target layout:
#   first_name, last_name, street, suite, city, state, postal_code, mailing_street
# (no "suite" column is produced by this selection).
cases_not_found_fields = df.loc[
    :,
    [
        "case_id",
        "first_name",
        "last_name",
        "middle_name",
        "address_line_1",
        "address_city",
        "address_state_code",
        "address_zip",
        "formatted_party_address",
    ],
].rename(
    columns={
        "address_city": "city",
        "address_line_1": "street",
        "address_state_code": "state",
        "address_zip": "postal_code",
        "formatted_party_address": "mailing_street",
    }
)

In [None]:
# Strip newline characters out of mailing_street; values that are not strings
# are left untouched. Newlines are removed without a replacement character, so
# the text on either side of each newline is joined directly.
cases_not_found_fields["mailing_street"] = cases_not_found_fields[
    "mailing_street"
].apply(
    lambda address: address.replace("\n", "")
    if isinstance(address, str)
    else address
)

In [None]:
# Write the selected columns to cases.csv in the current working directory,
# without the DataFrame index column.
cases_not_found_fields.to_csv("cases.csv", index=False)

In [None]:

def flatten(dictionary, parent_key="", separator="_"):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent's key with
    ``separator``, e.g. ``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``.

    Args:
        dictionary: Mapping to flatten. Values that are ``MutableMapping``
            instances are flattened recursively; every other value is copied
            through unchanged.
        parent_key: Prefix prepended to every key. When it is empty (the
            default) top-level keys are kept exactly as they are.
        separator: String placed between a prefix and a key.

    Returns:
        A new dict holding only non-mapping values. Empty nested mappings
        contribute no entries.
    """
    flat = {}
    for key, value in dictionary.items():
        # Format instead of concatenating so that non-string keys (ints, ...)
        # below a prefix are rendered rather than raising TypeError.
        new_key = f"{parent_key}{separator}{key}" if parent_key else key
        if isinstance(value, MutableMapping):
            flat.update(flatten(value, new_key, separator=separator))
        else:
            flat[new_key] = value
    return flat


## Leads Analysis

In [None]:
# Leads for the first week of January 2024.
# NOTE(review): end_date is 2024-01-07 00:00 while the next window starts at
# 2024-01-08 00:00; if get_leads compares full timestamps, leads dated during
# Jan 7 would fall into neither window — TODO confirm.
leads_data_week_1 = get_leads(
    start_date=datetime.datetime(2024, 1, 1),
    end_date=datetime.datetime(2024, 1, 7),
)

In [None]:
# Number of leads fetched for week 1.
len(leads_data_week_1)

In [None]:
# Leads for the second week of January 2024.
# NOTE(review): end_date is 2024-01-14 00:00; if get_leads compares full
# timestamps, leads dated later on Jan 14 would be excluded — TODO confirm.
leads_data_week_2 = get_leads(
    start_date=datetime.datetime(2024, 1, 8),
    end_date=datetime.datetime(2024, 1, 14),
)

In [None]:
# Number of leads fetched for week 2.
len(leads_data_week_2)

In [None]:
# Build one frame per week from the leads' model_dump() output and stack them.
# This rebinds `df`, which previously held the cases frame. The index is not
# reset, so row labels from the two weekly frames repeat.
df = pd.concat(
    [
        pd.DataFrame([entry.model_dump() for entry in weekly_leads])
        for weekly_leads in (leads_data_week_1, leads_data_week_2)
    ]
)

In [None]:
# For leads whose court_code is "IL_COOK", count how often each 8-character
# case_id prefix occurs.
(
    df.loc[df["court_code"] == "IL_COOK", "case_id"]
    .map(lambda case_id: case_id[:8])
    .value_counts()
)

In [None]:
# Full case ids of the leads whose court_code is "IL_COOK".
df.loc[df["court_code"] == "IL_COOK", "case_id"]

In [None]:
# Leads with a missing state are assigned "MO".
# Written as an explicit column assignment: calling fillna(..., inplace=True)
# on a column obtained through attribute access is chained in-place mutation,
# which pandas warns about and which does not modify `df` once Copy-on-Write
# is active.
df["state"] = df["state"].fillna("MO")

In [None]:
# Add the ISO week number of each lead's case_date. Only the week number is
# kept (not the ISO year), so equal week numbers from different years would
# share a value.
df["week"] = df["case_date"].apply(
    lambda case_date: case_date.isocalendar()[1]
)

In [None]:
# Count non-null case_id values per (week, status) pair and write the result
# to scraping.csv in the current working directory.
df.groupby(["week", "status"]).case_id.count().to_csv("scraping.csv")

In [None]:
# Display the combined leads frame, including the added "week" column.
df