In [1]:
import pandas as pd
import plotly.express as px
import json 

In [2]:
import jinja2
from IPython.core.display import HTML

In [3]:
with open("leads.json", "r") as f:
    data_raw = json.load(f)

In [4]:
data = pd.DataFrame(data_raw.values())

In [5]:
# Show in a graph the len of the data by county and by date 
fig = px.bar(data.groupby("court_name").case_id.count(), title="Leads by county")
fig.show()

In [6]:
data["interactions_counts"] = data.interactions.map(lambda x: int(len(x)>=1) if isinstance(x, list) else 0)

In [7]:
# Plot the bars of the interactions by date from the total amount of cases 
interactions = data.groupby("case_date").agg({"interactions_counts": "sum", "case_id": "count"})
interactions["interactions_per_case"] = interactions.interactions_counts / interactions.case_id
fig = px.bar(data.groupby("case_date").interactions_counts.sum(), title="Interactions by date", color_discrete_sequence=["red"])
# Add the total number of cases by date in red
fig.add_trace(px.bar(data.groupby("case_date").case_id.count(), title="Cases by date").data[0])
fig.show()

In [54]:
data

Unnamed: 0,case_id,case_type,court_code,court_name,case_date,first_name,last_name,been_verified,age,year_of_birth,charges,details,email,phone,interactions,interactions_counts
0,220306490,Traffic%2FMunicipal,SMPDB0001_CT07,Clay County - 7th Judicial Circuit,2023-01-20,MADISON,RAY,True,29,1994,Fail To Stop At Stop Sign At Stop Line/Before ...,Madison D Ray\nAge 31 years old\nBorn October ...,DKRAYLOVESJESUS@GMAIL.COM,(417) 838-7987,[],0
1,190095621,Traffic%2FMunicipal,SMPDB0001_CT07,Clay County - 7th Judicial Circuit,2023-01-20,JASON,WISDOM,True,,"[WISDOM, JASON R, Defendant \n\n\n\n\n\n\n\n...",Miscellaneous Parking Violation \r{ Ordinance ...,Jason Wisdom\nAge 21 years old\nBorn August 20...,jasonwisdom77@gmail.com,(573) 480-1786,,0
2,190099770,Traffic%2FMunicipal,SMPDB0001_CT07,Clay County - 7th Judicial Circuit,2023-01-20,ISRAEL,CHACON,True,,"[CHACON, ISRAEL, Defendant \n\n\n\n\n\n\n\n\...",Parking Violation \r{ Ordinance RSMo: Not Avai...,Israel Chacon\nAge 27 years old\nBorn November...,ISRABB1995@GMAIL.COM,No phone numbers found,,0
3,190095620,Traffic%2FMunicipal,SMPDB0001_CT07,Clay County - 7th Judicial Circuit,2023-01-20,JASON,WISDOM,True,,"[WISDOM, JASON R, Defendant \n\n\n\n\n\n\n\n...",Handicapped Parking Violation \r{ Ordinance RS...,Jason Wisdom\nAge 21 years old\nBorn August 20...,jasonwisdom77@gmail.com,(573) 480-1786,,0
4,190098041,Traffic%2FMunicipal,SMPDB0001_CT07,Clay County - 7th Judicial Circuit,2023-01-20,GHEIA,TURK,True,,"[TURK, GHEIA S, Defendant \n\n\n\n\n\n\n\n\n...",Miscellaneous Parking Violation \r{ Ordinance ...,Gheia Shonta Johnson\nAge 44 years old\nBorn A...,SHONYSHON6@YAHOO.COM,(816) 516-1712,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570,220252945,Traffic%2FMunicipal,CT16,Jackson County - 16th Judicial Circuit,2023-02-01,LISA,TUCKER,True,44,1979,Animal License \r{ Ordinance RSMo: Not Availab...,Lisa D Johnson\nAge 42 years old\nBorn Novembe...,BRUNOMAN520@GMAIL.COM,(417) 489-4366,,0
571,220252946,Traffic%2FMunicipal,CT16,Jackson County - 16th Judicial Circuit,2023-02-01,LISA,TUCKER,True,44,1979,Animal License \r{ Ordinance RSMo: Not Availab...,Lisa D Johnson\nAge 42 years old\nBorn Novembe...,BRUNOMAN520@GMAIL.COM,(417) 489-4366,,0
572,210324008,Traffic%2FMunicipal,CT16,Jackson County - 16th Judicial Circuit,2023-02-01,EVERETT,WILLIAMS,True,30,1993,Dwi - Alcohol \r{ Ordinance RSMo: Not Available },Everett L Williams\nAge 31 years old\nBorn 199...,chrsshea@gmail.com,No phone numbers found,,0
573,210333626,Traffic%2FMunicipal,CT16,Jackson County - 16th Judicial Circuit,2023-02-01,CHAD,ESRY,True,35,1988,Driver Of Mtr Veh Failed To Wear Properly Adju...,Chad Dustin Esry\nAge 34 years old\nBorn April...,ESRY43CHAD43@GMAIL.COM,(816) 824-2981,,0


In [55]:
# How many phone numbers we get
cases_with_phone_nb = data.phone.map(lambda x: 1 if "no" not in x.lower() else 0).sum() / data.case_id.count()
print(f"{cases_with_phone_nb*100:.2f}% of the cases have a phone number")


75.30% of the cases have a phone number


In [69]:
statistics_html_template = """
{% for county, county_data in data.items() %}
    <h2>{{county}}</h2>
    <p>Number of leads: {{county_data["cases"]}}</p>
    <p>Number of leads contacted: {{county_data["interactions"]}}</p>
    <p>Number of leads contacted in percentage: {{county_data["interactions_per_case"]}}</p>
    <p>Number of leads with phone number: {{county_data["cases_with_phone_nb"]}}</p>
{% endfor %}
"""

template = jinja2.Template(statistics_html_template)
data_output = {}
for county, county_data in data.groupby("court_name"):
    data_output[county] = {
        "cases": county_data.case_id.count(),
        "interactions": f"{county_data.interactions_counts.sum():.0f}",
        "interactions_per_case": f"{100*county_data.interactions_counts.sum() / county_data.case_id.count():.2f}%",
        "cases_with_phone_nb": f"{100*county_data.phone.map(lambda x: 1 if 'no' not in x.lower() else 0).sum() / county_data.case_id.count():.2f}%"
    }

HTML(template.render(data=data_output))
