In [54]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from helper import clean_column_names
import networkx as nx

In [55]:
def clean_brady_table(df):
    """Normalize a raw Brady table and return the cleaned copy.

    The input frame is not modified. Cleaning steps, in order:

    1. Strip leading apostrophes from column labels.
    2. For every string cell: strip leading apostrophes, lowercase, and trim
       surrounding whitespace. Non-string cells are left untouched.
    3. ``last_name``: remove an embedded ``'rule...`` suffix, blank out values
       that are a lone apostrophe, then drop rows whose last name is missing
       or empty.
    4. ``tracking_id``: remove all whitespace and apostrophes.
    5. ``allegation``: new column built from ``allegation_rule`` and
       ``allegation_paragraph`` joined by a single space, trimmed, with
       apostrophes removed. The two source columns are dropped.
    6. ``allegation_desc``: remove a trailing apostrophe and replace empty
       strings with ``"missing"`` (NaN values stay NaN).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table. After step 1 it must expose the columns ``last_name``,
        ``tracking_id``, ``allegation_rule``, ``allegation_paragraph`` and
        ``allegation_desc``.

    Returns
    -------
    pandas.DataFrame
        A new, cleaned frame with rows lacking a last name removed.
    """
    # Work on a copy so neither the caller's values nor its column labels change.
    df = df.copy()
    df.columns = [col.lstrip("'") for col in df.columns]

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
    # fall back to applymap on older versions that lack DataFrame.map.
    elementwise = df.map if hasattr(df, "map") else df.applymap
    df = elementwise(
        lambda x: x.lstrip("'").lower().strip() if isinstance(x, str) else x
    )

    # Every string is already lowercase at this point, so the "'RULE..." marker
    # has to be matched in lowercase; the former uppercase pattern never matched.
    df["last_name"] = (
        df["last_name"]
        .str.replace(r"'rule(.+)", "", regex=True)
        .str.replace(r"^'$", "", regex=True)
    )

    # Copy after filtering so the column assignments below write to an
    # independent frame rather than to a slice of the unfiltered one.
    df = df[df["last_name"].fillna("") != ""].copy()

    # Lowercasing/trimming already happened element-wise; only internal
    # whitespace and stray apostrophes remain to be removed.
    df["tracking_id"] = (
        df["tracking_id"]
        .str.replace(r"\s+", "", regex=True)
        .str.replace("'", "", regex=False)
    )

    # A NaN in either part makes the combined value NaN (Series.str.cat default).
    df["allegation"] = (
        df["allegation_rule"]
        .str.cat(df["allegation_paragraph"], sep=" ")
        .str.strip()
        .str.replace("'", "", regex=False)
    )
    df = df.drop(columns=["allegation_rule", "allegation_paragraph"])

    df["allegation_desc"] = (
        df["allegation_desc"]
        .str.replace(r"'$", "", regex=True)
        .str.replace(r"^$", "missing", regex=True)
    )
    return df

In [56]:
def read_brady_tbl(path="data/input/brady_2016_2023.csv"):
    """Load the raw Brady table from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the CSV file. Defaults to the 2016-2023 export under
        ``data/input`` so existing zero-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv`` with
        default options; no cleaning is applied here.
    """
    return pd.read_csv(path)

# Load the raw table, normalize its values, then pass it through
# clean_column_names (imported from the local `helper` module; its exact
# behavior is not visible in this notebook).
df = read_brady_tbl()
df = df.pipe(clean_brady_table).pipe(clean_column_names)


# Collect every row whose description mentions "domestic" so the distinct
# wordings can be inspected by eye. na=False treats a missing description as a
# non-match; without it a NaN in the mask makes the boolean indexing raise.
review = df[df.allegation_desc.str.lower().str.contains("domestic", na=False)]

review.allegation_desc.unique()


DataFrame.applymap has been deprecated. Use DataFrame.map instead.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.



array(['mississippi code title 97-3- 7 (3) domestic violence (simple assault)',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 19',
       'nopd policy: chapter 42.4 - domestic violence',
       'nopd policy: chapter 42.4 domestic violence',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 12',
       'nopd chapter: policy 42.4, domestic violence arrest procedures, paragraph 12',
       'r.s. 14:35.3 relative to domestic abuse battery',
       '(c) 6: failing to comply with instructions, oral, or written, for any authoritative source, to wit chapter 42.4, domestic violence, interview the parties and witnesses, paragraph 14(c)',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 82: reports',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 95',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 14',
       'nopd policy: chapter 42.4.1 domestic disturbance',
       'nopd policy: chapter 42.4.1 domestic disturb

In [57]:
# Number of rows per year across the whole cleaned table (ordered by frequency).
df["year"].value_counts()

year
2016    1730
2019    1413
2021    1391
2017    1375
2020    1264
2018    1254
2023    1173
2022     901
Name: count, dtype: int64

In [58]:
# Rows whose description mentions "domestic"; missing descriptions are
# replaced by an empty string first so they simply do not match.
mentions_domestic = df["allegation_desc"].fillna("").str.contains("domestic")
dv_df = df.loc[mentions_domestic]

In [59]:

# Narrow the domestic rows to descriptions that mention "nopd policy",
# "nopd chapter" or "paragraph". The alternation is wrapped in a non-capturing
# group (?:...) so str.contains no longer warns that the pattern "has match
# groups"; the set of matching rows is the same as with a capturing group.
dv_df = dv_df[dv_df.allegation_desc.str.contains("(?:nopd policy|nopd chapter|paragraph)")]

# Only the last expression of a cell is rendered, so this count is not shown.
dv_df.year.value_counts()

dv_df.allegation_desc.unique()

# dv_df.to_csv("data/output/dv_df_brady.csv", index=False)


This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.



array(['nopd policy: chapter 42.4 - domestic violence; paragraph 19',
       'nopd policy: chapter 42.4 - domestic violence',
       'nopd policy: chapter 42.4 domestic violence',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 12',
       'nopd chapter: policy 42.4, domestic violence arrest procedures, paragraph 12',
       '(c) 6: failing to comply with instructions, oral, or written, for any authoritative source, to wit chapter 42.4, domestic violence, interview the parties and witnesses, paragraph 14(c)',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 82: reports',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 95',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 14',
       'nopd policy: chapter 42.4.1 domestic disturbance',
       'nopd policy: chapter 42.4.1 domestic disturbance; paragraph 3',
       'nopd policy: chapter 42.4 - domestic violence; paragraph 16',
       'nopd policy: chapter 42.4.1 domestic dis

In [60]:
# dv_df_criminal =  dv_df[~(dv_df.allegation_desc.str.contains("(nopd policy|nopd chapter|paragraph)"))]


# dv_df_criminal.to_csv("data/output/dv_df_brady_criminal.csv", index=False)


# dv_df_criminal.allegation_desc.unique()

In [61]:

# Allegations per year in chronological order (value_counts alone orders by
# frequency, which would scramble the x axis of a line chart).
year_counts = dv_df.year.value_counts().sort_index()

# Year-over-year change in percent. The earliest year has no predecessor, so
# its entry is NaN.
year_pct_change = year_counts.pct_change() * 100

trace = go.Scatter(
    x=year_counts.index,
    y=year_counts.values,
    mode='lines+markers',
    name='Count',
    line=dict(width=2)
)

# One boxed label per data point. Labels alternate below/above the marker
# (even positions below, odd positions above) to reduce overlap.
annotations = []
for idx, (year, count, pct) in enumerate(
    zip(year_counts.index, year_counts.values, year_pct_change.values)
):
    # The first year has no previous value; show "n/a" instead of "+nan%".
    change_label = "n/a" if pd.isna(pct) else f"{pct:+.2f}%"
    label_below = idx % 2 == 0
    annotations.append(
        dict(
            x=year,
            y=count,
            text=f"<b>{year}</b><br>Count: {count:,}<br>Change: {change_label}",
            align='center',
            showarrow=False,
            font=dict(size=12, color='black'),
            bgcolor='rgba(255, 255, 255, 0.8)',
            bordercolor='black',
            borderwidth=1,
            borderpad=4,
            xanchor='center',
            yanchor='top' if label_below else 'bottom',
            yshift=-20 if label_below else 20
        )
    )

layout = go.Layout(
    title='Number of Domestic Allegations per Year',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Number of Allegations'),
    annotations=annotations
)

fig = go.Figure(data=[trace], layout=layout)

fig.show()

In [62]:
# import pandas as pd
# import networkx as nx
# import plotly.graph_objects as go
# import plotly.io as pio

# dv_df_allegations = dv_df[["allegation_desc", "allegation"]]
# dv_df_allegations.allegation.value_counts()

# dv_df_allegations.loc[:, "allegation_desc"] = dv_df_allegations.allegation_desc.str.replace(r"^nopd policy:? ", "", regex=True)

# # Assuming dv_df_allegations is already defined
# top_10_desc = dv_df_allegations.groupby('allegation')['allegation_desc'].apply(lambda x: x.value_counts().nlargest(10)).reset_index(name='count')
# # Rename the column 'level_1' to 'allegation_desc' for consistency
# top_10_desc.rename(columns={'level_1': 'allegation_desc'}, inplace=True)

# # Iterate over unique high-level classifications
# for allegation in top_10_desc['allegation'].unique():
#     # Create a subgraph for each high-level classification
#     subgraph = nx.DiGraph()
#     top_desc = top_10_desc[top_10_desc['allegation'] == allegation]
    
#     # Add nodes and edges
#     for index, row in top_desc.iterrows():
#         subgraph.add_node(row['allegation'], color='green', size=20)
#         subgraph.add_node(row['allegation_desc'], color='purple', size=10)
#         subgraph.add_edge(row['allegation'], row['allegation_desc'])
    
#     # Create positions for nodes with a smaller k value
#     pos = nx.spring_layout(subgraph, k=0.1, iterations=50)
    
#     # Extract node attributes for Plotly
#     edge_x = []
#     edge_y = []
#     for edge in subgraph.edges():
#         x0, y0 = pos[edge[0]]
#         x1, y1 = pos[edge[1]]
#         edge_x.append(x0)
#         edge_x.append(x1)
#         edge_x.append(None)
#         edge_y.append(y0)
#         edge_y.append(y1)
#         edge_y.append(None)
    
#     edge_trace = go.Scatter(
#         x=edge_x, y=edge_y,
#         line=dict(width=0.5, color='#888'),
#         hoverinfo='none',
#         mode='lines')
    
#     node_x = []
#     node_y = []
#     node_text = []
#     node_color = []
#     node_size = []
#     for node in subgraph.nodes():
#         x, y = pos[node]
#         node_x.append(x)
#         node_y.append(y)
#         node_text.append(node)
#         node_color.append(subgraph.nodes[node]['color'])
#         node_size.append(subgraph.nodes[node]['size'])
    
#     node_trace = go.Scatter(
#         x=node_x, y=node_y,
#         mode='markers+text',
#         text=node_text,
#         textposition="top center",
#         textfont=dict(size=10),  # Increase the label size
#         hoverinfo='text',
#         marker=dict(
#             showscale=False,
#             color=node_color,
#             size=node_size,
#             line_width=2))
    
#     fig = go.Figure(data=[edge_trace, node_trace], layout=go.Layout(
#         title=f'Allegation Network for: {allegation}',
#         showlegend=False,
#         hovermode='closest',
#         margin=dict(b=20, l=20, r=20, t=40),  # Reduce margins for more space
#         width=800,  # Increase width
#         height=800,  # Increase height
#         xaxis=dict(showgrid=False, zeroline=False),
#         yaxis=dict(showgrid=False, zeroline=False)))
    
#     # Save the figure as an HTML file
#     fig.write_html(f'allegation_network_{allegation.replace(" ", "_")}.html')