In [1]:
library(tidyverse)
library(networkD3)
library(htmlwidgets)
library(htmltools)

# Load each raw NIBRS table for DC 2022 and immediately keep only the columns
# needed downstream. Columns are picked by position; per the original note the
# retained fields are offense_id, incident_id, offense_code, offender_id,
# victim_id, injury_id and weapon_id (spread across the five tables).
offense_22 <- read.csv("./data/DC-2022/NIBRS_OFFENSE.csv") %>%
  select(2, 3, 4)

offender_22 <- read.csv("./data/DC-2022/NIBRS_OFFENDER.csv") %>%
  select(2, 3)

victim_22 <- read.csv("./data/DC-2022/NIBRS_VICTIM.csv") %>%
  select(2, 3)

weapon_22 <- read.csv("./data/DC-2022/NIBRS_WEAPON.csv") %>%
  select(2, 3)

injury_22 <- read.csv("./data/DC-2022/NIBRS_VICTIM_INJURY.csv") %>%
  select(2, 3)

# Join the five tables step by step (inner joins via base merge()):
# offenders and victims attach on incident_id, injuries on victim_id,
# and weapons on offense_id.
df_22 <- offense_22 %>%
  merge(offender_22, by = "incident_id") %>%
  merge(victim_22, by = "incident_id") %>%
  merge(injury_22, by = "victim_id") %>%
  merge(weapon_22, by = "offense_id")

# Drop the key columns by position (1, 2, 3 and 5 of the merged frame);
# the original note lists these as incident_id, offense_id, victim_id and
# offender_id.
df_22 <- df_22 %>% select(-c(1, 2, 3, 5))

# Prefix the numeric ids so injury and weapon ids cannot collide with each
# other once they share a single node namespace.
df_22$weapon_id <- paste0("w", df_22$weapon_id)
df_22$injury_id <- paste0("i", df_22$injury_id)

# First layer of links: offense type -> weapon type.
# Tally every (offense_code, weapon_id) pair, keep pairs seen more than 100
# times, and order them from most to least frequent.
first_link <- df_22 %>%
  group_by(offense_code, weapon_id) %>%
  summarise(value = n(), .groups = "drop") %>%
  rename(source = offense_code, target = weapon_id) %>%
  filter(value > 100) %>%
  arrange(desc(value))

# Second layer of links: weapon type -> injury type, built the same way.
second_link <- df_22 %>%
  group_by(weapon_id, injury_id) %>%
  summarise(value = n(), .groups = "drop") %>%
  rename(source = weapon_id, target = injury_id) %>%
  filter(value > 100) %>%
  arrange(desc(value))

# Stack both layers into one plain data frame of links.
links.df <- as.data.frame(rbind(first_link, second_link))

# Read the lookup tables that translate codes/ids into readable names.
offense_code <- read.csv("./data/DC-2022/NIBRS_OFFENSE_TYPE.csv")
injury_code <- read.csv("./data/DC-2022/NIBRS_INJURY.csv")
weapon_code <- read.csv("./data/DC-2022/NIBRS_WEAPON_TYPE.csv")

# Reduce each lookup table to two columns and rename them to a shared
# (name, label) schema so the three tables can be stacked.
offense_code <- offense_code %>%
  select(1, 2) %>%
  rename(name = offense_code, label = offense_name)
injury_code <- injury_code %>%
  select(1, 3) %>%
  rename(name = injury_id, label = injury_name)
weapon_code <- weapon_code %>%
  select(1, 3) %>%
  rename(name = weapon_id, label = weapon_name)

# Apply the same "i"/"w" prefixes that were applied to the link endpoints.
injury_code$name <- paste0("i", injury_code$name)
weapon_code$name <- paste0("w", weapon_code$name)

# Combine all candidate nodes.
nodes.df <- rbind(offense_code, injury_code, weapon_code)

# Keep only nodes that appear as an endpoint of at least one link.
# Both the source and the target of every link are considered: a weapon can
# pass the weapon -> injury threshold without passing the offense -> weapon
# threshold, and would otherwise be dropped here and produce an NA source_id.
link_nodes <- unique(c(links.df$source, links.df$target))
nodes.df <- nodes.df %>% filter(name %in% link_nodes)

# Surface link endpoints that have no entry in the lookup tables, since they
# would silently become NA indices below.
missing_nodes <- setdiff(link_nodes, nodes.df$name)
if (length(missing_nodes) > 0) {
  warning(
    "Link endpoints missing from the node lookup tables: ",
    paste(missing_nodes, collapse = ", "),
    call. = FALSE
  )
}

# Zero-based row positions of each endpoint in nodes.df, for the sankey
# diagram (match() is one-based, hence the - 1).
links.df$source_id <- match(links.df$source, nodes.df$name) - 1
links.df$target_id <- match(links.df$target, nodes.df$name) - 1

-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr     1.1.3     v readr     2.1.4
v forcats   1.0.0     v stringr   1.5.0
v ggplot2   3.4.4     v tibble    3.2.1
v lubridate 1.9.3     v tidyr     1.3.0
v purrr     1.0.2     
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Attaching package: 'htmlwidgets'


The following object is masked from 'package:networkD3':

    JS




In [3]:
# Display the node table (name = code/id, label = readable name)
nodes.df

name,label
<chr>,<chr>
11A,Rape
11D,Fondling
120,Robbery
13A,Aggravated Assault
13B,Simple Assault
520,Weapon Law Violations
i1,Apparent Broken Bones
i2,Possible Internal Injury
i3,Severe Laceration
i4,Minor Injury


In [8]:
# Assign a group to every node.
# All rules are evaluated in a single case_when() so that no rule overwrites
# another; chaining separate mutate(group = ifelse(...)) calls would reset
# every previously assigned group to "d".
#   "a": simple assault (13B), weapon w41 and injury i4
#   "b": aggravated assault (13A)
#   "c": robbery (120)
#   "d": every other node
nodes.df <- nodes.df %>%
  mutate(group = case_when(
    name %in% c("13B", "w41", "i4") ~ "a",
    name == "13A" ~ "b",
    name == "120" ~ "c",
    TRUE ~ "d"
  ))

source,target,value,source_id,target_id,group
<chr>,<chr>,<int>,<dbl>,<dbl>,<chr>
13B,w41,12155,4,17,a
120,w3,3181,2,12,c
13B,w38,2320,4,15,a
13A,w3,1563,3,12,b
13B,w42,1536,4,18,a
120,w41,1508,2,17,c
13A,w38,829,3,15,b
13A,w21,730,3,13,b
13A,w41,646,3,17,b
520,w3,609,5,12,d


https://stackoverflow.com/questions/46616321/modify-networkd3-sankey-plot-with-user-defined-colors
https://r-graph-gallery.com/322-custom-colours-in-sankey-diagram.html
https://coolors.co/palette/e63946-f1faee-a8dadc-457b9d-1d3557