Can I come up with a better way to create the "signatures" for an anagram utility?  Ideally, I want them to be created in a vectorized manner, not via looping.

In [183]:
word_df <- data.frame(
    word = c("Lois", "oils", "silo", "merger"),
    stringsAsFactors = FALSE
)

# Baseline approach: an anagram "signature" is the word's letters, lower-cased
# and sorted, pasted back into one string.
# tolower() and strsplit() are vectorised, so they run once over the whole
# column; only the per-word sort/paste still has to iterate. The result is a
# list with one signature string per word.
signatures <- lapply(
    strsplit(tolower(word_df$word), ""),
    function(chars) paste0(sort(chars), collapse = "")
)

signatures

In [184]:
# Store a lower-cased copy of every word in a new `lower_case` column.
word_df[["lower_case"]] <- tolower(word_df[["word"]])

# Show the first rows of the data frame.
head(word_df, n = 6L)

Unnamed: 0_level_0,word,lower_case
Unnamed: 0_level_1,<chr>,<chr>
1,Lois,lois
2,oils,oils
3,silo,silo
4,merger,merger


In [185]:
# Split each lower-cased word into a vector of single characters (an empty
# split pattern breaks the string between every character). The result is a
# list with one character vector per word.
splits <- strsplit(word_df$lower_case, "")
splits

In [199]:
# Turn each element of the `splits` list (the letters of one word) into that
# word's signature -- the letters sorted and pasted back together -- and store
# the signatures in a new `sorted` column of `word_df`.

n <- nrow(word_df)

# Notebook cells can be run out of order, so make sure `splits` still has one
# entry per row of `word_df` before writing the column.
stopifnot(length(splits) == n)

# vapply() guarantees a character vector with one string per word, and yields
# character(0) for an empty data frame (a `for (i in 1:n)` loop would instead
# iterate over c(1, 0) and fail on splits[[1]]).
word_df$sorted <- vapply(
    splits,
    function(chars) paste0(sort(chars), collapse = ""),
    character(1)
)

word_df

word,lower_case,sorted
<chr>,<chr>,<chr>
Lois,lois,ilos
oils,oils,ilos
silo,silo,ilos
merger,merger,eegmrr


In [186]:
# Earlier benchmark: run the lapply()-based signature builder 100,000 times
# and divide the total timings by the repetition count to get a per-call mean.
system.time({
    for (i in seq_len(100000)) {
        lapply(
            word_df$word,
            function(w) {
                chars <- unlist(strsplit(tolower(w), ""))
                paste0(sort(chars), collapse = "")
            }
        )
    }
}) / 100000

      user     system    elapsed 
0.00014251 0.00000005 0.00014306 

In [191]:
# Another way to get the sorted letters in the word.
# Base R already provides `letters` (the 26 lower-case letters "a" to "z" in
# alphabetical order), so it does not need to be defined by hand.

# Logical mask over the alphabet: TRUE where that letter occurs in the word.
letters %in% unlist(strsplit("lois", ""))

# A membership test alone would drop repeated letters ("merger" would come out
# as "egmr"), which breaks anagram matching. Count each letter instead and
# repeat it that many times. `splits[[1]]` is already a vector of single
# characters: match() maps each one to its alphabet position and tabulate()
# counts the positions (characters outside a-z match to NA and are ignored).
rep(letters, times = tabulate(match(splits[[1]], letters), nbins = 26L))