In [None]:
# Install the required packages only when they are not already available,
# so that re-running this cell does not download and reinstall them each time.
required_packages <- c("RedditExtractoR", "dplyr")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

In [None]:
library(RedditExtractoR) 
library(dplyr)

In [None]:
#' Crawl Reddit comments for threads matching a keyword
#'
#' Searches Reddit for threads matching `keyword`, optionally filters them by
#' date range and minimum comment count, downloads the comments of the first
#' `num_threads` remaining threads, and writes the selected comment columns to
#' a CSV file.
#'
#' @param keyword Search keyword passed to `find_thread_urls()`.
#' @param num_threads Maximum number of threads to download comments from.
#' @param min_comments Keep only threads with at least this many comments
#'   (`0` disables the filter).
#' @param date_from Optional lower bound (inclusive) on the thread date;
#'   anything `as.Date()` can parse, e.g. `"2024-01-01"`. `NULL` disables it.
#' @param date_to Optional upper bound (inclusive) on the thread date.
#'   `NULL` disables it.
#' @param sort_by Sort order passed through to `find_thread_urls()`.
#' @param output_file Path of the CSV file to write.
#' @return A data frame with the columns `thread_title`, `comment`, `author`,
#'   `date`, `score`, `url` and `comment_id`. Stops with an error when no
#'   thread matches the criteria or no comments could be collected.
crawl_reddit_comments <- function(
    keyword,
    num_threads = 10,
    min_comments = 0,
    date_from = NULL,
    date_to = NULL,
    sort_by = "top",
    output_file = "reddit_comments.csv"
) {
  stopifnot(
    is.character(keyword), length(keyword) == 1L,
    is.numeric(num_threads), length(num_threads) == 1L, num_threads >= 1
  )

  cat("Finding threads with keyword:", keyword, "\n")
  threads <- find_thread_urls(
    keywords = keyword,
    sort_by = sort_by
  )

  # NOTE(review): guard against a non-data-frame result (e.g. NA/NULL) when
  # the search yields nothing -- confirm against the RedditExtractoR version.
  if (!is.data.frame(threads)) {
    stop("No threads found with the given criteria")
  }

  # Compare real dates rather than raw strings. which() drops rows whose
  # comparison is NA instead of turning them into all-NA rows.
  # NOTE(review): assumes date_utc is parseable by as.Date() (ISO
  # "YYYY-MM-DD") -- confirm against find_thread_urls() output.
  if (!is.null(date_from)) {
    keep <- which(as.Date(threads$date_utc) >= as.Date(date_from))
    threads <- threads[keep, , drop = FALSE]
  }
  if (!is.null(date_to)) {
    keep <- which(as.Date(threads$date_utc) <= as.Date(date_to))
    threads <- threads[keep, , drop = FALSE]
  }
  if (min_comments > 0) {
    keep <- which(threads$comments >= min_comments)
    threads <- threads[keep, , drop = FALSE]
  }

  # Check for an empty result only after *all* filters have been applied;
  # otherwise 1:0 style indexing would fabricate an all-NA thread row.
  if (nrow(threads) == 0) {
    stop("No threads found with the given criteria")
  }

  threads <- head(threads, num_threads)
  n_threads <- nrow(threads)

  cat("Found", n_threads, "threads matching criteria\n")

  # Collect per-thread results in a preallocated list and bind once at the
  # end instead of growing a data frame with rbind() on every iteration.
  comment_list <- vector("list", n_threads)

  for (i in seq_len(n_threads)) {
    cat(sprintf("Processing thread %d of %d...\n", i, n_threads))
    thread_comments <- tryCatch({
      content <- get_thread_content(threads$url[i])
      comments <- content$comments
      if (is.data.frame(comments) && nrow(comments) > 0) {
        comments$thread_title <- content$threads$title[1]
        comments
      } else {
        NULL
      }
    }, error = function(e) {
      # Best effort: report the failure (with its reason) and move on.
      cat(
        "Error processing thread:", threads$url[i],
        "-", conditionMessage(e), "\n"
      )
      NULL
    })

    # Assigning NULL via [[<- would delete the slot, so only store real data.
    if (!is.null(thread_comments)) {
      comment_list[[i]] <- thread_comments
    }

    # Pause between requests to stay polite to the Reddit API.
    if (i < n_threads) {
      Sys.sleep(2)
    }
  }

  comment_list <- Filter(Negate(is.null), comment_list)
  if (length(comment_list) == 0) {
    stop("No comments found in the processed threads")
  }
  all_comments <- do.call(rbind, comment_list)

  final_comments <- all_comments[, c(
    "thread_title",   # Thread title
    "comment",        # Comment content
    "author",         # Comment author
    "date",           # Comment date
    "score",          # Comment score
    "url",            # Thread URL
    "comment_id"      # Comment ID
  )]

  cat("Saving", nrow(final_comments), "comments to:", output_file, "\n")
  write.csv(
    final_comments,
    output_file,
    row.names = FALSE,
    fileEncoding = "UTF-8"
  )

  cat("Done!\n")
  final_comments
}

In [None]:
# Example run: fetch the comments of the single top thread matching "review"
# that has at least 10 comments and was posted between 2024-01-01 and
# 2024-11-16, and save them to "comments.csv".
crawl_reddit_comments(
  keyword = "review",
  num_threads = 1,
  min_comments = 10,
  date_from = "2024-01-01",
  date_to = "2024-11-16",
  sort_by = "top",
  output_file = "comments.csv"
)