diff --git a/NAMESPACE b/NAMESPACE index 3a90b19..e8931cd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(is_plural) export(is_singular) export(list_templates) export(narrate_descriptive) +export(narrate_forecast) export(narrate_trend) export(narratorServer) export(narratorUI) diff --git a/R/edit_templates.R b/R/edit_templates.R index 37aeed1..96a47b7 100644 --- a/R/edit_templates.R +++ b/R/edit_templates.R @@ -7,27 +7,35 @@ edit_templates <- function() { templates <- list_templates() %>% dplyr::select(name, type, template) - # templates %>% - # dplyr::mutate(words = stringr::str_extract_all( - # template, - # "(?<=\\{)[^\\}]+") - # ) - - df <- narrator::sales %>% + df_descriptive <- narrator::sales %>% dplyr::group_by(Region, Product) %>% dplyr::summarise(Sales = sum(Sales)) + df_trend <- narrator::sales %>% + dplyr::mutate(Month = lubridate::floor_date(Date, unit = "months")) %>% + dplyr::group_by(Region, Product, Month) %>% + dplyr::summarise(Sales = sum(Sales)) + not.null <- function(x) { if (is.null(x)) return(FALSE) if (!is.null(x)) return(TRUE) } - narrative_output <- df %>% + narrative_descriptive <- df_descriptive %>% narrate_descriptive( return_data = TRUE, coverage = 0.4 ) + narrative_trend <- df_trend %>% + narrate_trend( + return_data = TRUE, + coverage = 0.4 + ) + + narrative_output <- c(narrative_descriptive, narrative_trend) + + # ui ---------------------------------------------------------------------- ui <- miniUI::miniPage( shiny::tags$style( type = 'text/css', @@ -47,22 +55,28 @@ edit_templates <- function() { label = "Export", class = "btn-warning"), + h3("Templates"), DT::DTOutput("template_table"), + h3("Updated Narrative"), DT::DTOutput("new_table") ) ) server <- function(input, output, session) { - data <- shiny::reactiveValues(data = templates) + rv <- shiny::reactiveValues( + data = templates, + template_type = NULL + ) shiny::observeEvent(input$edit,{ shiny::showModal( if (length(input$template_table_rows_selected) >=1 ) { - template_name <- toString(data$data[input$template_table_rows_selected, 1]) - template <- toString(data$data[input$template_table_rows_selected, 3]) + template_name <- toString(rv$data[input$template_table_rows_selected, 1]) + rv$template_type <- toString(rv$data[input$template_table_rows_selected, 2]) + template <- toString(rv$data[input$template_table_rows_selected, 3]) ind <- narrative_output %>% purrr::map(template_name) %>% @@ -85,14 +99,11 @@ edit_templates <- function() { dplyr::mutate(value = as.character(value)) %>% DT::datatable( rownames = FALSE, - #extensions = "Scroller", height = 800, options = list( pageLength = 10, dom = "t", - deferRender = TRUE#, - #scrollY = 200, - #scroller = TRUE + deferRender = TRUE ), class = 'cell-border stripe' ) @@ -117,12 +128,12 @@ edit_templates <- function() { i <- input$template_table_rows_selected template_new <- input$single_template - data$data[i, 3] <- template_new + rv$data[i, 3] <- template_new shiny::removeModal() }) shiny::observeEvent(input$export,{ - output <- data$data %>% + output <- rv$data %>% dplyr::mutate(result = glue::glue("{type}_{name} = {template} ")) %>% @@ -146,11 +157,12 @@ edit_templates <- function() { output$template_table <- DT::renderDataTable({ DT::datatable( - data$data, + rv$data, rownames = FALSE, extensions = "Scroller", selection = list(mode = "single"), options = list( + pageLength = 10, dom = "t", deferRender = TRUE, scrollY = 200, @@ -161,17 +173,36 @@ edit_templates <- function() { }) output$new_table <- DT::renderDataTable({ - templates_new <- data$data + req(rv$template_type) + + templates_descriptive <- rv$data %>% + dplyr::filter(type == "descriptive") + + templates_trend <- rv$data %>% + dplyr::filter(type == "trend") + + templates_new <<- templates_new + template_type <<- rv$template_type - do.call(narrator::narrate_descriptive, - c(list(df = df), split(templates_new$template, templates_new$name)) + output_descriptive <- do.call(narrator::narrate_descriptive, + c(list(df = df_descriptive), split(templates_descriptive$template, templates_descriptive$name)) ) %>% tibble::enframe(name = "Name", value = "Narrative") %>% - tidyr::unnest(cols = Narrative) %>% + tidyr::unnest(cols = Narrative) + + output_trend <- do.call(narrator::narrate_trend, + c(list(df = df_trend), split(templates_trend$template, templates_trend$name)) + ) %>% + tibble::enframe(name = "Name", value = "Narrative") %>% + tidyr::unnest(cols = Narrative) + + output_descriptive %>% + dplyr::bind_rows(output_trend) %>% DT::datatable( rownames = FALSE, extensions = "Scroller", options = list( + pageLength = 10, dom = "t", deferRender = TRUE, scrollY = 200, diff --git a/R/narrate_forecast.R b/R/narrate_forecast.R new file mode 100644 index 0000000..61e96fd --- /dev/null +++ b/R/narrate_forecast.R @@ -0,0 +1,285 @@ +#' Create Narrative for Time Series Forecast Data Frames +#' +#' @inheritParams narrate_trend +#' @param forecast Name of the forecast column in the data frame +#' @param actuals Name of the actuals column in the data frame +#' +#' @return A [list()] of narratives by default and [character()] if `simplify = TRUE` +#' @export +#' +#' @examples +#' library(prophet) +#' library(dplyr) +#' library(tidyr) +#' +#' fit_prophet <- function(data) { +#' model <- prophet(data) +#' future <- make_future_dataframe(model, periods = 12, freq = "month") +#' forecast <- predict(model, future) +#' return(forecast) +#' } +#' grouped_data <- sales %>% +#' dplyr::mutate(ds = lubridate::floor_date(Date, unit = "month")) %>% +#' dplyr::group_by(Region, ds) %>% +#' dplyr::summarise(y = sum(Sales, na.rm = TRUE)) %>% +#' tidyr::nest() +#' +#' grouped_data$forecast <- lapply(grouped_data$data, fit_prophet) +#' +#' actuals <- grouped_data %>% +#' dplyr::select(-forecast) %>% +#' unnest(data) +#' +#' df <- grouped_data %>% +#' dplyr::select(-data) %>% +#' unnest(forecast) %>% +#' dplyr::select(ds, yhat) %>% +#' dplyr::left_join(actuals) %>% +#' dplyr::rename(Actuals = y, +#' Forecast = yhat) +#' +#' narrate_forecast(df) +narrate_forecast <- function( + df, + measure = NULL, + dimensions = NULL, + date = NULL, + frequency = NULL, + summarization = "sum", + type = "yoy", + coverage = 0.5, + coverage_limit = 5, + narration_depth = 2, + use_chatgpt = FALSE, + openai_api_key = Sys.getenv("OPENAI_API_KEY"), + forecast = "Forecast", + actuals = "Actuals", + template_cy = "Forecasted volume for {current_year} is {format_num(cy_forecast)}", + template_ftm = "Overall forecast for the next 12 months is {format_num(ftm_forecast)}", + template_ftm_change = "Projected {trend} in the next 12 months is equal to {format_num(ftm_change)} ({ftm_change_p}%).", + use_renviron = FALSE, + return_data = FALSE, + simplify = FALSE, + format_numbers = TRUE, + collapse_sep = ", ", + collapse_last = " and ", + ...) { + + # Assertion --------------------------------------------------------------- + if (!is.data.frame(df) && !dplyr::is.tbl(df)) stop("'df' must be a data frame, tibble, or dplyr::tbl connection") + + df <- df %>% + dplyr::ungroup() + + if (!actuals %in% names(df)) stop(glue::glue("{actuals} must be a column in the data frame")) + if (!forecast %in% names(df)) stop(glue::glue("{forecast} must be a column in the data frame")) + + if (coverage_limit < 1) stop("'coverage_limit' must be higher or equal to 1") + if (coverage_limit%%1!=0) stop("'coverage_limit' must be an interger, no decimals allowed") + + if (coverage <= 0 || coverage > 1) stop("'coverage' must be more than 0 and less or equal to 1") + + # Calculating dimensions from a data.frame + if (is.null(dimensions)) { + dimensions <- df %>% + dplyr::select(where(is.character), where(is.factor)) %>% + names() + } else { + if (!all(dimensions %in% names(df))) { + stop("all dimensions must be columns the data frame (df)") + } + } + + # Checking dimensions data types + dimension_dtypes <- df %>% + dplyr::select(dplyr::all_of(dimensions)) %>% + head() %>% + dplyr::collect() %>% + lapply(class) + + if (!all(dimension_dtypes %in% c("character", "factor"))) { + stop(glue::glue("Data types for {toString(dimensions)} must be either 'character' or 'numeric', but is {toString(dimension_dtypes)}")) + } + + if (!class(df[[actuals]]) %in% c("numeric", "integer", "character", "factor")) { + stop(glue::glue("{actuals} must be a numeric column, but is {class(df[[actuals]])}")) + } + + if (!class(df[[forecast]]) %in% c("numeric", "integer", "character", "factor")) { + stop(glue::glue("{forecast} must be a numeric column, but is {class(df[[forecast]])}")) + } + + if (is.null(date)) { + date <- df %>% + dplyr::select_if(lubridate::is.timepoint) %>% + names() + + if (length(date) > 0) { + date <- date[1] + } else { + stop("Date column is required in 'Date', 'dttm', 'POSIXlt' or 'POSIXct' formats") + } + } + + if (!date %in% names(df)) stop("{date} is not a column in the data frame") + + if (!any(class(df[[date]]) %in% c("Date", "POSIXct", "POSIXlt"))) { + stop(glue::glue("{date} must be a numeric column, but is {class(df[[date]])[1]}")) + } + + if (is.null(frequency)) { + frequency <- narrator::get_frequency(df[date]) + } else { + df <- df %>% + dplyr::mutate(!!date := switch( + frequency, + "quarter" = lubridate::floor_date(base::get(date), unit = "quarter"), + "month" = lubridate::floor_date(base::get(date), unit = "month"), + "week" = lubridate::floor_date(base::get(date), unit = "week", week_start = 1), + "day" = base::get(date) + ) + ) + } + + # Renviron ---------------------------------------------------------------- + # Getting Environment Variables if available + # Candidate for a helper function + if (!is.null(getOption("narrator.use_renviron"))) { + use_renviron <- getOption("narrator.use_renviron") + } else if (Sys.getenv("use_renviron") != "") { + use_renviron <- Sys.getenv("use_renviron") + } + + if (use_renviron == TRUE) { + if (Sys.getenv("forecast_template_cy") != "") { + template_cy <- Sys.getenv("forecast_template_cy") + } + + if (Sys.getenv("forecast_template_ftm") != "") { + template_ftm <- Sys.getenv("forecast_template_ftm") + } + + if (Sys.getenv("forecast_template_ftm_change") != "") { + template_ftm <- Sys.getenv("forecast_template_ftm_change") + } + } + + # Total ------------------------------------------------------------------- + # Transforming dttm or similar formats to date and avoiding time zone issues + df <- df %>% + dplyr::mutate(!!date := as.Date(base::get(date))) + + # Current Year + max_actuals_date <- df %>% + dplyr::filter(!is.na(Actuals)) %>% + dplyr::select(dplyr::all_of(date)) %>% + as.matrix() %>% + as.Date() %>% + max() + + next_date <- switch( + frequency, + "week" = max_actuals_date + lubridate::weeks(1), + "month" = max_actuals_date + months(1), + "quarter" = max_actuals_date + months(3) + ) + + max_forecast_date <- df %>% + dplyr::select(dplyr::all_of(date)) %>% + as.matrix() %>% + as.Date() %>% + max() + + current_year <- lubridate::year(max_actuals_date) + + # If current year is complete and nothing was forecast we add different narrative + if (lubridate::year(next_date) == current_year + 1) { + template_cy <- "Actuals for {current_year} are equal to {format_num(cy_forecast)}" + } + + cy_forecast <- df %>% + dplyr::filter(lubridate::year(base::get(date)) == current_year) %>% + dplyr::mutate(Forecast = ifelse(is.na(Actuals), Forecast, Actuals)) %>% + dplyr::summarise(Forecast = sum(Forecast, na.rm = TRUE)) %>% + as.numeric() %>% + round() + + narrative_cy <- glue::glue(template_cy, ...) + + narrative <- list(narrative_cy) %>% + rlang::set_names("Current Year Actuals") + + variables <- list( + template_cy = narrative_cy, + current_year = current_year, + max_actuals_date = max_actuals_date, + max_forecast_date = max_forecast_date, + frequency = frequency, + cy_forecast = cy_forecast) + + # Next 12 months + # In case we have at least one year of forecast values narrate FTM + # We count it as at least 52 weeks + if (as.numeric(max_forecast_date - max_actuals_date)/7 >= 52 & !is.null(template_ftm)) { + ftm_forecast <- df %>% + dplyr::filter(base::get(date) > max_actuals_date, + base::get(date) <= max_actuals_date + months(12)) %>% + dplyr::summarise(Forecast = sum(Forecast, na.rm = TRUE)) %>% + as.numeric() %>% + round() + + narrative_ftm <- glue::glue(template_ftm, ...) + + narrative <- list(narrative_ftm) %>% + rlang::set_names(glue::glue("Overall {forecast} in the next 12 months")) %>% + append(narrative, after = 0) + + variables <- append(variables, list(narrative_ftm = narrative_ftm), 1) + variables <- append(variables, list(template_ftm = template_ftm), 1) + variables <- append(variables, list(ftm_forecast = ftm_forecast)) + + if (!is.null(template_ftm_change)) { + ltm_actuals <- df %>% + dplyr::filter(!is.na(Actuals), + base::get(date) > max_actuals_date - months(12)) %>% + dplyr::summarise(Forecast = sum(Forecast, na.rm = TRUE)) %>% + as.numeric() %>% + round() + + trend <- ifelse(ftm_forecast > ltm_actuals, "increase", "decrease") + ftm_change <- ftm_forecast - ltm_actuals + ftm_change_p <- round(ftm_change/ltm_actuals*100, 1) + + narrative_ftm_change <- glue::glue(template_ftm_change, ...) + + narrative <- list(narrative_ftm_change) %>% + rlang::set_names(glue::glue("Overall {trend} the next 12 months")) %>% + append(narrative, after = 0) + + variables <- append(variables, list(narrative_ftm_change = narrative_ftm_change), 1) + variables <- append(variables, list(template_ftm_change = template_ftm_change), 1) + variables <- append(variables, + list(trend = trend, + ltm_actuals = ltm_actuals) + ) + } + } + + # ChatGPT ----------------------------------------------------------------- + if (use_chatgpt) { + narrative <- enhance_narrative(narrative, openai_api_key = openai_api_key) + } + + # Output ------------------------------------------------------------------ + if (return_data == TRUE) { + return(variables) + } + + if (simplify == TRUE) { + narrative <- as.character(narrative) + variables$narrative <- as.character(variables$narrative) + } + + return(narrative) +} + diff --git a/README.md b/README.md index 430c045..82c3342 100644 --- a/README.md +++ b/README.md @@ -135,30 +135,27 @@ narrative_enhanced <- enhance_narrative(narrative_one) cat(narrative_enhanced) ``` -The company has achieved a remarkable Total Sales figure of 38790478.4 -across all Regions. Upon further analysis, it is evident that the -Outlying Regions, such as NA and EMEA, have contributed significantly to -this commendable feat. NA has recorded an impressive figure of -18079736.4, which accounts for 46.6% of the overall sales, whereas EMEA -has recorded 13555412.7, which is 34.9% of the total sales. - -Upon examination of the NA market, it is observed that Food & Beverage -has emerged as the leading product category, accounting for a -substantial figure of 7392821, which is 40.9% of the sales. Furthermore, -Electronics follows suit, with an impressive contribution of 3789132.7, -which is 21% of the total sales. - -Similarly, the EMEA market depicts Food & Beverage as the dominant -player in terms of sales figures, accounting for a whopping 5265113.2, -which constitutes 38.8% of the total sales. Electronics comes in a close -second, contributing 3182803.4, which is 23.5% of the sales. - -Additionally, Food & Beverage emerges as the major contributor to -Outlying Products by Sales, with a massive figure of 15543469.7, which -accounts for a significant 40.1% of the overall sales. Electronics -follows suit, contributing 8608962.8, which is 22.2% of the total sales. -These numbers highlight the significance of these products in the -overall success of the company’s sales figures. +The company’s Total Sales across all Regions have reached a remarkable +figure of 38790478.4. Outlying Regions by Sales are NA, which has +generated sales figures of 18079736.4, accounting for 46.6% of the total +sales. Following closely is EMEA, responsible for sales figures of +13555412.7, representing 34.9% of the total sales. + +Analysing the sales figures for the NA market, we can identify two +significant products that have achieved excellent results. Food & +Beverage led the way with an outstanding 7392821 in sales, comprising +40.9% of the market share. Electronics followed closely with sales +figures of 3789132.7, accounting for an estimated 21% market share. + +In the EMEA market, we also see impressive figures of sales for the Food +& Beverage sector with 5265113.2 accounting for 38.8% market share, and +Electronics achieving sales of 3182803.4, accounting for an estimated +23.5% of the market. + +Combined, Food & Beverage and Electronics products have driven the +Outlying Products by Sales with notable figures of 15543469.7 and +8608962.8, which represent 40.1% and 22.2% of the market share, +respectively. ### Translation @@ -170,28 +167,26 @@ translation <- translate_narrative(narrative_enhanced, language = "Czech") cat(translation) ``` -Společnost dosáhla pozoruhodného celkového tržby ve výši 38790478,4 -všech regionů. Z další analýzy je jasné, že odlehlé regiony, jako jsou -NA a EMEA, významně přispěly k tomuto pochvalnému úspěchu. NA -zaznamenala skvělou částku 18079736,4, což představuje 46,6 % celkových -prodejů, zatímco EMEA zapsala 13555412,7, což znamená 34,9 % celkových +Celkové Spojené Prodeje Společnosti napříč všemi regiony dosáhly +pozoruhodné sumy 38790478.4. Regiony nad rámec očekávání jsou Severní +Amerika (NA), která vygenerovala prodej ve výši 18079736.4, což +představuje 46,6% celkového prodeje. Těsně na druhém místě je EMEA, +zodpovědná za prodej ve výši 13555412.7, což odpovídá 34,9% celkových prodejů. -Při zkoumání NA trhu je pozorováno, že potraviny a nápoje se staly -vedoucí kategorií produktů, představující podstatnou částku 7392821, což -je 40,9 % prodejů. Elektronika následuje, s impozantním přínosem -3789132,7, což je 21 % celkových prodejů. +Analýzou prodejů na trhu Severní Ameriky můžeme identifikovat dva +významné produkty, které dosáhly vynikajících výsledků. Vedením šla +kategorie Jídla a Nápojů se skvělými prodeji za 7392821, což tvoří 40.9% +tržního podílu. Elektronika následovala blízko za prodejovými čísly ve +výši 3789132.7, což odpovídá přibližně 21% tržního podílu. -Podobně se na trhu EMEA ukazuje, že potraviny a nápoje jsou dominantním -hráčem z hlediska prodejních čísel, představující ohromující 5265113,2, -což představuje 38,8 % celkových prodejů. Elektronika následuje těsně za -ní, přispívající 3182803,4, což je 23,5 % prodejů. +Na trhu EMEA vidíme také impozantní prodejová čísla od kategorie Jídla a +Nápojů (5265113.2), což tudíž tvoří 38.8% tržního podílu, a Elektronika +s prodejem ve výši 3182803.4, což představuje 23.5% tržního podílu. -Dodatečně se potraviny a nápoje stávají hlavním přispěvatelem k prodejům -odlehlých produktů, s obrovskou částkou 15543469,7, což představuje -významných 40,1 % celkových prodejů. Elektronika následuje s přínosem -8608962,8, což je 22,2 % celkových prodejů. Tyto čísla zdůrazňují význam -těchto produktů v celkovém úspěchu prodejů společnosti. +Celkově kategorie Jídla a Nápojů a Elektronika pohánějí Outlying +Produkty s významnými čísly ve výši 15543469.7 a 8608962.8, což odpovídá +40.1% a 22.2% tržního podílu. ### Summarization @@ -203,12 +198,12 @@ summarization <- summarize_narrative(narrative_enhanced) cat(summarization) ``` -The company achieved total sales of \$38,790,478.4 with contributions -from NA and EMEA regions. Food & Beverage dominated both markets with -highest sales figures, totaling \$15,543,469.7 and \$5,265,113.2, -respectively. Electronics followed with \$8,608,962.8 and \$3,182,803.4 -in sales. These products are significant for the company’s overall sales -success. +Total Sales of 38790478.4 have been achieved by the company across all +regions. NA and EMEA are the top regions with sales figures of +18079736.4 and 13555412.7, respectively. Food & Beverage and Electronics +are the leading products, with impressive sales figures in both regions +driving the market share. They account for 40.1% and 22.2% of the market +share respectively, combining to contribute to the overall success. # Python diff --git a/_pkgdown.yml b/_pkgdown.yml index 62fe0b4..47b58af 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -62,6 +62,7 @@ articles: contents: - descriptive - trend + - forecast - title: Customization desc: Making edits and formatting of the narrative outputs diff --git a/docs/404.html b/docs/404.html index 5e22ae9..ddca6df 100644 --- a/docs/404.html +++ b/docs/404.html @@ -44,6 +44,7 @@