From cead6e0030e39b3855b04e21d1fe9a5f29cc0735 Mon Sep 17 00:00:00 2001
From: Craig Parylo
Date: Fri, 7 Jun 2024 13:56:40 +0100
Subject: [PATCH] adds third example (draft) using credit card applications

---
 vignettes/using_plotor.Rmd | 54 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/vignettes/using_plotor.Rmd b/vignettes/using_plotor.Rmd
index af649e3..6ee84f3 100644
--- a/vignettes/using_plotor.Rmd
+++ b/vignettes/using_plotor.Rmd
@@ -288,3 +288,57 @@ plot_or(lr)
 `plot_or` recognises the use of labels and uses these in preference to variable names wherever available.
 
 Using variable labels makes plots easier to read and more accessible, and is especially useful where you want to include the chart in reports or publications.
+
+## Example 3 - credit card applications
+
+This example uses cross-sectional data on credit history for a sample of applicants for a credit card.
+
+```{r warning=FALSE, message=FALSE, fig.width=9, fig.height=5, fig.format='png', fig.retina=TRUE}
+library(AER) # source of example data
+
+# get the data
+data('CreditCard')
+df <- CreditCard |>
+  # convert age from decimal years to whole years
+  mutate(age = round(age, 0))
+
+# create a list of variable = labels
+var_labels <- list(
+  card = 'Card application accepted',
+  reports = 'No. derogatory reports',
+  age = 'Age (year)',
+  income = 'Yearly income (USD 10k)',
+  dependents = 'No. dependents',
+  months = 'Months at address',
+  majorcards = 'No. major credit cards',
+  active = 'No. credit accounts'
+)
+
+# label the variables in our data
+labelled::var_label(df) <- var_labels
+
+# conduct the logistic regression
+lr <- glm(
+  data = df,
+  family = 'binomial',
+  formula = card ~ reports + age + income + dependents + months + majorcards + active
+)
+
+# plot the odds ratios; the text is passed as `title` so it is shown on the chart
+plot_or(lr) +
+  labs(title = 'Likelihood for successful credit card application')
+```
+
+In this example each of the explanatory variables is numeric, unlike in the previous examples which used factor variables, which means the odds-ratios show the effect of an increase of one unit.
+
+There is no detected effect on likelihood of being successful based on age or the number of months at their current address. Both variables have confidence intervals that touch or cross the line of no effect.
+
+The variables most likely to increase likelihood of a successful application are:
+
+- The number of major credit cards already held by the applicant - each additional card increases the likelihood of acceptance by 1.67 times (a 67% increase),
+
+- The applicant's annual income - an increase of \$10k per year increases the likelihood of acceptance by 1.27 times (a 27% increase),
+
+- The number of active credit accounts - each additional account increases the likelihood of acceptance by 1.15 times (a 15% increase).
+
+These findings make sense as the