_Does size matter? The effect of Instagram influencer account size on post sentiment and resulting marketing outcomes_

_Master's thesis by Thomas A. Frost_

# Part 10: Model Execution

This script depends on the data frames created in Part 9. When it is run in Jupyter instead of RStudio, a saved R environment file containing those data frames is loaded in the Setup section below.

## 02 - Setup

In [None]:
library(tidyverse)
library(effectsize)
library(lme4)
library(lavaan)
library(fastDummies)
library(DescTools)

In [None]:
# THIS FILE REQUIRES THE DATA FRAMES (model1, ... ) GENERATED BY 09_MODELPREP_V3.R
# load() restores the objects stored in the .RData file into the current
# environment; the model sections below read model1, model2.1 and model2.2.
load("09_ModelPrep.RData")

### Functions

In [1]:
# Build a compact regression table from a fitted model.
#
# model: a fitted model for which coefficients(), vcov() and
#        summary()$coefficients are available (e.g. lm or glm).
#
# Returns a data frame with one row per coefficient and the columns
#   var          - coefficient name
#   coefficients - "estimate (standard error)", rounded to 4 and 3 digits
#   p            - value from the 4th column of the summary coefficient
#                  table, rounded to 3 digits (NA when the summary has no
#                  row for that coefficient)
getRegressionTable <- function(model) {
  estimates <- coefficients(model)
  terms <- names(estimates)
  if (is.null(terms)) {
    terms <- as.character(seq_along(estimates))
  }

  # Standard errors are the square roots of the covariance matrix diagonal.
  std_errors <- sqrt(diag(vcov(model)))

  coef_table <- summary(model)$coefficients
  p_values <- coef_table[, 4]
  if (length(p_values) != length(estimates)) {
    # summary() drops aliased (NA) coefficients while coefficients() keeps
    # them, so align the p-values by name instead of by position.
    p_values <- setNames(p_values, rownames(coef_table))[terms]
    names(p_values) <- terms
  }

  data.frame(
    var = terms,
    coefficients = paste0(round(unname(estimates), 4), " (", round(std_errors, 3), ")"),
    p = round(p_values, 3)
  )
}

# Write a data frame to "<filename>.txt" as semicolon-separated text.
#
# df:       data frame to export
# filename: output path without the ".txt" extension
#
# The file has a header line, no quoting and no row names.
# Returns TRUE after the file has been written.
outputTable <- function(df, filename) {
  write.table(
    df,
    file = paste0(filename, ".txt"),
    sep = ";",
    quote = FALSE,
    row.names = FALSE  # spelled out: the shorthand F can be reassigned
  )
  return(TRUE)
}

# Standardise a vector with scale() when it is numeric; any other vector is
# handed back untouched. Intended for use with lapply() over a data frame.
scaleNumeric <- function(vector) {
  if (!is.numeric(vector)) {
    return(vector)
  }
  scale(vector)
}

# Translate p-values into significance markers.
#
# vector: numeric vector of p-values
#
# Returns a character vector of the same length (names are kept):
#   "***" for p < 0.001, "**" for p < 0.01, "*" for p < 0.05, "" otherwise.
# Missing p-values yield "" instead of raising an error, and an empty input
# yields character(0).
stars <- function(vector) {
  marks <- character(length(vector))
  # which() ignores NA comparisons, so missing p-values keep the empty marker.
  # Thresholds are applied from weakest to strongest so the strongest wins.
  marks[which(vector < 0.05)] <- "*"
  marks[which(vector < 0.01)] <- "**"
  marks[which(vector < 0.001)] <- "***"
  names(marks) <- names(vector)
  marks
}

## 03 - Model 1

In [None]:
# Standardise every numeric column of model1; other columns pass through.
model1.z <- as.data.frame(lapply(model1, scaleNumeric))

# set as ordered to make the "dummy" of negative = 0 and positive = 1
model1.z$sentiment <- factor(model1$sentiment, levels = c("NEGATIVE", "POSITIVE"), ordered = TRUE)

# Reference categories for the categorical predictors.
model1.z$follower.class <- relevel(model1.z$follower.class, ref = "nano")
model1.z$gender <- relevel(model1.z$gender, ref = "f")

# Logistic regression of post sentiment on account and post characteristics.
logreg <- glm(sentiment ~ follower.class +
                gender +
                num.Followees +
                is.business +
                is.verified +
                num.Posts +
                account.age +
                is.located +
                num.Hashtags +
                topic.post +
                num.Mentions +
                is.carousel +
                post.length,
              data = model1.z, family = binomial(link = "logit"))

summary(logreg)

# Exponentiated coefficients, converted to Cohen's d and labelled with the
# "lovakov2021" interpretation rules.
results <- as.data.frame(parameters::model_parameters(logreg, ci_method = "wald", exponentiate = TRUE))
results$d <- oddsratio_to_d(results$Coefficient)
results$effect.size <- interpret_cohens_d(results$d, rules = "lovakov2021")

# Round for reporting.
results$Coefficient <- round(results$Coefficient, 3)
results$SE <- round(results$SE, 3)
results$p <- round(results$p, 3)
results$d <- round(results$d, 3)

# Report each coefficient together with its standard error: "coef (SE)".
results$Coefficient <- paste0(results$Coefficient, " (", results$SE, ")")

results <- select(results, Parameter, Coefficient, p, d, effect.size)

outputTable(results, "logreg")

PseudoR2(logreg, which = "McFaddenAdj")

### Extension: logistic mixed model

In [None]:
# Mixed-effects logistic regression: same fixed effects as the plain logistic
# model, plus a random intercept and random slopes for the post-level
# predictors, grouped by username.
model1.2 <- glmer(
  formula = sentiment ~ follower.class + gender + num.Followees + is.business +
    is.verified + num.Posts + account.age + is.located + num.Hashtags +
    topic.post + num.Mentions + is.carousel + post.length +
    (1 + is.located + num.Hashtags + num.Mentions + is.carousel + post.length | username),
  data = model1.z,
  family = binomial(link = "logit"),
  control = glmerControl(optimizer = "nloptwrap")
)

summary(model1.2)

# Coefficients stay on the log-odds scale here (exponentiate = FALSE).
results1.2 <- as.data.frame(
  parameters::model_parameters(model1.2, ci_method = "wald", exponentiate = FALSE)
)

# Odds ratios are filled in for the first 36 rows only; the remaining rows
# keep the empty string.
# NOTE(review): presumably rows 1-36 are the fixed effects - confirm.
results1.2$OddsRatio <- ""
results1.2[seq_len(36), ]$OddsRatio <- round(exp(results1.2[seq_len(36), ]$Coefficient), 4)

## 04 - Model 2.1

In [None]:
# only keep posts that have public likes (!= -1)
model2.1 <- model2.1 %>% filter(likes >= 0)

# Standardise the numeric columns.
model2.1.z <- model2.1 %>%
  lapply(scaleNumeric) %>%
  as.data.frame()

# Dummy-code gender and topic, recode sentiment as an ordered factor and drop
# the dummies of the chosen reference levels (gender "f", topic "Hair").
model2.1.dummies <- model2.1.z %>%
  dummy_cols(select_columns = c("gender", "topic.post"), remove_first_dummy = FALSE) %>%
  mutate(sentiment = factor(sentiment, ordered = TRUE, levels = c("NEGATIVE", "POSITIVE"))) %>%
  select(-one_of(c("gender_f", "topic.post_Hair")))

# Make the column names syntactically valid so they can be used in the
# lavaan model syntax below.
names(model2.1.dummies) <- make.names(names(model2.1.dummies))

# Model syntax: sentiment regressed on account/post characteristics,
# the latent variable engagement (measured by likes and comments) regressed
# on sentiment plus the same predictors.
model2.1.def <- '
     sentiment ~ follower.count + gender_m + gender_i + gender_d + gender_g + num.Followees + is.business + is.verified + num.Posts + account.age + is.located + num.Hashtags + topic.post_Giveaway + topic.post_Beauty...Makeup + topic.post_TV.and.series + topic.post_Horoscope + topic.post_Basketball + topic.post_Happy...Celebrations + topic.post_U.S..politics + topic.post_Health + topic.post_Fashion + topic.post_Cooking + topic.post_Photography + topic.post_Stand.Up.Comedy + topic.post_Music.Festival + topic.post_Football + topic.post_Fitness + topic.post_Street.Fashion + topic.post_Michigan + num.Mentions + is.carousel + post.length
     engagement ~ sentiment + follower.count + gender_m + gender_i + gender_d + gender_g + num.Followees + is.business + is.verified + num.Posts + account.age + is.located + num.Hashtags + topic.post_Giveaway + topic.post_Beauty...Makeup + topic.post_TV.and.series + topic.post_Horoscope + topic.post_Basketball + topic.post_Happy...Celebrations + topic.post_U.S..politics + topic.post_Health + topic.post_Fashion + topic.post_Cooking + topic.post_Photography + topic.post_Stand.Up.Comedy + topic.post_Music.Festival + topic.post_Football + topic.post_Fitness + topic.post_Street.Fashion + topic.post_Michigan + num.Mentions + is.carousel + post.length
     engagement =~ likes + comments
'

# Fit the model, treating sentiment as an ordered variable.
model2.1.fit <- sem(
  model = model2.1.def,
  data = model2.1.dummies,
  ordered = c("sentiment")
)

# Print the fit once without and once with standardised estimates.
summary(model2.1.fit, fit.measures = TRUE)
summary(model2.1.fit, fit.measures = TRUE, standardized = TRUE)

## Exporting coefficients to Microsoft Word


In [None]:
# Unstandardised estimates; Parameter is the join key "lhs op rhs" without
# separators (e.g. "sentiment~gender_m").
model2.1.coeff <- parameterEstimates(model2.1.fit)
model2.1.coeff$Parameter <- with(model2.1.coeff, paste0(lhs, op, rhs))

# Standardised solutions of type "std.all" and "std.lv", keyed the same way.
model2.1.std.all <- standardizedSolution(model2.1.fit, type = "std.all")
model2.1.std.all$Parameter <- with(model2.1.std.all, paste0(lhs, op, rhs))

model2.1.std.lv <- standardizedSolution(model2.1.fit, type = "std.lv")
model2.1.std.lv$Parameter <- with(model2.1.std.lv, paste0(lhs, op, rhs))

# Reporting table: "estimate (SE)" plus the rounded p-value.
model2.1.output <- with(
  model2.1.coeff,
  data.frame(
    Parameter = Parameter,
    Coefficient = paste0(round(est, 3), " (", round(se, 2), ")"),
    pvalue = round(pvalue, 3)
  )
)

# Keep the first 67 rows only.
# NOTE(review): presumably these are the regression and loading rows - confirm.
model2.1.output <- model2.1.output[seq_len(67), ]

# Attach both standardised estimates and round them for reporting.
model2.1.output <- model2.1.output %>%
  left_join(
    select(model2.1.std.lv, Parameter, std.lv = est.std),
    by = "Parameter", na_matches = "never"
  ) %>%
  left_join(
    select(model2.1.std.all, Parameter, std.all = est.std),
    by = "Parameter", na_matches = "never"
  ) %>%
  mutate(
    std.lv = round(std.lv, 3),
    std.all = round(std.all, 3)
  )

write_csv2(model2.1.output, "model2.1.output.txt")

## 05 - Model 2.2

In [None]:
# Keep posts with public likes (!= -1), a known follower class and a gender
# of "m" or "f"; then drop the gender levels that are no longer used.
model2.2 <- filter(
  model2.2,
  likes >= 0,
  !is.na(follower.class),
  gender %in% c("m", "f")
)
model2.2$gender <- factor(model2.2$gender)

# Engagement rates: likes and comments relative to the follower count.
model2.2$engagement.likes <- model2.2$likes / model2.2$follower.count
model2.2$engagement.comments <- model2.2$comments / model2.2$follower.count

# Standardise the numeric columns.
model2.2.z <- as.data.frame(lapply(model2.2, scaleNumeric))

#model2.2.dummies$sentiment <- factor(model2.2.dummies$sentiment, ordered = TRUE, levels = c("NEGATIVE", "POSITIVE"))
# Dummy-code gender and topic, turn the flag columns into numbers, code
# sentiment as 1 (POSITIVE) / 0 (otherwise) and drop the dummies of the
# chosen reference levels.
model2.2.dummies <- model2.2.z %>%
  dummy_cols(select_columns = c("gender", "topic.post"), remove_first_dummy = FALSE) %>%
  mutate(across(c(is.business, is.verified, is.located, is.carousel), as.numeric)) %>%
  mutate(sentiment = (sentiment == "POSITIVE") * 1) %>%
  select(-one_of(c("gender_f", "topic.post_Hair")))                  # remove specific reference level

# Make the column names syntactically valid for the lavaan model syntax.
names(model2.2.dummies) <- make.names(names(model2.2.dummies))

# Model syntax: sentiment and the like-based engagement rate as outcomes.
model2.2.def <- '
     sentiment ~ follower.count + gender_m + num.Followees + is.business + is.verified + num.Posts + account.age + is.located + num.Hashtags + topic.post_Giveaway + topic.post_Beauty...Makeup + topic.post_TV.and.series + topic.post_Horoscope + topic.post_Basketball + topic.post_Happy...Celebrations + topic.post_U.S..politics + topic.post_Health + topic.post_Fashion + topic.post_Cooking + topic.post_Photography + topic.post_Stand.Up.Comedy + topic.post_Music.Festival + topic.post_Football + topic.post_Fitness + topic.post_Street.Fashion + topic.post_Michigan + num.Mentions + is.carousel + post.length
     engagement.likes ~ sentiment + follower.count + gender_m + num.Followees + is.business + is.verified + num.Posts + account.age + topic.post_Giveaway + topic.post_Basketball + topic.post_Happy...Celebrations + topic.post_U.S..politics + topic.post_Health + topic.post_Fashion + topic.post_Cooking + topic.post_Photography + topic.post_Stand.Up.Comedy + topic.post_Music.Festival + topic.post_Football + topic.post_Fitness + topic.post_Street.Fashion + topic.post_Michigan + num.Mentions + is.carousel + post.length
'

# Multi-group fit with all parameters free across follower classes.
model2.2.fit <- sem(
  model = model2.2.def,
  data = model2.2.dummies,
  ordered = c("sentiment"),
  group = "follower.class"
)

# Same model with the regression coefficients constrained equal across groups.
model2.2.fit.regressions <- sem(
  model = model2.2.def,
  data = model2.2.dummies,
  ordered = c("sentiment"),
  group = "follower.class",
  group.equal = "regressions"
)

# Compare the free and the constrained model.
lavTestLRT(model2.2.fit, model2.2.fit.regressions, method = "satorra.2000")

model2.2.results <- parameterEstimates(model2.2.fit)

# only regression coefficients
model2.2.table <- filter(model2.2.results, op == "~")

# One column per group: rounded estimate followed by significance stars.
# NOTE(review): the column labels assume lavaan numbers the groups as
# 1 = mikro, 2 = nano, 3 = midtier, 4 = makro, 5 = mega - confirm against the
# group labels of the fitted model.
model2.2.table <- local({
  regressions <- model2.2.table
  group_cells <- function(group_id) {
    rows <- filter(regressions, group == group_id)
    paste(round(rows$est, 3), stars(rows$pvalue))
  }
  first_group <- filter(regressions, group == 1)
  data.frame(
    lhs = first_group$lhs,
    rhs = first_group$rhs,
    mikro = group_cells(1),
    nano = group_cells(2),
    midtier = group_cells(3),
    makro = group_cells(4),
    mega = group_cells(5)
  )
})

outputTable(model2.2.table, "model2.2.table")

summary(model2.2.fit)

## 06 - Study 2

In [None]:
# COPY FROM BWHPC (10_models_bwhpc.R)
# load() restores the saved objects into the current environment.
# NOTE(review): presumably this file provides the random_study2* model
# objects used in the next cell - confirm.
load("study2_allmodels.RData")

In [None]:
# Collect the exponentiated coefficients and p-values of the full Study 2
# model ("All") and of the five reduced models ("I" to "V") in one wide
# table, joined on the parameter name of the full model.
results <- local({
  # The list names become the column prefixes (All.Coefficient, I.p, ...).
  fits <- list(
    All = random_study2,
    I = random_study2_wo_followercount,
    II = random_study2_wo_followees,
    III = random_study2_wo_numposts,
    IV = random_study2_wo_accountage,
    V = random_study2_wo_carousel
  )

  # Parameter / Coefficient / p of one model, with columns prefixed "<label>.".
  prefixed_estimates <- function(label) {
    estimates <- select(
      as.data.frame(
        parameters::model_parameters(fits[[label]], ci_method = "wald", exponentiate = TRUE)
      ),
      Parameter, Coefficient, p
    )
    # Same as data.frame(<label> = estimates): prefixes every column name.
    do.call(data.frame, setNames(list(estimates), label))
  }

  combined <- prefixed_estimates("All")
  colnames(combined)[1] <- "Parameter"

  # Left joins keep every parameter of the full model; parameters a reduced
  # model does not have end up as NA in that model's columns.
  for (label in names(fits)[-1]) {
    combined <- left_join(
      combined,
      prefixed_estimates(label),
      by = setNames(paste0(label, ".Parameter"), "Parameter"),
      na_matches = "never"
    )
  }
  combined
})


# Printable version of the first 30 rows: rounded coefficient followed by
# significance stars, one column per model.
results.print <- local({
  shown <- results[1:30, ]
  model_cells <- function(prefix) {
    paste(
      round(shown[[paste0(prefix, ".Coefficient")]], 4),
      stars(shown[[paste0(prefix, ".p")]])
    )
  }
  data.frame(
    Parameter = shown$Parameter,
    All = model_cells("All"),
    I = model_cells("I"),
    II = model_cells("II"),
    III = model_cells("III"),
    IV = model_cells("IV"),
    V = model_cells("V")
  )
})

In [None]:
# Export the printable Study 2 table as a semicolon-separated file.
write_csv2(results.print, "odds-ratios-study2.txt")