In [None]:
# Load required libraries
library(tidyverse)

# Read the dataset.
# `is_video` and `multiple_images` are compared against the strings "True" /
# "False" later in this script. Left to its defaults, read.csv() may
# type-convert a column holding only such tokens to logical (TRUE/FALSE),
# after which `== "True"` matches nothing. Reading both columns as character
# keeps the text exactly as it is stored in the file.
data <- read.csv(
  "combined_data (1).csv",
  colClasses = c(is_video = "character", multiple_images = "character")
)

# Convert likes, comments, and followers to numeric first, then remove rows
# where any of the three is missing. Converting before dropping means values
# that fail to parse (and therefore become NA) are removed as well.
# Rows with zero (or negative) followers are also removed: dividing by them
# would give Inf/NaN engagement and break the means and variances used below.
data <- data %>%
  mutate(
    likes = as.numeric(likes),
    comments = as.numeric(comments),
    followers = as.numeric(followers)
  ) %>%
  drop_na(likes, comments, followers) %>%
  filter(followers > 0)

# Calculate post engagement as a percentage of the follower count
data <- data %>%
  mutate(post_engagement = 100 * (likes + comments) / followers)

# Hypothesis Test 1: is post engagement different for video and non-video
# posts?

# Posts whose `is_video` value is the string "True"
video_data <- filter(data, is_video == "True")

# Posts whose `is_video` value is the string "False"
non_video_data <- filter(data, is_video == "False")

In [None]:

# Two-sample t-test of post engagement: video vs non-video posts.
# The test is only attempted when both groups have at least two rows.
if (nrow(video_data) <= 1 || nrow(non_video_data) <= 1) {
  cat("Not enough observations for Hypothesis Test 1.")
} else {
  # t.test() defaults are used (no var.equal / paired arguments supplied)
  t_test_result <- t.test(
    x = video_data$post_engagement,
    y = non_video_data$post_engagement
  )

  # Report the outcome
  cat("Hypothesis Test 1 (Video vs Non-Video):")
  print(t_test_result)
}


In [None]:
# Sample variance of post engagement among non-video posts
cat(var(non_video_data[["post_engagement"]]))

In [None]:
# Sample variance of post engagement among video posts
cat(var(video_data[["post_engagement"]]))

In [None]:
# Hypothesis Test 2: among non-video posts, is post engagement different for
# single-image and multiple-image posts?

# Non-video posts whose `multiple_images` value is the string "False"
single_image_data <- filter(non_video_data, multiple_images == "False")

# Non-video posts whose `multiple_images` value is the string "True"
multiple_image_data <- filter(non_video_data, multiple_images == "True")

# The test is only attempted when both groups have at least two rows.
if (nrow(single_image_data) <= 1 || nrow(multiple_image_data) <= 1) {
  cat("Not enough observations for Hypothesis Test 2.")
} else {
  # t.test() defaults are used (no var.equal / paired arguments supplied)
  t_test_result_2 <- t.test(
    x = single_image_data$post_engagement,
    y = multiple_image_data$post_engagement
  )

  # Report the outcome
  cat("Hypothesis Test 2 (Single Image vs Multiple Image):")
  print(t_test_result_2)
}


In [None]:
# Sample variance of post engagement among single-image posts
cat(var(single_image_data[["post_engagement"]]))

In [None]:
# Sample variance of post engagement among multiple-image posts
cat(var(multiple_image_data[["post_engagement"]]))

In [None]:
# Logistic (sigmoid) transform used to squash post_engagement for plotting.
#
# x: numeric vector.
# Returns 1 / (1 + exp(-x)) element-wise, i.e. values between 0 and 1, with
# sigmoid(0) == 0.5.
sigmoid <- function(x) {
  denominator <- 1 + exp(-x)
  1 / denominator
}
# Add the sigmoid-transformed engagement to the full dataset
data <- mutate(data, post_engagement_rate_sigmoid = sigmoid(post_engagement))

# Violin plot for Hypothesis Test 1: distribution of sigmoid(engagement) for
# each `is_video` value, with the group mean drawn as a black point
data %>%
  ggplot(aes(
    x = factor(is_video),
    y = post_engagement_rate_sigmoid,
    fill = factor(is_video)
  )) +
  geom_violin() +
  stat_summary(
    fun = mean,
    geom = "point",
    color = "black",
    size = 3,
    position = position_dodge(width = 0.75)
  ) +
  scale_fill_manual(values = c("False" = "skyblue", "True" = "lightcoral")) +
  labs(
    title = "Post Engagement (sigmoid-Transformed): Video vs Non-Video",
    x = "Type of Content",
    y = "sigmoid(Post Engagement)"
  )

# Same transformed column on the non-video subset
non_video_data <- mutate(
  non_video_data,
  post_engagement_rate_sigmoid = sigmoid(post_engagement)
)

# Violin plot for Hypothesis Test 2: distribution of sigmoid(engagement) for
# each `multiple_images` value (non-video posts only), with the group mean
# drawn as a black point
non_video_data %>%
  ggplot(aes(
    x = factor(multiple_images),
    y = post_engagement_rate_sigmoid,
    fill = factor(multiple_images)
  )) +
  geom_violin() +
  stat_summary(
    fun = mean,
    geom = "point",
    color = "black",
    size = 3,
    position = position_dodge(width = 0.75)
  ) +
  scale_fill_manual(values = c("False" = "lightgreen", "True" = "lightcoral")) +
  labs(
    title = "Post Engagement (sigmoid-Transformed): Single Image vs Multiple Image (Non-Video)",
    x = "Number of Images",
    y = "sigmoid(Post Engagement)"
  )

In [None]:
# One-way ANOVA: raw post engagement explained by `is_video` (all posts)
anova_result <- aov(formula = post_engagement ~ is_video, data = data)

# Show the ANOVA table
anova_result %>%
  summary() %>%
  print()

In [None]:
# One-way ANOVA on the non-video subset: sigmoid-transformed engagement
# explained by `multiple_images`. Overwrites the previous `anova_result`.
anova_result <- aov(
  formula = post_engagement_rate_sigmoid ~ multiple_images,
  data = non_video_data
)

# Show the ANOVA table
anova_result %>%
  summary() %>%
  print()

In [None]:
# Make `multiple_images` a factor with "False" as the first (reference) level;
# values other than "False" / "True" become NA
non_video_data[["multiple_images"]] <- factor(
  x = non_video_data[["multiple_images"]],
  levels = c("False", "True")
)

# One-way ANOVA on the non-video subset: raw post engagement explained by
# `multiple_images`
anova_result <- aov(
  formula = post_engagement ~ multiple_images,
  data = non_video_data
)