_Does size matter? The effect of Instagram influencer account size on post sentiment and resulting marketing outcomes_

_Master's thesis by Thomas A. Frost_

# Part 5: Language Detection

## 02 - Setup

In [None]:
library(tidyverse)
library(fastText)
library(readxl)

## 03 - Data Import

In [None]:
# Load the post-level dataset produced by the previous part of the pipeline.
# NOTE(review): the compact spec below lists 13 column types, while later
# cells index columns up to position 27 -- confirm how the remaining columns
# are typed by readr.
dataset <- read_csv(
  "../data/finaldataset_v2.csv",
  col_types = "ccnccnffnnfnf"
)

# Topic lookup tables, each read as one factor column and one character column.
topics_account <- read_csv(
  "../data/topics_account.csv",
  col_types = "fc"
)
topics_posts <- read_csv(
  "../data/topics_posts.csv",
  col_types = "fc"
)

## 04 - Detection

In [None]:
# Path to the pre-trained fastText language-identification model file.
large_model <- "../lid.176.bin"

# Run fastText language identification on the text of every post.
language <- language_identification(
  input_obj = dataset$Text,
  pre_trained_language_model_path = large_model,
  verbose = TRUE
)

# Convert the predicted ISO code to a factor so that summary() tabulates the
# number of posts per detected language.
language$iso_lang_1 <- as.factor(language$iso_lang_1)

summary(language$iso_lang_1)

# Append the detected language to the dataset. The column is named directly
# in cbind() instead of being renamed afterwards through a hard-coded column
# position, so the result does not depend on how many columns `dataset` has.
postlang <- cbind(dataset, language = language$iso_lang_1)

# Posts whose detected language is not English; these are exported for a
# manual check in a later cell.
postlang_non_en <- filter(postlang, language != "en")

## 05 - Export for manual check in Excel


In [None]:
# Write the posts flagged as non-English to a CSV that Excel opens cleanly,
# so they can be reviewed by hand.
postlang_non_en %>%
  write_excel_csv("../data/check-for-non-english.csv")

## 06 - Implement manual changes

In [None]:
# Read the manually reviewed sheet and drop its first column.
# NOTE(review): the sheet's schema is not visible here; the code below relies
# on it containing `Post.ID` and `language` columns -- confirm.
manual_non_en <- read_excel("../data/05-1_LanguageDetection.xlsx")[-1]

# Attach the manual review to every post. With keep = TRUE both key columns
# are retained, and columns present in both tables get .x / .y suffixes.
# Then keep a post if either the automatic detection (language.x) or the
# manual review (language.y) says it is English.
dataset2 <- postlang %>%
  left_join(manual_non_en, by = "Post.ID", keep = TRUE, na_matches = "never") %>%
  filter(language.x == "en" | language.y == "en") %>%
  select(1:27)

# Restore the names of the two retained columns at positions 1 and 5.
# NOTE(review): presumably these are the columns suffixed by the join
# (Post.ID.x and Text.x) -- confirm against the sheet's columns.
names(dataset2)[c(1, 5)] <- c("Post.ID", "Text")

## 07 - Delete duplicate post

In [None]:
# Remove the duplicated post, identified by its row position in `dataset2`.
# NOTE(review): the position is hard-coded and only valid for the exact row
# order produced by the preceding cells -- re-check if upstream data changes.
dataset2 <- dataset2[-45609, ]

# Renumber the rows consecutively after the deletion. seq_len() is used
# instead of 1:n because 1:n yields c(1, 0) when the frame has no rows.
rownames(dataset2) <- seq_len(nrow(dataset2))

## 08 - Adding Topics

In [None]:
# Give the account-level topic table known column names so it can be joined
# on `Username`.
colnames(topics_account) <- c("label", "Username")

# Attach the account-level topic to every post. left_join() appends the
# non-key columns of the right-hand table after the existing columns, so the
# topic column is the last one; its position is derived from ncol() rather
# than hard-coded, which keeps the rename correct if the number of upstream
# columns changes.
dataset3 <- left_join(
  dataset2, topics_account,
  by = "Username", keep = FALSE, na_matches = "never"
)
colnames(dataset3)[ncol(dataset3)] <- "Topic.Account"

# Attach the post-level topic in the same way, matching on `Post.ID`.
# NOTE(review): assumes topics_posts contributes exactly one non-key column
# (it is read with a two-column spec) -- confirm.
dataset3 <- left_join(
  dataset3, topics_posts,
  by = "Post.ID", keep = FALSE, na_matches = "never"
)
colnames(dataset3)[ncol(dataset3)] <- "Topic.Post"

## 09 - Finally export the final dataset

In [None]:
# Persist the English-only dataset with topic columns for the next part.
dataset3 %>%
  write_csv("../data/finaldataset_v3.csv")