visualisations.Rmd

---
title: "Performance visualisations"
author: "Anita Kurm"
date: "5/21/2020"
output:
  html_document: default
  pdf_document: default
---
# Set-up, data import
```{r}
# Load the packages used throughout this notebook.
pacman::p_load(tidyverse, rjson, extrafont)

# Import the Raleway font into extrafont; the font itself has to be
# downloaded and installed on this machine beforehand.
font_import(pattern = "Raleway", prompt = FALSE)

# Per-question answers and inference times for both models
df <- read_csv(file = "processed_data/twitterQA_berts.csv")
# Initial dataset (questions and gold answers)
init <- read_csv(file = "processed_data/initial11778.csv")

# Loading times, parsed as JSON
tok_times <- fromJSON(file = "processed_data/tokenizer_loading.txt")  # tokenizers
mod_times <- fromJSON(file = "processed_data/model_loading.txt")      # models

# Two-colour palette shared by the plots
cp <- c("aquamarine3", "grey19")

# Uncomment to list the fonts available to extrafont
#fonts()
```

# Initial dataset stats
```{r}
# Descriptive statistics for the initial dataset: number of rows, mean length
# (in space-separated words) of the longer of the two gold answers, and mean
# question length.
init %>%
  mutate(
    # lengths() always yields an integer vector; sapply(..., length) would
    # return a list for zero-row input and break the arithmetic below.
    Gold1_length = lengths(strsplit(Gold_1, " ")),
    # A missing second gold answer counts as zero words.
    Gold2_length = ifelse(is.na(Gold_2), 0, lengths(strsplit(Gold_2, " "))),
    Gold_max_length = ifelse(Gold1_length > Gold2_length, Gold1_length, Gold2_length),
    Question_length = lengths(strsplit(Question, " "))
  ) %>%
  summarise(n(), mean(Gold_max_length), mean(Question_length))
```

# Data pre-processing
```{r}
# Turn the wide per-question table into long format: build one data frame per
# model with identical columns, then stack them.

# Large BERT: its answer/time columns plus model-level loading times
l_bert <- df %>%
  transmute(
    qid,
    X1,
    Answer_pred = L_BERT_answer,
    Time = L_BERT_time,
    A_len = str_length(Answer_pred),   # answer length in characters
    Model = "BERT",
    Tok_time = tok_times$L_BERT,
    Load_time = mod_times$L_BERT
  )

# DistilBERT: same layout as above
d_bert <- df %>%
  transmute(
    qid,
    X1,
    Answer_pred = DistilBERT_answer,
    Time = DistilBERT_time,
    A_len = str_length(Answer_pred),
    Model = "DistilBERT",
    Tok_time = tok_times$DistilBERT,
    Load_time = mod_times$DistilBERT
  )

# BERT rows first, DistilBERT rows second
data <- rbind(l_bert, d_bert)


```

Get a dataframe with both answers present:
```{r}
# Wide format: keep only questions that both models answered.
df_present <- df %>%
  filter(!is.na(L_BERT_answer), !is.na(DistilBERT_answer))
#write_csv(df_present, "processed_data/tweetQA_bothpresent.csv")

# Long format: keep only rows with a predicted answer. The column is referenced
# through the data mask (not `data$Answer_pred`) so the condition always
# follows the data frame actually flowing through the pipe.
d_present <- data %>%
  filter(!is.na(Answer_pred))
```

# Evaluate data loss and processing time
Processing time summary (full dataset):
```{r}
# Per-model overview of the full dataset: answer counts, inference-time
# statistics and loading times, rounded to three decimals.
time_summary <- data %>%
  mutate(
    Tok_time = as.numeric(Tok_time),
    Load_time = as.numeric(Load_time)
  ) %>%
  group_by(Model) %>%
  summarise(
    Missing = sum(is.na(Answer_pred)),
    Answered = sum(!is.na(Answer_pred)),
    Mean_time = mean(Time),
    `Max time` = max(Time),
    `Min time` = min(Time),
    `Total time` = sum(Time),
    # loading times are constant within a model, so max() just picks the value
    `Tokenizer loading time` = max(Tok_time),
    `Model loading time` = max(Load_time),
    `Total time with loading` = sum(Time) + max(Tok_time) + max(Load_time)
  ) %>%
  mutate_if(is.numeric, round, digits = 3)

time_summary
```

Processing time summary (answered questions only, outlier removed):
```{r}
# Summarise 
time_summary <- d_present %>%
  filter(Time<10) %>%    # remove an outlier of 114s
  mutate(Tok_time = as.numeric(Tok_time),
         Load_time = as.numeric(Load_time)) %>% 
  group_by(Model) %>% 
  summarise(Missing = sum(is.na(Answer_pred)),
            Answered = sum(!is.na(Answer_pred)),
            Mean_time = mean(Time),
            "sd time" = sd(Time),
            'Max time' = max(Time),
            'Min time' = min(Time),
            'Total time' = sum(Time),
            'Tokenizer loading time' = max(Tok_time),
            'Model loading time' = max(Load_time),
            'Total time with loading' = sum(Time) + max(Tok_time)+ max(Load_time)) %>% 
  mutate_if(is.numeric, round, 3)

time_summary
```

# Visualisations
Time by model
```{r}
# Keep only questions answered in under 10 s (same cut-off as the summary table)
d_present <- d_present %>%
  filter(Time < 10)

# Density plot of per-question inference time, one curve per model, with a
# dashed line at each model's mean (taken from time_summary).
# The mean labels are added with annotate() rather than geom_text(aes(...)):
# geom_text() with constant aesthetics on a plot that has data draws the label
# once per row of d_present, which overplots the text thousands of times.
density <- ggplot(d_present, aes(Time, fill = Model)) +
  geom_vline(data = time_summary, aes(xintercept = Mean_time, color = Model),
             linetype = "dashed") +
  annotate("text", x = 0.080, y = 20, label = "\nMean: 0.080",
           colour = "grey23", angle = 90, size = 4, family = "Raleway") +
  annotate("text", x = 0.419, y = 20, label = "\nMean: 0.419",
           colour = "aquamarine4", angle = 90, size = 4, family = "Raleway") +
  geom_density(col = NA, alpha = 0.7) +
  theme_bw() +
  scale_colour_manual(values = cp) +
  scale_fill_manual(values = cp) +
  labs(x = "Seconds/Question",
       y = "Density",
       title = "Distribution of inference time in DistilBERT and large BERT",
       subtitle = "(Seconds/question)") +
  theme(text = element_text(family = "Raleway")) +
  xlim(0, 1)

density

# Cumulative processing time plot: running total of inference time over the
# full (unfiltered) per-model data frames.
cumsum_l <- cumsum(l_bert$Time)
cumsum_d <- cumsum(d_bert$Time)

cumulative <- ggplot() +
  # seq_along() is safe even if a vector is empty, unlike 1:length()
  geom_area(aes(seq_along(cumsum_l), cumsum_l), fill = "aquamarine3", alpha = 0.5) +
  annotate("text", x = 10500, y = 5150, label = "BERT: 4919.511",
           colour = "aquamarine4", family = "Raleway", hjust = "center") +
  annotate("text", x = 10200, y = 1200, label = "DistilBERT: 906.613",
           colour = "grey23", family = "Raleway", hjust = "center") +
  geom_area(aes(seq_along(cumsum_d), cumsum_d), fill = "grey23", alpha = 0.8) +
  theme_bw() +
  labs(x = "Number of Questions",
       y = "Total Inference Time",
       title = "Accumulated inference time by the length of the dataset",
       subtitle = "(Time in seconds)") +
  theme(text = element_text(family = "Raleway"), legend.title = element_blank()) +
  ylim(0, 5500)

cumulative

# Stack both plots and write them to disk
time_plots <- gridExtra::grid.arrange(density, cumulative, nrow = 2)
ggsave("Plots/timeplots.png", time_plots, width = 8, height = 6)
```

## Linear models for stats
```{r}
# Predict time by model
time_by_model <- lm(Time ~ Model, d_present) 
summary(time_by_model)

t.test(Time~Model, d_present)

```


# Manual evaluation of performance metrics GLEU and METEOR
```{r}
# Manually judged sample; read_csv2() expects a semicolon-delimited file
d100 <- read_csv2(file = "processed_data/df_samples_scores100_judged.csv")

```

```{r}
# Add gold-answer length features, a METEOR-correct flag and two length
# categories to the judged sample.
d100 <- d100 %>%
  mutate(
    # lengths() always yields an integer vector; sapply(..., length) would
    # return a list for zero-row input.
    Gold1_length = lengths(strsplit(Gold_1, " ")),
    # A missing second gold answer counts as zero words.
    Gold2_length = ifelse(is.na(Gold_2), 0, lengths(strsplit(Gold_2, " "))),
    Gold_max_length = ifelse(Gold1_length > Gold2_length, Gold1_length, Gold2_length),
    # JUDGE is summed as "GLEU correct" below, so JUDGE == 0 is counted for METEOR
    Meteor_correct = ifelse(JUDGE == 0, 1, 0),
    length_cat = ifelse(Gold_max_length <= 2, " up to 2 (incl)", "greater than 2"),
    length_cat2 = ifelse(Gold_max_length <= 3, " up to 3 (incl)", "greater than 3")
  )

# Row counts per value of X11
d100 %>%
  group_by(X11) %>%
  summarise(n())

# Only rows that received a judgement
d100_compare <- d100 %>%
  filter(!is.na(JUDGE))

# Overall counts per metric
d100_compare %>%
  summarise('GLEU correct' = sum(JUDGE),
            'METEOR correct' = sum(Meteor_correct))

# Counts split at a gold-answer length of 2 words
d100_compare %>%
  group_by('Gold Standard Length' = length_cat) %>%
  summarise('GLEU correct' = sum(JUDGE),
            'METEOR correct' = sum(Meteor_correct))

# Counts split at a gold-answer length of 3 words
d100_compare %>%
  group_by('Gold Standard Length' = length_cat2) %>%
  summarise('GLEU correct' = sum(JUDGE),
            'METEOR correct' = sum(Meteor_correct))

# Counts for every individual gold-answer length
d100_compare %>%
  group_by('Gold Standard Length' = Gold_max_length) %>%
  summarise('GLEU correct' = sum(JUDGE),
            'METEOR correct' = sum(Meteor_correct))
```


# Applying METEOR to the long-answer dataset (3+ words) and GLEU to the short-answer dataset (max 2 words)
```{r}
# Read in the data
long = read.csv("processed_data/df_long_answers.csv")
short = read.csv("processed_data/df_short_answers.csv")

# Take only necessary columns
long = select(long,7:10)
short = select(short,7:10)

# re-define color palette so it's consistent across plots
cp <- c("aquamarine3", "grey19")
```

Long
```{r}
# Long-answer dataset: reshape the four score columns into one row per
# (answer, model) with METEOR and GLEU columns, then compare METEOR by model.

# Columns 1 and 3 hold the BERT scores, columns 2 and 4 the DistilBERT scores
bert <- select(long, 1, 3)
names(bert) <- c("METEOR", "GLEU")
bert$Model <- "BERT"

distil <- select(long, 2, 4)
names(distil) <- c("METEOR", "GLEU")
distil$Model <- "DistilBERT"

data <- rbind(bert, distil)

# Mean METEOR per model, used for the dashed reference lines
mu <- plyr::ddply(data, "Model", summarise, grp.mean = mean(METEOR))
mu

# The mean labels are added with annotate() rather than geom_text(aes(...)):
# geom_text() with constant aesthetics on a plot that has data draws the label
# once per row, overplotting the text. The stray tab characters that trailed
# the second label were removed as well.
meteor <- ggplot(data, aes(x = METEOR, fill = Model)) +
  geom_density(col = NA, alpha = 0.6, position = "identity") +
  geom_vline(data = mu, aes(xintercept = grp.mean, color = Model),
             linetype = "dashed") +
  annotate("text", x = 0.387, y = 2.5, label = "\nMean: 0.387",
           colour = "grey23", angle = 90, size = 4, family = "Raleway") +
  annotate("text", x = 0.433, y = 2.5, label = "\nMean: 0.433",
           colour = "aquamarine4", angle = 90, size = 4, family = "Raleway") +
  theme_bw() +
  ylim(0, 3.5) +
  theme(text = element_text(family = "Raleway")) +
  scale_colour_manual(values = cp) +
  scale_fill_manual(values = cp) +
  labs(x = "METEOR score",
       y = "Density")

# Linear regression and two-sample t-test of METEOR on model
meteor_by_model <- lm(METEOR ~ Model, data)
summary(meteor_by_model)
t.test(METEOR ~ Model, data)
```

Short
```{r}
# Short-answer dataset: reshape the four score columns into one row per
# (answer, model) with METEOR and GLEU columns, then compare GLEU by model.

# Columns 1 and 3 hold the BERT scores, columns 2 and 4 the DistilBERT scores
bert <- select(short, 1, 3)
names(bert) <- c("METEOR", "GLEU")
bert$Model <- "BERT"

distil <- select(short, 2, 4)
names(distil) <- c("METEOR", "GLEU")
distil$Model <- "DistilBERT"

data <- rbind(bert, distil)

# Mean GLEU per model, used for the dashed reference lines
mu <- plyr::ddply(data, "Model", summarise, grp.mean = mean(GLEU))
mu

# The mean labels are added with annotate() rather than geom_text(aes(...)):
# geom_text() with constant aesthetics on a plot that has data draws the label
# once per row, overplotting the text. The stray tab characters that trailed
# the second label were removed as well.
gleu <- ggplot(data, aes(x = GLEU, fill = Model)) +
  geom_density(col = NA, alpha = 0.5, position = "identity") +
  geom_vline(data = mu, aes(xintercept = grp.mean, color = Model),
             linetype = "dashed") +
  annotate("text", x = 0.535, y = 2.5, label = "\nMean: 0.535",
           colour = "grey23", angle = 90, size = 4, family = "Raleway") +
  annotate("text", x = 0.614, y = 2.5, label = "\nMean: 0.614",
           colour = "aquamarine4", angle = 90, size = 4, family = "Raleway") +
  theme_bw() +
  xlim(0, 1) +
  ylim(0, 3.5) +
  theme(text = element_text(family = "Raleway")) +
  scale_colour_manual(values = cp) +
  scale_fill_manual(values = cp) +
  labs(x = "GLEU score",
       y = "Density")

# Linear regression and two-sample t-test of GLEU on model
gleu_by_model <- lm(GLEU ~ Model, data)
summary(gleu_by_model)

t.test(GLEU ~ Model, data)

  
```

```{r}
# Stack the GLEU plot above the METEOR plot and save the combined figure
met_gl <- gridExtra::grid.arrange(grobs = list(gleu, meteor), nrow = 2)
ggsave(filename = "Plots/meteor_gleu.png", plot = met_gl, width = 9, height = 8)
```


# Gather citations
```{r}
# Citation for R itself
citation()
# Citations for the packages loaded in the set-up chunk
invisible(lapply(
  c("tidyverse", "rjson", "extrafont"),
  function(pkg) print(citation(pkg))
))
```