<a href="https://colab.research.google.com/github/christianbentz/Workshop_DGfS2022/blob/main/Code/Application1/Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Visualization of Entropies
Author: Chris Bentz

Date: 18/02/2022



# Install Libraries
Some packages are already pre-installed in the Jupyter environment, but others need to be installed first. Run this cell to make sure that all packages/libraries needed by this notebook are installed.

In [None]:
# Packages that this notebook attaches with library().
required.pkgs <- c("ggplot2", "ggrepel", "dplyr")
# Install only the packages that are not available yet, so that re-running
# this cell does not reinstall everything from scratch.
missing.pkgs <- required.pkgs[
  !vapply(required.pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.pkgs) > 0) {
  install.packages(missing.pkgs)
}

# Load Libraries

If the libraries are not installed yet, you need to install them using, for example, the command: install.packages("ggplot2").

In [None]:
library(ggplot2)
library(ggrepel)
library(dplyr)

# Load Data
Load the unigram, bigram and trigram entropy estimates, as well as the neural net estimates.

In [None]:
# Read the ML entropy estimates and preview the first rows.
h.est <- read.csv("/content/results/entropy_ML.csv")
head(h.est)
# Read the neural net entropy estimates and preview the first rows.
h.neural.est <- read.csv("/content/results/entropy_NeuralNet.csv")
head(h.neural.est)

## Merge Data
Merge the two data frames together for plotting in one panel.

In [None]:
# Keep only the columns from `id` through `hrate.trigrams` of the neural
# estimates; the remaining columns are treated as redundant for the merge.
h.neural.est.short <- dplyr::select(h.neural.est, id:hrate.trigrams)
# Join the ML and neural estimates on their shared `id` column.
h.est.combined <- merge(x = h.est, y = h.neural.est.short, by = "id")
head(h.est.combined)

# Scatterplots
Create scatterplots with the entropy estimates on the x-axis and y-axis. This gives a visual impression of where each text resides in terms of its estimated entropies.

### Unigram Entropies

In [None]:
# Scatterplot of the two unigram entropy columns (h.unigrams on x,
# hrate.unigrams on y), coloured by subcorpus.
h.unigram.plot <- ggplot(
  data = h.est.combined,
  mapping = aes(x = h.unigrams, y = hrate.unigrams, colour = subcorpus)
) +
  geom_point(size = 1, alpha = 0.8) +
  # marginal tick marks along both axes
  geom_rug() +
  # label every point with its id, repelled to reduce overlap
  geom_label_repel(mapping = aes(label = id), size = 3, label.size = 0.2) +
  xlab("Unigram Entropy for Characters (ML Estimate)") +
  ylab("Unigram Entropy for Characters (Neural Estimate)")
h.unigram.plot

### Trigram Entropies

In [None]:
# Scatterplot of the two trigram entropy columns (h.trigrams on x,
# hrate.trigrams on y), coloured by subcorpus.
h.trigram.plot <- ggplot(
  data = h.est.combined,
  mapping = aes(x = h.trigrams, y = hrate.trigrams, colour = subcorpus)
) +
  geom_point(size = 1, alpha = 0.8) +
  # marginal tick marks along both axes
  geom_rug() +
  # label every point with its id, repelled to reduce overlap
  geom_label_repel(mapping = aes(label = id), size = 3, label.size = 0.2) +
  xlab("Trigram Entropy for Characters (ML Estimate)") +
  ylab("Trigram Entropy for Characters (Neural Estimate)")
h.trigram.plot

# Save Figures
Save the complete figures to file.


In [None]:
# Write the unigram scatterplot to a PDF file via the cairo_pdf device.
ggsave(
  filename = "/content/UnigramEntropyPlot.pdf",
  plot = h.unigram.plot,
  device = cairo_pdf,
  scale = 1,
  dpi = 300
)
# Write the trigram scatterplot to a PDF file via the cairo_pdf device.
ggsave(
  filename = "/content/TrigramEntropyPlot.pdf",
  plot = h.trigram.plot,
  device = cairo_pdf,
  scale = 1,
  dpi = 300
)