In [1]:
library(data.table)

In [2]:
## Using a table of article assessments and views, build tables
## (matrices) that shows the number of dissonant articles per
## assessment category based on sorting by popularity.
##
## The underlying assumption is that in an ideal system with a limited
## and fixed amount of resources (in other words, popularity and high quality
## artefacts does not increase the amount of resources in the system),
## popularity ranking and assessment class follow a 1-to-1 relationship.
## We can therefore sort by popularity and group articles that way
## because work will be prioritised by popularity.

## DATA ASSUMPTION: views_with_redirects from resolve-redirects.R
## is loaded into memory.

## 3: build a 2x2 matrix of assessment classes and popularity classes
## 

## Assessment classes in ascending order of quality.

In [3]:
assessment_classes = c('E', 'D', 'C', 'B', 'A');

In [4]:
quality_prediction_and_page_views <- read.table("../../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_20141101.tsv", header=FALSE, sep="\t")

In [5]:
quality_prediction_and_page_views <- data.table(quality_prediction_and_page_views)

In [6]:
colnames(quality_prediction_and_page_views) <- c('entity_id','number_of_revisions', 'page_views', 'prediction')

In [7]:
summary(quality_prediction_and_page_views)

    entity_id        number_of_revisions   page_views        prediction  
 Q1      :       1   Min.   :    1.00    Min.   :0.000e+00   A:      68  
 Q100    :       1   1st Qu.:    9.00    1st Qu.:4.100e+01   B:   29820  
 Q1000   :       1   Median :   17.00    Median :2.970e+02   C: 1499765  
 Q10000  :       1   Mean   :   21.91    Mean   :3.314e+04   D: 2605369  
 Q100000 :       1   3rd Qu.:   28.00    3rd Qu.:1.714e+03   E:11388425  
 Q1000000:       1   Max.   :10032.00    Max.   :1.253e+10               
 (Other) :15523441                                                       

In [8]:
## 0: calculate number of articles in each assessment class
n_per_class = quality_prediction_and_page_views[, list(narticles=sum(.N)), by='prediction']

In [9]:
setkey(n_per_class, prediction);
## NOTE: setkey allows us to do n_per_class['GA']$narticles to get counts

In [10]:
## 1: order articles by popularity
articles_by_pop = quality_prediction_and_page_views[order(quality_prediction_and_page_views$page_views)][,list(entity_id, prediction, page_views)];



In [11]:
## 2: assign popularity assessment class based on rank
##   (buckets based on number of articles in each class)
articles_by_pop[, pop_class := ''];
articles_by_pop[, seqNum := seq_len(nrow(articles_by_pop))];

entity_id,prediction,page_views,pop_class
Q10040378,E,0,
Q10069140,E,0,
Q10081695,E,0,
Q10092002,E,0,
Q10111267,E,0,
Q10149726,E,0,
Q10180230,E,0,
Q10185035,E,0,
Q10205202,E,0,
Q10252966,E,0,


entity_id,prediction,page_views,pop_class,seqNum
Q10040378,E,0,,1
Q10069140,E,0,,2
Q10081695,E,0,,3
Q10092002,E,0,,4
Q10111267,E,0,,5
Q10149726,E,0,,6
Q10180230,E,0,,7
Q10185035,E,0,,8
Q10205202,E,0,,9
Q10252966,E,0,,10


In [12]:
assign_pop_class = function(dataset, classes, class_n) {
  ## Based on the per-class number of articles in class_n
  ## assign popularity based on classes to dataset.
  prev_idx = 0;
  for(rating in classes) {
    start_idx = prev_idx + 1;
    end_idx = start_idx + class_n[prediction == rating]$narticles;
    print(paste('start_idx =', start_idx, ', end_idx = ', end_idx));
    dataset[seqNum >= start_idx & seqNum <= end_idx, pop_class := rating];
    prev_idx = end_idx -1;
  }
  dataset;
}

In [13]:
articles_by_pop = assign_pop_class(articles_by_pop,
  assessment_classes, n_per_class);

[1] "start_idx = 1 , end_idx =  11388426"
[1] "start_idx = 11388426 , end_idx =  13993795"
[1] "start_idx = 13993795 , end_idx =  15493560"
[1] "start_idx = 15493560 , end_idx =  15523380"
[1] "start_idx = 15523380 , end_idx =  15523448"


In [14]:
create_dissonance_matrix = function(articledata, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(classes));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = classes;

  for(real_rating in classes) {
    for(pop_rating in classes) {
      d_mtrx[real_rating, pop_rating] = length(articledata[prediction == real_rating & pop_class == pop_rating]$entity_id);
    }
  }
  d_mtrx;
}


In [15]:
## Based on direct hits to articles:
create_dissonance_matrix(articles_by_pop, assessment_classes)

Unnamed: 0,E,D,C,B,A
E,8648309,1864865,870432,4811,8
D,2011418,356900,233093,3948,10
C,718961,375004,385499,20253,48
B,9737,8598,10737,748,0
A,0,2,4,60,2


In [16]:
dissonance_matrix = create_dissonance_matrix(articles_by_pop,
  assessment_classes);

In [17]:
# Total misaligned entities
(dissonance_matrix[1,1]+dissonance_matrix[2,2]+dissonance_matrix[3,3]+dissonance_matrix[4,4]+dissonance_matrix[5,5])/sum(dissonance_matrix[,])

In [18]:
# A class quality and A class views over A class quality
dissonance_matrix[5,5]/sum(dissonance_matrix[5,])

In [19]:
# A class quality and E and D class views over A class quality
(dissonance_matrix[5,1]+dissonance_matrix[5,2])/sum(dissonance_matrix[5,])

In [20]:
# A class quality and < A class views
(dissonance_matrix[5,1]+dissonance_matrix[5,2]+dissonance_matrix[5,3]+dissonance_matrix[5,4])/sum(dissonance_matrix[5,])

In [21]:
# < A class quality and A class views
(dissonance_matrix[1,5]+dissonance_matrix[2,5]+dissonance_matrix[3,5]+dissonance_matrix[4,5])/sum(dissonance_matrix[,5])

In [22]:
prediction_e_pop_class_a <- merge(articles_by_pop[prediction == 'E' & pop_class == 'A'],quality_prediction_and_page_views, by='entity_id')[, c("entity_id","page_views.x", "number_of_revisions")]

In [23]:
head(prediction_e_pop_class_a)

entity_id,page_views.x,number_of_revisions
Q1002972,2045659200,30
Q156376,2046132338,83
Q1868372,2056080224,45
Q219523,2045690113,73
Q2638147,2045739408,25
Q372827,2045652543,36


In [24]:
## Q: why do I get _two_ pageid columns?  Solution is to do the selection
## on the joined table, not as a select _in_ the join.

## Dissonance matrix proportions by row (..., 1) and column (..., 2)
## rounded to 1 decimal places.

In [25]:
round(100*prop.table(dissonance_matrix, 1), 1);

Unnamed: 0,E,D,C,B,A
E,75.9,16.4,7.6,0.0,0.0
D,77.2,13.7,8.9,0.2,0.0
C,47.9,25.0,25.7,1.4,0.0
B,32.7,28.8,36.0,2.5,0.0
A,0.0,2.9,5.9,88.2,2.9


In [26]:
round(100*prop.table(dissonance_matrix, 2), 1);

Unnamed: 0,E,D,C,B,A
E,75.9,71.6,58.0,16.1,11.8
D,17.7,13.7,15.5,13.2,14.7
C,6.3,14.4,25.7,67.9,70.6
B,0.1,0.3,0.7,2.5,0.0
A,0.0,0.0,0.0,0.2,2.9


In [27]:
## Let's write the stubs out to a file
write.table(merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201411_a_class_views_less_than_a_quality.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")]

entity_id,pop_class,prediction.x
Q1002972,A,E
Q103204,A,C
Q1048694,A,C
Q105584,A,C
Q1063819,A,C
Q10726338,A,D
Q1123836,A,D
Q116933,A,C
Q1373513,A,C
Q14005,A,D


In [28]:
write.table(merge(articles_by_pop[prediction == 'A' & (pop_class == 'B' | pop_class == 'C' | pop_class == 'D' | pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201411_a_class_quality_less_than_a_views.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [29]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class == 'A') | (prediction == 'B' & pop_class == 'B') | (prediction == 'C' & pop_class == 'C') | (prediction == 'D' & pop_class == 'D') | (prediction == 'E' & pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201411_aligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [None]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class != 'A') | (prediction == 'B' & pop_class != 'B') | (prediction == 'C' & pop_class != 'C') | (prediction == 'D' & pop_class != 'D') | (prediction == 'E' & pop_class != 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201411_misaligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

# Dissonance Measures (was seperate file)

In [29]:
## Various ways of measuring dissonance.

## DATA ASSUMPTION: articles_by_pop from build-dissonance-table.R
## is loaded into memory.

## None/Moderate/High measure of dissonance

In [30]:
articles_by_pop[, pop_class := ordered(pop_class, assessment_classes)];

entity_id,prediction,page_views,pop_class,seqNum
Q10040378,E,0,E,1
Q10069140,E,0,E,2
Q10081695,E,0,E,3
Q10092002,E,0,E,4
Q10111267,E,0,E,5
Q10149726,E,0,E,6
Q10180230,E,0,E,7
Q10185035,E,0,E,8
Q10205202,E,0,E,9
Q10252966,E,0,E,10


In [31]:
dissonance_metric = c('High negative', 'Moderate negative',
  'None', 'Moderate positive', 'High positive');

In [32]:
articles_by_pop[, dissonance := factor(NA, dissonance_metric)];

entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [33]:
## NOTE: because pop_class is of class ordered, we can use
##       expressions like "pop_class < 'C'" as expected

In [34]:
## A: None if A, Moderate if A, High elsewhere
articles_by_pop[prediction == 'A' & pop_class <= 'C',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'A' & pop_class == 'B',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'A' & pop_class == 'A',
                dissonance := 'None'];


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [35]:
## B: 
articles_by_pop[prediction == 'B' & pop_class <= 'D',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'B' & pop_class == 'C',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'B' & pop_class == 'B',
                dissonance := 'None'];
articles_by_pop[prediction == 'B' & pop_class == 'A',
                dissonance := 'Moderate positive'];

entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [36]:
## C: 
articles_by_pop[prediction == 'C' & pop_class == 'E',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'C' & pop_class == 'D',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'C' & pop_class == 'C',
                dissonance := 'None'];
articles_by_pop[prediction == 'C' & pop_class == 'B',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'C' & pop_class == 'A',
                dissonance := 'High positive'];

entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [37]:
## D
articles_by_pop[prediction == 'D' & pop_class == 'E',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'D' & pop_class == 'D',
                dissonance := 'None'];
articles_by_pop[prediction == 'D' & pop_class == 'C',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'D' & pop_class >= 'B',
                dissonance := 'High positive'];

entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [38]:
## E
articles_by_pop[prediction == 'E' & pop_class == 'E',
                dissonance := 'None'];
articles_by_pop[prediction == 'E' & pop_class == 'D',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'E' & pop_class >= 'C',
                dissonance := 'High positive'];

entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


entity_id,prediction,page_views,pop_class,seqNum,dissonance
Q10040378,E,0,E,1,
Q10069140,E,0,E,2,
Q10081695,E,0,E,3,
Q10092002,E,0,E,4,
Q10111267,E,0,E,5,
Q10149726,E,0,E,6,
Q10180230,E,0,E,7,
Q10185035,E,0,E,8,
Q10205202,E,0,E,9,
Q10252966,E,0,E,10,


In [39]:
## Build a matrix where columns are the metric and rows are classes
create_alt_diss_matrix = function(articledata, metric, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(metric));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = metric;

  ## NOTE: R matrix values are [row,col] dimensions
  for(real_rating in classes) {
    for(diss_rating in metric) {
      d_mtrx[real_rating, diss_rating] = length(articledata[prediction == real_rating & dissonance == diss_rating]$entity_id);
    }
  }
  d_mtrx;
}

alternative_dissonance_matrix.1 = create_alt_diss_matrix(articles_by_pop,
  dissonance_metric, assessment_classes);




In [40]:
## Normalise by row
round(100*prop.table(alternative_dissonance_matrix.1, 1), 1);




Unnamed: 0,High negative,Moderate negative,None,Moderate positive,High positive
E,0.0,0.0,75.9,16.4,7.7
D,0.0,77.2,13.7,8.9,0.2
C,47.9,25.0,25.7,1.4,0.0
B,61.5,36.0,2.5,0.0,0.0
A,8.8,88.2,2.9,0.0,0.0


In [41]:
## Number of dissonant views per assessment class and amount of dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(prediction, dissonance)];



prediction,dissonance,dissonant_views
E,,2496094058
D,Moderate negative,428038358
C,High negative,264055257
B,High negative,42360032
E,Moderate positive,7015375848
C,Moderate negative,1545465162
D,,1377542278
A,High negative,1268220
E,High positive,116957818179
C,,45552386215


In [42]:
## Calculations of total number of dissonant views per dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)];

dissonance,dissonant_views
,60644653956
Moderate negative,7879917046
High negative,307683509
Moderate positive,146755790431
High positive,298812675558


In [43]:
articles_by_pop[,sum(as.numeric(page_views))];

In [44]:
## Proportions
100*65938379920/545180810059;
100*125047198/545180810059;
100*6713682043/545180810059;
100*120523625541/545180810059;
100*351880075357/545180810059;

In [45]:
# 87% of views are high positive

In [46]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][1][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])

dissonant_views
11.78938


In [47]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][2][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])

dissonant_views
1.531864


In [48]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][3][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])

dissonant_views
0.05981397


In [49]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][4][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])

dissonant_views
28.52947


In [50]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][5][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])

dissonant_views
58.08947
