In [44]:
using DataFrames
using LowRankModels

[1m[34mINFO: Precompiling module LowRankModels.


In [16]:
basic_cols = [:id, :title, :metascore, :user_score, :release_date, :running_time, :rating, :company, :positive, :mixed, :negative]
review_cols = [:id, :score, :publication, :critic]

4-element Array{Symbol,1}:
 :id         
 :score      
 :publication
 :critic     

In [17]:
basics = readtable("../data/basics1001.csv", names = basic_cols);
reviews = readtable("../data/reviews1001.csv", names=review_cols);

In [38]:
film_ids = convert(Array, unique(reviews[:id]))
critic_ids = convert(Array, unique(reviews[:critic]));

In [39]:
film_dict = Dict(collect(zip(film_ids, 1:length(film_ids))))
critic_dict = Dict(collect(zip(critic_ids, 1:length(critic_ids))));

In [41]:
film_is = [film_dict[film] for film in reviews[:id]]
critic_is = [critic_dict[critic] for critic in reviews[:critic]];

In [57]:
critic_x_film = sparse(critic_is, film_is, reviews[:score])

1718×2356 sparse matrix with 48226 Int64 nonzero entries:
	[1   ,    1]  =  65
	[2   ,    1]  =  60
	[3   ,    1]  =  50
	[4   ,    1]  =  40
	[5   ,    1]  =  30
	[6   ,    1]  =  12
	[7   ,    2]  =  91
	[8   ,    2]  =  91
	[9   ,    2]  =  88
	[10  ,    2]  =  75
	⋮
	[154 , 2356]  =  60
	[180 , 2356]  =  90
	[235 , 2356]  =  83
	[308 , 2356]  =  80
	[324 , 2356]  =  91
	[410 , 2356]  =  63
	[437 , 2356]  =  83
	[481 , 2356]  =  80
	[529 , 2356]  =  88
	[598 , 2356]  =  75
	[638 , 2356]  =  80

In [76]:
model = LowRankModels.rpca(critic_x_film, 1000);

In [77]:
X, Y, _ = fit!(model)

LowRankModels.SparseProxGradParams(1.0,100,1,1.0e-5,0.01)
Fitting GLRM
obj went up to 1.9743758787236742e6; reducing step size to 1.0342188106523442
Iteration 10: objective value = 1.8007223754132967e6
obj went up to 1.4456050080707148e6; reducing step size to 0.7981583671209468
obj went up to 1.5232283176801146e6; reducing step size to 0.5587108569846627
obj went up to 1.1911250430513597e6; reducing step size to 0.3910975998892639
obj went up to 919102.3727560815; reducing step size to 0.27376831992248474
Iteration 20: objective value = 883495.5508137017
obj went up to 821865.4055870515; reducing step size to 0.19163782394573933
obj went up to 727447.6661405759; reducing step size to 0.13414647676201755
obj went up to 664090.3697026959; reducing step size to 0.09390253373341229
obj went up to 625035.6233364472; reducing step size to 0.0657317736133886
obj went up to 594499.9322256886; reducing step size to 0.04601224152937202
Iteration 30: objective value = 583738.5846783619
obj went 

(
[-0.662434 0.466237 … -0.151947 -0.24568; 0.186882 -0.464404 … 0.279972 -0.109405; … ; 0.300383 0.0174432 … -0.140701 0.152084; -0.893847 0.140644 … -0.0742042 -0.19414],

[-0.0834411 -0.152263 … -0.709245 -1.11738; -0.137557 -0.27284 … -0.297751 0.0968416; … ; 0.0582126 0.498501 … 0.208046 0.772562; -0.0557331 -0.171654 … 0.112496 -0.358798],

LowRankModels.ConvergenceHistory("SparseProxGradGLRM",[7.1187e6,4.66405e6,3.63579e6,2.83837e6,2.72988e6,2.27718e6,2.13808e6,1.86718e6,1.83835e6,1.80072e6  …  5.05754e5,5.05117e5,5.05019e5,5.04294e5,5.03506e5,5.028e5,5.02549e5,5.01997e5,5.01804e5,5.01804e5],Float64[],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[0.0,0.404557,0.696101,1.09024,1.37858,1.77861,2.06752,2.46057,2.74733,3.1392  …  31.6609,31.9576,32.4168,32.7068,33.4002,33.7975,34.0922,34.4984,34.7847,34.7853],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,

In [78]:
size(X)

(1000,1718)

In [79]:
size(Y)

(1000,2356)

In [80]:
size(critic_x_film)

(1718,2356)

In [81]:
X' * Y

1718×2356 Array{Float64,2}:
 64.5354      18.378        7.49311     …  19.7832     34.3098    
 59.6642      25.3508       6.42827        23.5817     21.3894    
 49.7046      20.8045       4.04184        12.7527     25.825     
 39.8855      35.9826       5.60792        22.2665     30.6237    
 29.8752      23.3823      10.7987         13.3649     32.3282    
 12.0869      23.2916       5.79991     …  20.6986     40.786     
  9.23584     90.8491      20.4101         74.943      36.3512    
 23.327       90.8482      14.7056         79.882      74.9924    
 20.2195      87.8604      17.9814         74.9385     44.9091    
 12.0842      74.8687       7.95804        17.7582     19.7877    
 15.7871      74.8408       8.43691     …  14.8218     74.9167    
 13.9828      74.6076      10.0156         24.8069     26.4749    
 13.5779      74.8118       2.73005        32.1923     38.867     
  ⋮                                     ⋱               ⋮         
  4.66266     11.9268       3.2914

In [111]:
means = Array{Int64}(size(critic_x_film, 2)) 
for i in 1:size(critic_x_film, 2)
   means[i] = floor(Int64, mean(nonzeros(critic_x_film[:, i])))
end

In [115]:
full_matrix = Array{Int64}(size(critic_x_film)...)

2356-element Array{Int64,1}:
 42
 71
 43
 82
 57
 80
 58
 61
 68
 60
 41
 75
 58
  ⋮
 50
 72
 73
 47
 52
 44
 81
 81
 57
 69
 65
 81

In [120]:
for i in 1:size(critic_x_film, 1), j in 1:size(critic_x_film, 2)
    if critic_x_film[i, j] == 0
        full_matrix[i, j] = means[j]
    else
        full_matrix[i, j] = critic_x_film[i, j]
    end
end

In [127]:
vars = var(full_matrix, 1)

1×2356 Array{Float64,2}:
 1.14443  1.75655  1.25044  1.39515  …  1.48398  2.54446  4.63996  3.72219

In [133]:
perm = sortperm(vec(vars), rev=true)

2356-element Array{Int64,1}:
 1188
 1106
 1025
 1829
 1065
 1220
   49
 1553
 1782
  238
 1245
 1725
  909
    ⋮
  451
  230
 2117
  482
  440
 1197
   73
  467
  568
 1959
  259
  720

In [138]:
film_ids[perm[1:20]]

20-element Array{String,1}:
 "cabaret"                      
 "broadcast-news"               
 "born-on-the-fourth-of-july"   
 "dogville"                     
 "brazil"                       
 "cape-fear"                    
 "2001-a-space-odyssey"         
 "crimes-and-misdemeanors"      
 "diner"                        
 "a-river-runs-through-it"      
 "cars"                         
 "delicatessen"                 
 "bill-teds-excellent-adventure"
 "21-grams"                     
 "crush"                        
 "elephant"                     
 "cloud-atlas"                  
 "bad-lieutenant"               
 "eyes-wide-shut"               
 "days-of-heaven-re-release"    

In [161]:

subset = reviews[reviews[:id] .== "a-river-runs-through-it", :]

Unnamed: 0,id,score,publication,critic
1,a-river-runs-through-it,100,San Francisco Chronicle,Peter Stack
2,a-river-runs-through-it,100,Seattle Post-Intelligencer,William Arnold
3,a-river-runs-through-it,90,Variety,Staff (Not Credited)
4,a-river-runs-through-it,90,The New York Times,Caryn James
5,a-river-runs-through-it,88,Chicago Sun-Times,Roger Ebert
6,a-river-runs-through-it,88,ReelViews,James Berardinelli
7,a-river-runs-through-it,78,Austin Chronicle,Hollis Chacona
8,a-river-runs-through-it,75,Boston Globe,Jay Carr
9,a-river-runs-through-it,75,Portland Oregonian,Ted Mahar
10,a-river-runs-through-it,75,The Globe and Mail (Toronto),Rick Groen


In [190]:
function build_most_variant(dataframe)
    movies = unique(dataframe[:id])
    variances = zeros(length(movies))
    for (i, movie) in enumerate(movies)
        subset = dataframe[dataframe[:id] .== movie, :]
        variances[i] = var(subset[:score])
    end
        
    perm = sortperm(variances, rev=true)
    movies[perm][1:20]
end



build_most_variant (generic function with 1 method)

In [240]:
function exclude_critics(dataframe, movie, rating)
    LO, MED, HI = 60, 75, 88
   subset = dataframe[dataframe[:id].== movie, :]
    if rating == 0
        excluded = subset[subset[:score].>= LO, :][:critic]
    elseif rating == 1
        excluded = subset[(subset[:score].<= LO) | (subset[:score].>= HI), :][:critic]
    else
        excluded = subset[subset[:score].<= HI, :][:critic]
    end
    
    excluded
end



exclude_critics (generic function with 1 method)

In [215]:
function subset_df(df, excluded_critics)
    return df[[~(x in excluded_critics) for x in df[:critic]], :]
end



subset_df (generic function with 1 method)

In [191]:
build_most_variant(reviews)

20-element DataArrays.DataArray{String,1}:
 "film-geek"                            
 "butch-cassidy-and-the-sundance-kid"   
 "dogville"                             
 "a-moment-of-innocence"                
 "coming-to-america"                    
 "crash-1997"                           
 "fracknation"                          
 "beer-league"                          
 "2001-a-space-odyssey"                 
 "broken-sky"                           
 "ace-ventura-pet-detective"            
 "ferris-buellers-day-off"              
 "bound"                                
 "fear-and-loathing-in-las-vegas"       
 "american-cannibal-the-road-to-reality"
 "air-force-one"                        
 "8mm"                                  
 "a-clockwork-orange"                   
 "bandits-1999"                         
 "fleeing-by-night"                     

In [221]:
excluded = exclude_critics(reviews, "a-clockwork-orange", 2)

4-element DataArrays.DataArray{String,1}:
 "Staff (Not Credited)"
 "Michael Atkinson"    
 "Roger Ebert"         
 "Dave Kehr"           

In [222]:
review_2 = subset_df(reviews, excluded);

In [223]:
build_most_variant(review_2)

20-element DataArrays.DataArray{String,1}:
 "film-geek"                            
 "a-moment-of-innocence"                
 "dogville"                             
 "coming-to-america"                    
 "ferris-buellers-day-off"              
 "2001-a-space-odyssey"                 
 "crash-1997"                           
 "crimes-and-misdemeanors"              
 "fracknation"                          
 "beer-league"                          
 "dog-run"                              
 "ace-ventura-pet-detective"            
 "broken-sky"                           
 "air-force-one"                        
 "fear-and-loathing-in-las-vegas"       
 "american-cannibal-the-road-to-reality"
 "bound"                                
 "conan-the-barbarian"                  
 "fleeing-by-night"                     
 "broken-arrow"                         

In [242]:
excluded = exclude_critics(review_2, "bound", 1)

10-element DataArrays.DataArray{String,1}:
 "Mick LaSalle"       
 "Jimmy Fowler"       
 "James Berardinelli" 
 "Staff(not credited)"
 "Mike Clark"         
 "Bruce Walker"       
 "Elvis Mitchell"     
 "David Sterritt"     
 "Staff(not credited)"
 "Todd McCarthy"      

In [244]:
review_3 = subset_df(review_2, excluded);

In [245]:
build_most_variant(review_3)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "black-rain"                    
 "film-geek"                     
 "crash-1997"                    
 "dogville"                      
 "2001-a-space-odyssey"          
 "coming-to-america"             
 "air-force-one"                 
 "ace-ventura-pet-detective"     
 "ferris-buellers-day-off"       
 "crimes-and-misdemeanors"       
 "fear-and-loathing-in-las-vegas"
 "fracknation"                   
 "beer-league"                   
 "dog-run"                       
 "broken-arrow"                  
 "broken-sky"                    
 "fleeing-by-night"              
 "deconstructing-harry"          
 "dumb-and-dumber"               

In [246]:
excluded = exclude_critics(review_3, "deconstructing-harry", 2)

12-element DataArrays.DataArray{String,1}:
 "Ron Wells"          
 "David Ansen"        
 "David Stratton"     
 "Rita Kempley"       
 "Marjorie Baumgarten"
 "David Edelstein"    
 "Andrew O'Hehir"     
 "Angie Errigo"       
 "Maitland McDonagh"  
 "Manohla Dargis"     
 "Nathan Rabin"       
 "Jonathan Rosenbaum" 

In [247]:
review_4 = subset_df(review_3, excluded);

In [248]:
build_most_variant(review_4)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "black-rain"                    
 "2001-a-space-odyssey"          
 "film-geek"                     
 "crash-1997"                    
 "ferris-buellers-day-off"       
 "coming-to-america"             
 "air-force-one"                 
 "dogville"                      
 "fear-and-loathing-in-las-vegas"
 "beer-league"                   
 "ace-ventura-pet-detective"     
 "dumb-and-dumber"               
 "fracknation"                   
 "dog-run"                       
 "cabaret"                       
 "come-out-and-play"             
 "broken-sky"                    
 "bagdad-cafe"                   
 "fleeing-by-night"              

In [249]:
excluded = exclude_critics(review_4, "bagdad-cafe", 2)

2-element DataArrays.DataArray{String,1}:
 "Staff [Not Credited]"
 "Janet Maslin"        

In [250]:
review_5 = subset_df(review_4, excluded)
build_most_variant(review_5)

20-element DataArrays.DataArray{String,1}:
 "3-women"                                   
 "bagdad-cafe"                               
 "black-rain"                                
 "ferris-buellers-day-off"                   
 "2001-a-space-odyssey"                      
 "film-geek"                                 
 "crash-1997"                                
 "coming-to-america"                         
 "air-force-one"                             
 "dogville"                                  
 "fear-and-loathing-in-las-vegas"            
 "brazil"                                    
 "beer-league"                               
 "ace-ventura-pet-detective"                 
 "dumb-and-dumber"                           
 "fracknation"                               
 "dog-run"                                   
 "cabaret"                                   
 "austin-powers-international-man-of-mystery"
 "come-out-and-play"                         

In [251]:
excluded = exclude_critics(review_5, "brazil", 2)

1-element DataArrays.DataArray{String,1}:
 "Stanley Kauffmann"

In [252]:
review_6 = subset_df(review_5, excluded)
build_most_variant(review_6)

20-element DataArrays.DataArray{String,1}:
 "3-women"                                   
 "bagdad-cafe"                               
 "black-rain"                                
 "ferris-buellers-day-off"                   
 "2001-a-space-odyssey"                      
 "crash-1997"                                
 "film-geek"                                 
 "air-force-one"                             
 "coming-to-america"                         
 "fear-and-loathing-in-las-vegas"            
 "dogville"                                  
 "beer-league"                               
 "ace-ventura-pet-detective"                 
 "dumb-and-dumber"                           
 "fracknation"                               
 "dog-run"                                   
 "cabaret"                                   
 "austin-powers-international-man-of-mystery"
 "come-out-and-play"                         
 "broken-sky"                                

In [253]:
excluded = exclude_critics(review_6, "austin-powers-international-man-of-mystery", 0)

8-element DataArrays.DataArray{String,1}:
 "Graham Verdon"   
 "G. Allen Johnson"
 "Leonard Klady"   
 "Robert Faires"   
 "John Petrakis"   
 "Owen Gleiberman" 
 "Susan Wloszczyna"
 "John F. Kelly"   

In [254]:

review_7 = subset_df(review_6, excluded)
build_most_variant(review_7)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "bagdad-cafe"                   
 "black-rain"                    
 "2001-a-space-odyssey"          
 "ferris-buellers-day-off"       
 "crash-1997"                    
 "film-geek"                     
 "coming-to-america"             
 "air-force-one"                 
 "fear-and-loathing-in-las-vegas"
 "dumb-and-dumber"               
 "china-the-panda-adventure"     
 "die-hard-2"                    
 "dogville"                      
 "crooklyn"                      
 "beer-league"                   
 "broken-sky"                    
 "dead-alive"                    
 "addicted-to-love"              
 "fracknation"                   

In [255]:
excluded = exclude_critics(review_7, "die-hard-2", 1)
review = subset_df(review_7, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "bagdad-cafe"                   
 "black-rain"                    
 "2001-a-space-odyssey"          
 "educating-rita"                
 "ferris-buellers-day-off"       
 "crash-1997"                    
 "film-geek"                     
 "air-force-one"                 
 "fear-and-loathing-in-las-vegas"
 "dumb-and-dumber"               
 "china-the-panda-adventure"     
 "dogville"                      
 "crooklyn"                      
 "beer-league"                   
 "broken-sky"                    
 "flash-gordon"                  
 "dead-alive"                    
 "addicted-to-love"              
 "fracknation"                   

In [256]:
excluded = exclude_critics(review, "2001-a-space-odyssey", 2)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "bagdad-cafe"                   
 "black-rain"                    
 "educating-rita"                
 "ferris-buellers-day-off"       
 "crash-1997"                    
 "film-geek"                     
 "air-force-one"                 
 "dumb-and-dumber"               
 "china-the-panda-adventure"     
 "dogville"                      
 "crooklyn"                      
 "beer-league"                   
 "broken-sky"                    
 "flash-gordon"                  
 "dead-alive"                    
 "fear-and-loathing-in-las-vegas"
 "fracknation"                   
 "dog-run"                       
 "coming-to-america"             

In [260]:
excluded = exclude_critics(review, "dumb-and-dumber", 0)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "3-women"                       
 "bagdad-cafe"                   
 "black-rain"                    
 "educating-rita"                
 "ferris-buellers-day-off"       
 "crash-1997"                    
 "film-geek"                     
 "beer-league"                   
 "air-force-one"                 
 "crooklyn"                      
 "china-the-panda-adventure"     
 "dogville"                      
 "come-out-and-play"             
 "broken-sky"                    
 "flash-gordon"                  
 "dead-alive"                    
 "fear-and-loathing-in-las-vegas"
 "fracknation"                   
 "dog-run"                       
 "coming-to-america"             

In [261]:
excluded = exclude_critics(review, "crash-1997", 1)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "3-women"                  
 "backdraft"                
 "bagdad-cafe"              
 "black-rain"               
 "burn!"                    
 "crimewave"                
 "deconstructing-harry"     
 "diner"                    
 "dumb-and-dumber"          
 "educating-rita"           
 "ferris-buellers-day-off"  
 "crooklyn"                 
 "cabaret"                  
 "film-geek"                
 "beer-league"              
 "air-force-one"            
 "china-the-panda-adventure"
 "dogville"                 
 "come-out-and-play"        
 "broken-sky"               

In [262]:
excluded = exclude_critics(review, "backdraft", 1)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "3-women"                  
 "bagdad-cafe"              
 "black-rain"               
 "burn!"                    
 "crimewave"                
 "deconstructing-harry"     
 "diner"                    
 "dumb-and-dumber"          
 "educating-rita"           
 "ferris-buellers-day-off"  
 "crooklyn"                 
 "cabaret"                  
 "film-geek"                
 "beer-league"              
 "air-force-one"            
 "china-the-panda-adventure"
 "dogville"                 
 "come-out-and-play"        
 "broken-sky"               
 "broken-arrow"             

In [263]:
excluded = exclude_critics(review, "dogville", 2)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"                                              
 "3-women"                                                           
 "30-years-to-life"                                                  
 "all-dogs-go-to-heaven"                                             
 "bagdad-cafe"                                                       
 "black-rain"                                                        
 "brazil"                                                            
 "burn!"                                                             
 "crimewave"                                                         
 "deconstructing-harry"                                              
 "desperado"                                                         
 "diner"                                                             
 "dr-strangelove-or-how-i-learned-to-stop-worrying-and-love-the-bomb"
 "fanny-and-alexander-re-release"              

In [264]:
excluded = exclude_critics(review, "dr-strangelove-or-how-i-learned-to-stop-worrying-and-love-the-bomb", 2)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "all-dogs-go-to-heaven"         
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "crimewave"                     
 "deconstructing-harry"          
 "desperado"                     
 "diner"                         
 "fanny-and-alexander-re-release"
 "educating-rita"                
 "ferris-buellers-day-off"       
 "cabaret"                       
 "film-geek"                     
 "beer-league"                   
 "dog-run"                       
 "8mm"                           

In [265]:
excluded = exclude_critics(review, "8mm", 0)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "all-dogs-go-to-heaven"         
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "crimewave"                     
 "deconstructing-harry"          
 "desperado"                     
 "diner"                         
 "fanny-and-alexander-re-release"
 "educating-rita"                
 "ferris-buellers-day-off"       
 "cabaret"                       
 "film-geek"                     
 "beer-league"                   
 "dog-run"                       
 "badlands"                      

In [266]:
excluded = exclude_critics(review, "fanny-and-alexander-re-release", 2)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "all-dogs-go-to-heaven"         
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "crimewave"                     
 "deconstructing-harry"          
 "desperado"                     
 "diner"                         
 "fanny-and-alexander-re-release"
 "educating-rita"                
 "ferris-buellers-day-off"       
 "cabaret"                       
 "film-geek"                     
 "beer-league"                   
 "dog-run"                       
 "badlands"                      

In [267]:

excluded = exclude_critics(review, "badlands", 1)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "a-clockwork-orange"            
 "all-the-right-moves"           
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "conan-the-barbarian"           
 "crimes-and-misdemeanors"       
 "deconstructing-harry"          
 "desperado"                     
 "die-hard-2"                    
 "diner"                         
 "easy-rider"                    
 "fanny-and-alexander-re-release"
 "flashdance"                    
 "educating-rita"                
 "ferris-buellers-day-off"       

In [268]:

excluded = exclude_critics(review, "flashdance", 0)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "a-clockwork-orange"            
 "all-the-right-moves"           
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "conan-the-barbarian"           
 "crimes-and-misdemeanors"       
 "deconstructing-harry"          
 "desperado"                     
 "die-hard-2"                    
 "diner"                         
 "easy-rider"                    
 "fanny-and-alexander-re-release"
 "flashdance"                    
 "educating-rita"                
 "ferris-buellers-day-off"       

In [269]:

excluded = exclude_critics(review, "desperado", 1)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "a-clockwork-orange"            
 "all-the-right-moves"           
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "conan-the-barbarian"           
 "crimes-and-misdemeanors"       
 "deconstructing-harry"          
 "die-hard-2"                    
 "diner"                         
 "easy-rider"                    
 "fanny-and-alexander-re-release"
 "flashdance"                    
 "educating-rita"                
 "ferris-buellers-day-off"       
 "cabaret"                       

In [270]:

excluded = exclude_critics(review, "conan-the-barbarian", 0)
review = subset_df(review, excluded)
build_most_variant(review)

20-element DataArrays.DataArray{String,1}:
 "2001-a-space-odyssey"          
 "3-women"                       
 "30-years-to-life"              
 "a-clockwork-orange"            
 "all-the-right-moves"           
 "bagdad-cafe"                   
 "black-rain"                    
 "brazil"                        
 "burn!"                         
 "conan-the-barbarian"           
 "crimes-and-misdemeanors"       
 "deconstructing-harry"          
 "die-hard-2"                    
 "diner"                         
 "easy-rider"                    
 "fanny-and-alexander-re-release"
 "flashdance"                    
 "educating-rita"                
 "ferris-buellers-day-off"       
 "cabaret"                       

In [271]:
unique(review[:critic])

1637-element DataArrays.DataArray{String,1}:
 "Inkoo Kang"          
 "Mark Olsen"          
 "Chuck Bowen"         
 "Jeannette Catsoulis" 
 "Peter Sobczynski"    
 "Lisa Schwarzbaum"    
 "Steven Rea"          
 "Tasha Robinson"      
 "Kyle Smith"          
 "Janice Page"         
 "Peter Hartlaub"      
 "Calvin Wilson"       
 "Ella Taylor"         
 ⋮                     
 "Gregory Valens"      
 "Joel Schumacher"     
 "Eliza Truitt"        
 "Gabriella Gershenson"
 "Robert Sietsema"     
 "Sam Worley"          
 "Jess Righthand"      
 "Eleonore Snow"       
 "Don Irvine"          
 "Gina Fattore"        
 "Joseph McBride"      
 "Ross Bennett"        