In [81]:
using DataFrames
using Plots
using Distributions

In [82]:
# Column names for the film-level table, in file column order.
basic_cols = [
    :id, :title, :metascore, :user_score, :release_date, :running_time,
    :rating, :company, :positive, :mixed, :negative
]
# Column names for the per-review table, in file column order.
review_cols = [:id, :score, :publication, :critic]

4-element Array{Symbol,1}:
 :id         
 :score      
 :publication
 :critic     

In [83]:
# Load the two CSV files, overriding their column names with `basic_cols` and
# `review_cols`.
# NOTE(review): no `header` option is passed — confirm both files begin with a
# header row; otherwise the first data record may be consumed as the header.
basics = readtable("../data/basics1001.csv", names = basic_cols);
reviews = readtable("../data/reviews1001.csv", names=review_cols);

In [126]:
# Binarise the review scores: the elementwise comparison yields one flag per
# review row, true when that row's score is at or above `cutoff`.
cutoff = 70
reviews_normalized = reviews[:score] .>= cutoff;

In [85]:
# Distinct values of the `id` and `critic` columns of the reviews table, as
# plain arrays. Each value's position in these arrays becomes its matrix index
# in the cells that follow.
film_ids = convert(Array, unique(reviews[:id]))
critic_ids = convert(Array, unique(reviews[:critic]));

In [86]:
# Lookup tables mapping each distinct film id / critic value to its position
# in `film_ids` / `critic_ids`.
film_dict = Dict(zip(film_ids, eachindex(film_ids)))
critic_dict = Dict(zip(critic_ids, eachindex(critic_ids)));

In [87]:
# Translate every review row into its film index and critic index by looking
# the row's `id` and `critic` values up in the two dictionaries.
film_is = [film_dict[id] for id in reviews[:id]]
critic_is = [critic_dict[c] for c in reviews[:critic]];

In [127]:
# Assemble the critic × film matrix of review flags (rows are critic indices,
# columns are film indices).
#
# The dimensions and the duplicate-handling rule are spelled out rather than
# left to the defaults: `sparse` combines repeated (row, column) pairs with `+`
# unless the values are Bool, so an integer-valued flag vector would silently
# sum duplicate reviews and push per-film averages above 1. Passing `|`
# explicitly keeps every stored entry a 0/1 flag whatever the element type.
# Entries whose flag is false are still stored, which the per-film averages
# rely on to tell "reviewed, below cutoff" apart from "not reviewed".
critic_x_film = sparse(critic_is, film_is, reviews_normalized,
                       length(critic_ids), length(film_ids), |)

2569×7273 sparse matrix with 153691 Bool nonzero entries:
	[1   ,    1]  =  false
	[2   ,    1]  =  false
	[3   ,    1]  =  false
	[4   ,    1]  =  false
	[5   ,    1]  =  false
	[6   ,    1]  =  false
	[7   ,    2]  =  true
	[8   ,    2]  =  true
	[9   ,    2]  =  true
	[10  ,    2]  =  true
	⋮
	[314 , 7272]  =  true
	[699 , 7272]  =  true
	[706 , 7272]  =  false
	[4   , 7273]  =  false
	[19  , 7273]  =  false
	[87  , 7273]  =  true
	[124 , 7273]  =  true
	[260 , 7273]  =  true
	[350 , 7273]  =  false
	[560 , 7273]  =  false
	[703 , 7273]  =  true

In [128]:
# Per-film share of `true` flags: for each column of the matrix, average its
# stored entries. `nonzeros` returns every stored value, stored `false`s
# included, so only critics who actually reviewed the film enter the average.
means = Float64[
    mean(nonzeros(critic_x_film[:, film]))
    for film in 1:size(critic_x_film, 2)
];

In [129]:
# Fit a Beta distribution to the per-film means.
fitted = fit(Beta, means)

Distributions.Beta{Float64}(α=0.6391505550181169, β=0.895176351030618)

In [130]:
# Overlay the fitted Beta density on the normalised histogram of `means`.
x = 0:0.001:1
histogram(means, normed=true)
# A Beta density is unbounded at 0 when α < 1 and at 1 when β < 1, which is the
# case for the fit recorded in this notebook. Evaluate the curve on the
# interior grid points only, so no infinite values are handed to the plot.
interior = x[2:end-1]
plot!(interior, pdf(fitted, interior))

In [131]:
# Unpack the two shape parameters of the fitted Beta distribution.
α, β = params(fitted)

(0.6391505550181169,0.895176351030618)

4.776726832762454

In [46]:
# Scratch: a coarse grid from 0 to 1 in steps of 0.1.
x = 0:.1:1

0.0:0.1:1.0

In [50]:
# Scratch: materialise the first five values of `x` as an array.
collect(take(x, 5))

5-element Array{Float64,1}:
 0.0
 0.1
 0.2
 0.3
 0.4

In [55]:
# Scratch check: total of `means`.
# NOTE(review): the integer output recorded below looks like it comes from an
# earlier run in which `means` held integer values — confirm before relying on it.
sum(means)

415579

In [99]:
# Scratch check: largest per-film mean.
# NOTE(review): the recorded value is above 1, which an average of true/false
# flags cannot produce; it presumably predates the Bool-valued matrix — confirm.
maximum(means)

1.5384615384615385

In [93]:
# Scratch: display `means`.
# NOTE(review): the recorded output is an Int64 array, unlike the Float64 array
# built in cell In [128]; it appears to be from an earlier run — confirm.
means

7273-element Array{Int64,1}:
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 ⋮
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0
 0

In [103]:
 # Scratch: pull out column 2 of the matrix (film index 2) to inspect its stored entries.
 a = critic_x_film[:, 2]

Sparse vector of length 2569 with 15 Int64 nonzero entries:
  [7   ]  =  1
  [8   ]  =  1
  [9   ]  =  1
  [10  ]  =  1
  [11  ]  =  1
  [12  ]  =  1
  [13  ]  =  1
  [14  ]  =  1
  [15  ]  =  0
  [16  ]  =  0
  [17  ]  =  0
  [18  ]  =  0
  [19  ]  =  0
  [20  ]  =  0
  [21  ]  =  0


In [106]:
# Scratch: average of the stored entries of `a`, the same computation used per film for `means`.
mean(nonzeros(a))


0.5333333333333333

In [107]:
# Scratch: list the stored entries of `a`; stored zeros are part of the result.
nonzeros(a)

15-element Array{Int64,1}:
 1
 1
 1
 1
 1
 1
 1
 1
 0
 0
 0
 0
 0
 0
 0

In [108]:
# Scratch check: largest entry of the matrix.
# NOTE(review): the recorded 3 exceeds a single 0/1 flag, which suggests repeated
# (critic, film) pairs were being summed in that run — confirm.
maximum(critic_x_film)

3

In [109]:
# Scratch check: largest value among the per-review flags.
maximum(reviews_normalized)

1

In [112]:
?sparse

search: sparse sparsevec SparseVector SparseArrays SparseMatrixCSC issparse



```
sparse(A)
```

Convert an AbstractMatrix `A` into a sparse matrix.

```
sparse(I, J, V,[ m, n, combine])
```

Create a sparse matrix `S` of dimensions `m x n` such that `S[I[k], J[k]] = V[k]`. The `combine` function is used to combine duplicates. If `m` and `n` are not specified, they are set to `maximum(I)` and `maximum(J)` respectively. If the `combine` function is not supplied, `combine` defaults to `+` unless the elements of `V` are Booleans in which case `combine` defaults to `|`. All elements of `I` must satisfy `1 <= I[k] <= m`, and all elements of `J` must satisfy `1 <= J[k] <= n`. Numerical zeros in (`I`, `J`, `V`) are retained as structural nonzeros; to drop numerical zeros, use `dropzeros!`.

For additional documentation and an expert driver, see `Base.SparseArrays.sparse!`.
