In [1]:
%%loadFromPOM
<repository>
    <id>bintray-ruivieira-maven</id>
    <url>https://dl.bintray.com/ruivieira/maven</url>
</repository>
<dependency>
    <groupId>org.ruivieira.ml</groupId>
    <artifactId>als</artifactId>
    <version>0.0.3</version>
</dependency>

In [2]:
import org.ruivieira.ml.als.Rating;

// Accumulator for every rating parsed from the CSV; later cells read and reassign it.
List<Rating> ratings = new ArrayList<>();

In [3]:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;


// Parse the ratings CSV and append one Rating per data row to `ratings`.
// try-with-resources closes the reader on every exit path; the earlier explicit
// close() was skipped whenever readLine() or a number parse threw.
try (BufferedReader reader = new BufferedReader(new FileReader("/home/jovyan/ml-latest-small/ratings.csv"))) {
    reader.readLine(); // skip header
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        // Only the first three comma-separated fields are used; any others are ignored.
        String[] tokens = line.split(",");
        int user = Integer.parseInt(tokens[0]);
        int item = Integer.parseInt(tokens[1]);
        double rating = Double.parseDouble(tokens[2]);
        ratings.add(new Rating(user, item, rating));
    }
} catch (IOException e) {
    // Report the failure but let the notebook continue, as before.
    e.printStackTrace();
}

In [4]:
// Sanity check: display the fields of the first parsed rating.
Rating r = ratings.get(0);
System.out.println(new StringBuilder()
        .append("user: ").append(r.getUser())
        .append(", item: ").append(r.getItem())
        .append(", rating: ").append(r.getRating())
        .toString());

user: 1, item: 1, rating: 4.0


In [5]:
// Work with at most the first 1000 ratings.
// The upper bound is clamped so a shorter list does not raise IndexOutOfBoundsException,
// and the slice is copied so it is an independent list rather than a view of the full one.
ratings = new ArrayList<>(ratings.subList(0, Math.min(1000, ratings.size())));

## Batch ALS

In [6]:
import org.apache.commons.math3.linear.SparseRealMatrix;
import org.ruivieira.ml.als.ALSUtils;

// Convert the list of ratings into a sparse matrix via ALSUtils.toMatrix.
// NOTE(review): presumably rows are users and columns are items — confirm against ALSUtils.
SparseRealMatrix R = ALSUtils.toMatrix(ratings);

In [7]:
// Largest user and item values reported by ALSUtils for the current ratings;
// both are passed to LatentFactors.create in later cells.
int maxUser = ALSUtils.maxUser(ratings);
int maxItem = ALSUtils.maxItem(ratings);

System.out.println("max user: " + maxUser + "\nmax item: " + maxItem);

max user: 7
max item: 131724


In [9]:
import org.ruivieira.ml.als.LatentFactors;

// Rank shared by the factor initialisation here and by the BatchALS and SGDALS constructors later on.
int rank = 4;

// Starting factors built from the maximum user/item values and the rank.
// NOTE(review): how create() initialises the values (random or fixed) is not visible here.
LatentFactors factors = LatentFactors.create(maxUser, maxItem, rank);

In [11]:
import org.ruivieira.ml.als.BatchALS;

// Batch ALS instance over the ratings matrix R, using the same rank as the factors.
// NOTE(review): the two 0.001 arguments look like regularisation parameters — confirm
// against the BatchALS constructor before tuning them.
BatchALS als = new BatchALS(R, rank, 0.001, 0.001);

In [12]:
// How many times the batch update is applied.
int iterations = 100;

// Feed the current factors back into run() and keep its result, counting down
// so the call is made exactly `iterations` times.
for (int remaining = iterations; remaining > 0; remaining--) {
    factors = als.run(factors);
}

In [14]:
import org.apache.commons.math3.linear.RealMatrix;

// Matrix produced by ALSUtils.approximate from the batch-trained factors; compared against R below.
RealMatrix nR = ALSUtils.approximate(factors);

In [15]:
/**
 * Mean squared error between ratings and predictions.
 *
 * Only cells whose rating is strictly greater than zero contribute; all other
 * cells are skipped. When no cell qualifies the result is NaN (0.0 divided by 0).
 *
 * @param ratings     matrix of ratings; its dimensions define the cells visited
 * @param predictions matrix read at the same (row, column) positions as ratings
 * @return sum of squared differences over contributing cells divided by their count
 */
public double MSE(SparseRealMatrix ratings, RealMatrix predictions) {

    final int nRows = ratings.getRowDimension();
    final int nCols = ratings.getColumnDimension();

    double squaredErrorSum = 0.0;
    int observed = 0;
    for (int row = 0; row < nRows; row++) {
        for (int col = 0; col < nCols; col++) {
            final double actual = ratings.getEntry(row, col);
            // Written as !(x > 0) rather than (x <= 0) so NaN entries are skipped too.
            if (!(actual > 0d)) {
                continue;
            }
            observed++;
            final double diff = actual - predictions.getEntry(row, col);
            squaredErrorSum += Math.pow(diff, 2.0);
        }
    }
    return squaredErrorSum / observed;
}

In [16]:
// Error of the batch ALS reconstruction, measured on the same ratings matrix R it was fitted to.
MSE(R, nR);

0.19901400547842776

## Stochastic Gradient Descent ALS

In [17]:
import org.ruivieira.ml.als.SGDALS;

// SGD-based ALS instance configured with the same rank as the batch run.
SGDALS sgd_als = new SGDALS(rank);

In [18]:
// Replace the batch-trained factors with a newly created set before running SGD.
factors = LatentFactors.create(maxUser, maxItem, rank);

In [20]:
// Stream the ratings through the SGD learner one at a time: each call to run()
// receives the current factors plus a fresh one-element list.
for (Rating observation : ratings) {
    // A mutable ArrayList is kept here since it is not visible whether run() modifies its argument.
    List<Rating> single = new ArrayList<>();
    single.add(observation);
    factors = sgd_als.run(factors, single);
}

In [21]:
// Recompute the approximation, this time from the SGD-trained factors (overwrites the batch result).
nR = ALSUtils.approximate(factors);

In [22]:
// Error of the SGD reconstruction against the same matrix R. The SGD loop passed each
// rating to run() once, whereas the batch cell called run() 100 times.
MSE(R, nR);

2.093066330556519