In [25]:
import numpy as np

In [26]:
# Clipping margin used by logloss: predicted probabilities are clamped into
# [eps, 1 - eps] before their logarithms are taken.
eps = 1e-5

def prediction(mu, students, problems):
    """Return the matrix of predicted probabilities.

    Entry (i, j) equals 1 / (1 + exp(-(mu + students[i] + problems[j]))),
    i.e. the logistic function of the sum of the global bias, the i-th
    student parameter and the j-th problem parameter.

    Args:
        mu: scalar bias.
        students: array indexed along its first axis, one entry per student.
        problems: array of per-problem parameters.

    Returns:
        Array with one row per student and one column per problem.
    """
    denominators = []
    for idx in range(students.shape[0]):
        # 1 + exp(-logit) for this student against every problem at once.
        neg_logit = -mu - problems - students[idx]
        denominators.append(1 + np.exp(neg_logit))
    return 1 / np.array(denominators)

def logloss(table, mu, students, problems, regmu, regst, regprob):
    """Evaluate the L2-penalised log-likelihood and its gradients.

    The objective is
        sum(table * log(p) + (1 - table) * log(1 - p))
        - regmu * mu**2 - regst * |students|**2 - regprob * |problems|**2
    where p is the output of `prediction`, clamped into [eps, 1 - eps].

    Args:
        table: outcome matrix with the same layout as the prediction matrix
            (presumably 0/1 entries - confirm against the data source).
        mu: scalar bias.
        students: per-student parameter vector.
        problems: per-problem parameter vector.
        regmu, regst, regprob: L2 penalty weights for the three parameter groups.

    Returns:
        Tuple (objective, dmu, dstudents, dproblems); the gradients have the
        same shapes as the corresponding parameters (asserted below).
    """
    # Clamp so that neither log(p) nor log(1 - p) can see an exact 0.
    prob = np.clip(prediction(mu, students, problems), eps, 1 - eps)

    objective = (
        np.sum(table * np.log(prob) + (1 - table) * np.log(1 - prob))
        - regmu * mu * mu
        - regst * students @ students
        - regprob * problems @ problems
    )

    # All three gradients share the same residual matrix.
    residual = table - prob
    grad_mu = residual.sum() - 2 * regmu * mu
    grad_students = residual.sum(axis=1) - 2 * regst * students
    grad_problems = residual.sum(axis=0) - 2 * regprob * problems

    assert grad_students.shape == students.shape
    assert grad_problems.shape == problems.shape

    return objective, grad_mu, grad_students, grad_problems

def LogisticRegression(table, iters=50000, learning_rate=0.0001, regmu=1, regst=1, regprob=10):
    """Fit the bias / student / problem logistic model by gradient ascent.

    Parameters start as uniform random values in [0, 1) and are then moved
    `iters` times by a fixed step of `learning_rate` along the gradients
    returned by `logloss`.

    Args:
        table: 2-D array; its shape (n, m) fixes the number of student and
            problem parameters.
        iters: number of gradient steps.
        learning_rate: step size applied to every gradient.
        regmu, regst, regprob: L2 penalty weights forwarded to `logloss`.

    Returns:
        Tuple (mu, students, problems): the scalar bias, the length-n student
        vector and the length-m problem vector after the last step.
    """
    n, m = table.shape
    mu = np.random.sample()
    students = np.random.sample(n)
    problems = np.random.sample(m)

    for _ in range(iters):
        _, dmu, dstudents, dproblems = logloss(table, mu, students, problems, regmu, regst, regprob)

        # Ascent: logloss returns the gradients of the objective being maximised.
        mu += learning_rate * dmu
        students += learning_rate * dstudents
        # Step along the gradient; scaling by `problems` itself would only
        # inflate the parameters geometrically and ignore the data.
        problems += learning_rate * dproblems

    return mu, students, problems

In [27]:
import os

def ParseTable():
    """Build the result table from the files below the 'tasks' directory.

    Every file found (recursively) under 'tasks' is read as a grid of
    whitespace-separated integers, one row per line. Row 0 of a file acts as
    a column selector: only columns in which row 0 holds 1 are copied into
    the table, and they are copied for every row, row 0 included. Selected
    columns of successive files are appended left to right. Directories and
    files are visited in sorted order so the column layout is reproducible.

    Returns:
        Float numpy array of shape (rows per file, total selected columns).

    Raises:
        ValueError: if no file is found, a file has no data rows, or a file
            has a different number of rows than the first file.
    """
    series = []
    studentsCount = None
    for dirname, dirnames, filenames in os.walk('tasks'):
        dirnames.sort()  # in-place sort makes os.walk descend in a fixed order
        for filename in sorted(filenames):
            path = os.path.join(dirname, filename)
            with open(path) as fin:
                # Drop blank lines: a trailing newline must not turn into an
                # empty phantom row.
                rows = [list(map(int, line.split()))
                        for line in fin.read().splitlines() if line.strip()]
            if not rows:
                raise ValueError(f'{path}: file contains no data rows')
            if studentsCount is None:
                studentsCount = len(rows)
            elif len(rows) != studentsCount:
                raise ValueError(
                    f'{path}: expected {studentsCount} rows, found {len(rows)}')
            series.append(rows)

    if studentsCount is None:
        raise ValueError("no task files found under 'tasks'")

    # Count exactly the columns that will be copied, so the table width always
    # matches the fill loop below.
    selected = [[i for i, flag in enumerate(rows[0]) if flag == 1] for rows in series]
    table = np.zeros((studentsCount, sum(len(cols) for cols in selected)))

    curProblem = 0
    for rows, cols in zip(series, selected):
        for i in cols:
            table[:, curProblem] = [row[i] for row in rows]
            curProblem += 1
    return table

In [45]:
# Offset used by CalculateRatings: scores are shifted so that the entry at
# index 0 ends up at threshold * 100 points.
threshold = 10

def CalculateRatings():
    """Fit the model on the parsed table and write a ranking to 'rating1.txt'.

    The table comes from `ParseTable`, the parameters from
    `LogisticRegression` run for 10000 iterations. Student parameters are
    shifted so that the entry at index 0 maps to `threshold * 100`, then
    scaled by 100. Names are the fixed first entry followed by the non-blank
    lines of 'members.txt', matched to ratings by position. The output holds
    one 'name<TAB>integer rating' line per name, highest rating first.

    Raises:
        ValueError: if there are more names than fitted student rows.
    """
    table = ParseTable()
    _, students, _ = LogisticRegression(table, 10000)
    ratings = (students + threshold - students[0]) * 100

    with open('members.txt') as fin:
        # Blank lines (e.g. produced by a trailing newline) are not names.
        members = [line for line in fin.read().splitlines() if line.strip()]
    names = ['Владик'] + members
    if len(names) > len(ratings):
        raise ValueError(
            f'{len(names)} names but only {len(ratings)} rows in the table')

    res = [[ratings[i], f'{name}\t{int(ratings[i])}'] for i, name in enumerate(names)]

    # The context manager guarantees the ranking is flushed and the handle closed.
    with open('rating1.txt', 'w') as fout:
        for _, row in sorted(res, reverse=True):
            print(row, file=fout)

In [46]:
# Run the whole pipeline; the ranking is written to rating1.txt.
CalculateRatings()