In [36]:
import surprise as sp
import pandas as pd
import numpy as np
import math
from collections import defaultdict
from scipy import stats

#1.Loading the dataset and Excluding the outliers
kbdata = pd.read_csv("/data/fjsdata/ctKngBase/kb.csv", sep='|', low_memory=False)
#seven month multiply thirty days per month is equal to 210,and one time per day multiply 210 is 210.
kbdata = kbdata.loc[(kbdata['num']<200)]
print ('Dataset shape is:%d rows and %d columns'%(kbdata.shape[0],kbdata.shape[1]))

#2.Calculating the mean of CSRs and KEs.
Ab_csr = kbdata['num'].groupby(kbdata['csr']).mean()#the type of groupby is Series
Di_ke = kbdata['num'].groupby(kbdata['ke']).mean()
#3.Calculating the IRT of every pair<csr,ke> which is the pdf of norm
#mu=Di_ke,sigma=Ab_csr
#x[2]=num,x[1]=ke,x[0]=csr
kbdata['irt'] = kbdata.apply(lambda x: "{:.8f}".format(stats.norm.pdf(int(x[2]), Di_ke[int(x[1])], Ab_csr[int(x[0])])),axis=1)
print (kbdata.head())

Dataset shape is:2547452 rows and 3 columns
    csr     ke  num         irt
0  2986  42211    1  0.09717266
1  2986  28115    2  0.00000000
2  2986  29249    8  0.00142802
3  2986  75667    1  0.19368211
4  2986  74903    1  0.17465533


In [39]:
#SVD,threshold=0.1
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.SVD()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.1):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

  K          Precisions             Recalls                NDCG
  5          0.64534554          0.54452437          0.93450164
 10          0.63577434          0.59990276          0.93442357
 15          0.62920554          0.63537925          0.93470233
 20          0.62379936          0.66120616          0.93491894


In [40]:
#SVD,threshold=0.0
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.SVD()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.0):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

  K          Precisions             Recalls                NDCG
  5          0.97780279          0.20617431          0.93414137
 10          0.96902750          0.27575618          0.93380992
 15          0.96274146          0.32310532          0.93377836
 20          0.95811974          0.36025142          0.93437106


In [41]:
#NMF,threshold=0.1
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.NMF()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.1):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

  K          Precisions             Recalls                NDCG
  5          0.99660174          0.48006813          0.96399688
 10          0.99528075          0.50263986          0.96371162
 15          0.99460948          0.51505572          0.96331910
 20          0.99412368          0.52267691          0.96314211


In [42]:
#KNN,threshold=0.1
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.KNNWithMeans()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.1):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

Computing the msd similarity matrix...
Done computing similarity matrix.
  K          Precisions             Recalls                NDCG
  5          0.78529268          0.55549431          0.95665176
 10          0.77957600          0.61425370          0.95737474
 15          0.77606672          0.65202463          0.95785396
 20          0.77332110          0.67972914          0.95844873


In [43]:
#CoClustering,threshold=0.1
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.CoClustering()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.1):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

  K          Precisions             Recalls                NDCG
  5          1.00000000          0.37979640          0.46956242
 10          1.00000000          0.37979640          0.50726660
 15          1.00000000          0.37979640          0.53147843
 20          1.00000000          0.37979640          0.54965492


In [44]:
#SVDPP,threshold=0.1
#2.Transforming into data format of surprise and spliting the train-set and test-set
# The columns must correspond to user id, item id and ratings (in that order).
reader = sp.Reader(rating_scale=(0, 1))
spdata = sp.Dataset.load_from_df(kbdata[['csr', 'ke', 'irt']],reader)
# sampling random trainset and testset, and test set is made of 10% of the ratings.
#trainset, testset = sp.model_selection.train_test_split(spdata, test_size=.1)
trainset = spdata.build_full_trainset()
testset = trainset.build_testset()

#3.Training the model and predicting ratings for the testset
algo = sp.SVDpp()
algo.fit(trainset)
predictions = algo.test(testset)#testset include positive and negtive sample.

#4.measuring the performance of SVD by precision, recall and  NDCG
#print ('RMSE of testset is:%.8f'%(sp.accuracy.rmse(predictions)))
def calc_dcg(items):
    dcg = 0
    i = 0
    for item in items:
        i += 1
        dcg += (math.pow(2, item) - 1)/ math.log(1 + i, 2)
    return dcg
def index_at_k(predictions, k, threshold=0.1):
   #Return precision and recall at k metrics for each user.
    # First map the predictions to each user.
    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    ndcgs =dict()
    for uid, user_ratings in user_est_true.items():
        # Sort user ratings by estimated value
        user_ratings.sort(key=lambda x: x[0], reverse=True)
        # Number of relevant items
        n_rel = sum((true_r > threshold) for (_, true_r) in user_ratings)
        # Number of recommended items in top k
        n_rec_k = sum((est > threshold) for (est, _) in user_ratings[:k])
        # Number of relevant and recommended items in top k
        n_rel_and_rec_k = sum(((true_r > threshold) and (est > threshold)) for (est, true_r) in user_ratings[:k])
        # Precision@K: Proportion of recommended items that are relevant
        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 1
        # Recall@K: Proportion of relevant items that are recommended
        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 1
        #true ratings of recommended items in top k
        l_rec_k = [true_r for (_,true_r) in user_ratings[:k]]
        dcg = calc_dcg(l_rec_k)
        #l_rec_k.sort(reverse=True)
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        l_rel_k = [true_r for (_,true_r) in user_ratings[:k]]
        idcg = calc_dcg(l_rel_k)
        if (idcg==0):idcg = 1
        ndcgs[uid]=dcg*1.0/idcg
    return precisions, recalls, ndcgs

print ("%3s%20s%20s%20s" % ('K','Precisions','Recalls','NDCG'))
for k in [5,10,15,20]:#latent factor
    precisions, recalls, ndcgs = index_at_k(predictions, k=k)
    # Precision and recall can then be averaged over all users
    precision = sum(prec for prec in precisions.values()) / len(precisions)
    recall = sum(rec for rec in recalls.values()) / len(recalls)
    ndcg = sum(ndcg for ndcg in ndcgs.values()) / len(ndcgs)
    print ("%3s%20.8f%20.8f%20.8f" % (k, precision, recall, ndcg))

  K          Precisions             Recalls                NDCG
  5          0.74363254          0.55374854          0.95189569
 10          0.73540521          0.61457554          0.95193570
 15          0.72944207          0.65349744          0.95254446
 20          0.72477783          0.68205860          0.95303638


In [35]:
import theano
import pymc3 as pm
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import mean_squared_error
import theano.tensor as tt
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder

#1.Loading the dataset and Excluding the outliers
kbdata = pd.read_csv("/data/fjsdata/ctKngBase/kb.csv", sep='|', low_memory=False)#iterator = True)
#kbdata =kbdata.get_chunk(1000)
#kbdata = kbdata.loc[(kbdata['num']<200)]#seven months, one per day
kbdata['num'] = kbdata['num'].apply(lambda x:1)#assume that the csr have grasped the ke if he readed the ke one time.

#2.constructing the probabilistic model
uNum = len(kbdata['csr'].unique())#numbers of csr
iNum = len(kbdata['ke'].unique())#numbers of ke
UI = np.zeros((uNum, iNum))#turn into sparse matrix
for index, row in kbdata.iterrows(): # get each line
    UI[int(row['csr'])][int(row['ke'])] = row['num']
Y_output = theano.shared(UI)#转numpy array
with pm.Model() as IRT_model:
    # Creating the model
    Ab = pm.Normal('Ability', mu=0, sd=1, shape=(uNum,1)) #skill of csr
    Di = pm.Normal('Difficulty', mu=0, sd=1, shape=(1,iNum))#difficulty of ke
    irt = tt.dot(Ab,Di)
    p = pm.Deterministic('p', pm.math.sigmoid(irt))#Rasch model of irt
    Y = pm.Bernoulli('Y',p=p, observed=Y_output)
    
#3.后验分布计算  
with IRT_model:        
    #start=pm.find_MAP()  # 参数初猜
    #step = pm.Metropolis()
    #trace = pm.sample(100,start=start,step=step,chains=2,cores=8)
    inference = pm.ADVI()
    approx = pm.fit(n=1000, method=inference)
    trace = approx.sample(draws=500)

print (trace['Ability']) 
print (trace['Difficulty'])

Problem occurred during compilation with the command line below:
/usr/bin/g++ -shared -g -O3 -fno-math-errno -Wno-unused-label -Wno-unused-variable -Wno-write-strings -march=broadwell -mmmx -mno-3dnow -msse -msse2 -msse3 -mssse3 -mno-sse4a -mcx16 -msahf -mmovbe -maes -mno-sha -mpclmul -mpopcnt -mabm -mno-lwp -mfma -mno-fma4 -mno-xop -mbmi -mbmi2 -mno-tbm -mavx -mavx2 -msse4.2 -msse4.1 -mlzcnt -mrtm -mhle -mrdrnd -mf16c -mfsgsbase -mrdseed -mprfchw -madx -mfxsr -mxsave -mxsaveopt -mno-avx512f -mno-avx512er -mno-avx512cd -mno-avx512pf -mno-prefetchwt1 -mno-clflushopt -mno-xsavec -mno-xsaves -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512ifma -mno-avx512vbmi -mno-clwb -mno-pcommit -mno-mwaitx --param l1-cache-size=32 --param l1-cache-line-size=64 --param l2-cache-size=35840 -mtune=broadwell -DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -m64 -fPIC -I/usr/local/lib/python3.6/dist-packages/numpy/core/include -I/usr/include/python3.6m -I/usr/local/lib/python3.6/dist-packages/theano/gof/c_

OSError: [Errno 12] Cannot allocate memory

In [6]:
#https://github.com/17zuoye/pyirt
import pandas as pd
from pyirt import irt

data = []
'''
with open("/data/fjsdata/ctKngBase/kb.csv", 'r') as f:
    line_num = 0
    for line in f:
        line_num += 1
        if (line_num != 1):
            if not line.strip():
                continue
            uid, qid, ans = line.strip().split('|')
            if (int(ans)>3):
                data.append([int(uid), int(qid), 1])
            else :data.append([int(uid), int(qid), 0])
print (data[0:5])
'''
data =[[1,1,1],[1,2,0],[2,1,0],[2,2,1]]
item_param, user_param = irt(data, max_iter=2)
print (item_param)
print (user_param)

2019-06-17 01:09:03 INFO: start loading data
start loading data
start loading data
start loading data
start loading data
start loading data
start loading data
2019-06-17 01:09:03 INFO: data loaded
data loaded
data loaded
data loaded
data loaded
data loaded
data loaded
100%|██████████| 2/2 [00:00<00:00, 358.00it/s]
100%|██████████| 2/2 [00:00<00:00, 449.96it/s]
2019-06-17 01:09:03 DEBUG: E step runs for 546.763 sec
E step runs for 546.763 sec
E step runs for 546.763 sec
E step runs for 546.763 sec
E step runs for 546.763 sec
E step runs for 546.763 sec
E step runs for 546.763 sec
100%|██████████| 2/2 [00:00<00:00, 161.33it/s]
2019-06-17 01:09:04 DEBUG: M step runs for 282.779 sec
M step runs for 282.779 sec
M step runs for 282.779 sec
M step runs for 282.779 sec
M step runs for 282.779 sec
M step runs for 282.779 sec
M step runs for 282.779 sec
2019-06-17 01:09:04 DEBUG: score calculating
score calculating
score calculating
score calculating
score calculating
score calculating
score cal

{1: {'beta': -1.8612584399919198e-16, 'alpha': 0.25, 'c': 0.0}, 2: {'beta': -1.8612584399919198e-16, 'alpha': 0.25, 'c': 0.0}}
{1: -1.700029006457271e-16, 2: -1.700029006457271e-16}
