In [7]:
import sys
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_svmlight_file
from sklearn.datasets import dump_svmlight_file
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.svm import SVC
from sklearn.linear_model           
from sklearn.linear_model  import LinearRegression
import matplotlib.pylab as plt
from sklearn.model_selection import GridSearchCV
from pandas import DataFrame

def loadTrainFile():
    tmp = np.loadtxt("train.csv", dtype=np.str, delimiter=",")
    userID = tmp[1:,0].astype(int)
    item1 = tmp[1:,1].astype(int)
    item2 = tmp[1:,2].astype(int)
    labels = tmp[1:,3].astype(int)
    return userID, item1, item2, labels
def loadTestFile():
    tmp = np.loadtxt("test.csv", dtype=np.str, delimiter=",")
    userID = tmp[1:,0].astype(int)
    item1 = tmp[1:,1].astype(int)
    item2 = tmp[1:,2].astype(int)
    return userID, item1, item2
def loadUserFile():
    tmp = np.loadtxt("users.csv", dtype=str, delimiter=",")
    return tmp[1:,1:]

def loadItemFile():
    tmp = np.loadtxt("items.csv", dtype=np.str, delimiter=",")
    return tmp[1:,1:]



In [8]:
########### Data ###########
user_dic = loadUserFile()
item_dic = loadItemFile()
userID, item1, item2, labels = loadTrainFile()

preference = np.zeros([len(user_dic),10], dtype=int)

for idx, label in enumerate(labels):
    if(label==0):
        preference[userID[idx]-1][item1[idx]-1]+=1
        preference[userID[idx]-1][item2[idx]-1]-=1
    else:
        preference[userID[idx]-1][item1[idx]-1]-=1
        preference[userID[idx]-1][item2[idx]-1]+=1

preference = preprocessing.scale(preference, axis=1, copy=False)
X_train = []
y_train = []
for i in range(len(user_dic)):
    for j in range(len(item_dic)):
        X_train.append(np.concatenate([user_dic[i].astype(float),item_dic[j].astype(float)]))
        y_train.append(preference[i][j])
X_train = np.array(X_train)
y_train = np.array(y_train).reshape(len(y_train))
print(y_train)

########### Data ###########

[ 0.         -1.13592367  1.13592367  0.85194275  0.85194275 -0.56796183
 -1.98786642 -0.56796183  0.28398092  1.13592367  0.65938047 -0.65938047
  1.31876095 -0.32969024 -0.98907071 -0.65938047  1.64845118 -0.65938047
  0.98907071 -1.31876095 -0.56796183  1.13592367 -0.56796183 -0.28398092
 -1.41990459  0.56796183  1.98786642  0.56796183 -0.28398092 -1.13592367
  0.58722022 -1.17444044  1.76166066  0.88083033  0.29361011 -1.17444044
 -0.88083033 -1.17444044  0.88083033  0.          0.          0.
  0.         -1.18585412  1.18585412  0.79056942 -1.18585412  1.58113883
  0.39528471 -1.58113883  0.62017367 -1.24034735  1.24034735  0.31008684
  0.93026051  0.         -2.17060786 -0.62017367  0.31008684  0.62017367
  0.         -1.17444044  1.76166066  0.88083033  0.88083033 -1.17444044
 -0.29361011 -1.17444044  0.88083033 -0.58722022  0.60858062 -1.21716124
 -0.60858062 -0.30429031  1.52145155  0.60858062 -1.52145155  1.21716124
 -0.91287093  0.60858062  0.          0.31311215  0.9393364



In [11]:
########### Model ###########

# # Split data into training data and testing data
X_train_fold, X_test_fold, y_train_fold, y_test_fold = train_test_split(X_train, y_train, test_size=0.1, random_state=0)
print(y_train_fold)
print(X_train_fold)
svm = LinearRegression()
svm.fit(X_train_fold,y_train_fold)
#model評估
y_pred = svm.predict(X_test)
print("Misclassified sample: %d" % (Y_test != y_pred).sum())
print("Accuracy: %.2f" % accuracy_score(Y_test,y_pred))
precision,recall,fscore,support = precision_recall_fscore_support(Y_test, y_pred, average='micro')
print("precision: ", precision)
print("recall: ", recall)
print("fscore: ", fscore)

[ 0.64549722  1.76166066  0.57735027  1.15044748 -0.2795085   0.72547625
  0.55048188  0.4472136   0.32969024 -1.17444044  0.64549722  0.91287093
  0.29361011  0.28171808  0.88083033 -0.58722022  0.         -0.4472136
  0.85194275 -1.33333333 -1.46805055 -1.24034735  1.31876095  1.34839972
 -0.67419986  0.62017367 -1.17444044 -0.91287093 -0.29361011  1.19522861
  0.88083033  0.4472136  -1.95655948  0.93026051 -0.60858062 -1.01129979
  0.91287093 -0.65938047 -0.96824584  0.79056942 -1.98786642  0.29880715
  0.30429031 -1.19522861  0.60858062  0.65938047 -0.30429031 -0.58722022
 -0.4472136  -0.89642146  0.8660254  -1.11803399  0.66666667 -0.60858062
 -1.19522861  0.89642146 -0.2795085   1.21716124  1.01129979 -0.84515425
 -0.47673129 -1.62697843  0.          0.          1.17444044  1.17444044
 -1.7038855  -1.17444044  0.29880715  0.28171808  0.58722022  0.88083033
  0.88083033 -0.33709993  1.12687234  0.89642146  0.57735027  1.93649167
 -1.17444044  0.          1.82574186 -0.29361011  0.

NameError: name 'LinearRegression' is not defined

In [None]:
########## Test ##########
print("########## Start Test ##########")

userID, item1, item2 = loadTestFile()

X_test_item1 = np.concatenate([user_dic[userID-1].astype(float), item_dic[item1-1]], axis=1)
X_test_item2 = np.concatenate([user_dic[userID-1].astype(float), item_dic[item2-1]], axis=1)
user_preference_item1 = sess.run(logits, feed_dict={x:X_test_item1})
user_preference_item2 = sess.run(logits, feed_dict={x:X_test_item2})

test_output=[['User-Item1-Item2','Preference']]
for idx in range(userID.shape[0]):
    entry = str(int(userID[idx]))+'-'+str(int(item1[idx]))+'-'+str(int(item2[idx]))
    if(user_preference_item1[idx]>=user_preference_item2[idx]):
        value=0
    else:
        value=1
    test_output.append([entry,value])
print(test_output)
np.savetxt("output.csv", np.array(test_output, dtype=np.str), fmt='%s,%s', delimiter=",")
# print(X_test.shape)
# print(X_test.astype(float))
# for idx, label in enumerate(labels):
# #     print(user_preference[userID[idx]*item1[idx]-1])
# #     print(user_preference[userID[idx]*item2[idx]-1])
#     if(label==0 and (user_preference[userID[idx]*item1[idx]-1]>=user_preference[userID[idx]*item2[idx]-1])):
#         hit+=1
#     elif(label==1 and (user_preference[userID[idx]*item1[idx]-1]<=user_preference[userID[idx]*item2[idx]-1])):
#         hit+=1
        
# test_output=[['User-Item1-Item2','Preference']]
# for idx in range(pridict_output.shape[0]):
#     entry = str(int(userID[idx]))+'-'+str(int(item1[idx]))+'-'+str(int(item2[idx]))
#     value = pridict_output[idx]
#     test_output.append([entry,value])

# print(test_output)
# np.savetxt("output.csv", np.array(test_output, dtype=np.str), fmt='%s,%s', delimiter=",")