Commit

update doc

aksnzhy committed Jan 17, 2018
1 parent 06af011 commit 83c0c6f

Showing 2 changed files with 38 additions and 37 deletions.
27 changes: 10 additions & 17 deletions python_api.rst
@@ -32,11 +32,17 @@ Here is a simple Python demo to demonstrate how to use xLearn. You can check out
import xlearn as xl

# Training task
- ffm_model = xl.create_ffm()
- ffm_model.setTrain("./small_train.txt")
- ffm_model.setValidate("./small_test.txt")
- param = {'task':'binary', 'lr':0.2, 'lambda':0.002}
+ ffm_model = xl.create_ffm()                 # Use field-aware factorization machine
+ ffm_model.setTrain("./small_train.txt")     # Training data
+ ffm_model.setValidate("./small_test.txt")   # Validation data
+
+ # param:
+ #  0. binary classification
+ #  1. learning rate
+ #  2. regular lambda
+ param = {'task':'binary', 'lr':0.2, 'lambda':0.002}

# Train model
ffm_model.fit(param, "./model.out")
A portion of xLearn's output ::
@@ -353,19 +359,6 @@ following Python code multiple times, we may get different loss value at each ep
The 2nd time: 0.449302
The 3rd time: 0.449185

Users can set the number of threads for xLearn by using the ``nthread`` parameter: ::

import xlearn as xl

# Training task
ffm_model = xl.create_ffm()
ffm_model.setTrain("./small_train.txt")
param = {'task':'binary', 'lr':0.2, 'lambda':0.002, 'nthread':2}
ffm_model.fit(param, "./model.out")

If you don't set this parameter, xLearn uses all of the CPU cores by default.

Users can disable lock-free training by using the ``disableLockFree()`` API: ::

import xlearn as xl
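# NOTE: the commit view truncates this snippet here; the lines below are a
# hedged reconstruction that follows the pattern of the earlier examples.
ffm_model = xl.create_ffm()
ffm_model.setTrain("./small_train.txt")
ffm_model.disableLockFree()  # serialize gradient updates, so the loss is reproducible across runs
param = {'task':'binary', 'lr':0.2, 'lambda':0.002}
ffm_model.fit(param, "./model.out")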
48 changes: 28 additions & 20 deletions xlearn_api.rst
@@ -22,33 +22,33 @@ Options: ::
4 -- factorization machines (FM)
5 -- field-aware factorization machines (FFM)
- -x <metric>          :  The metric can be 'acc', 'prec', 'recall', 'f1', 'auc' (classification), and
-                         'mae', 'mape', 'rmsd (rmse)' (regression). By default, xLearn will not print
-                         any evaluation metric information.
+ -x <metric>          :  The metric can be 'acc', 'prec', 'recall', 'f1', 'auc' for classification, and
+                         'mae', 'mape', 'rmsd (rmse)' for regression. By default, xLearn will not print
+                         any evaluation metric information (only the loss value).
- -p <opt_method>      :  Choose the optimization method, including 'sgd', 'adagrad', and 'ftrl'.
-                         By default, we use the adagrad optimization.
+ -p <opt_method>      :  Choose the optimization method, including 'sgd', 'adagrad', and 'ftrl'.
+                         By default, we use the 'adagrad' optimization.
-v <validate_file>     :  Path of the validation data file. This option will be empty by default,
-                         and in this way, xLearn will not perform validation.
+                         and in this way, xLearn will not perform the validation process.
- -m <model_file>      :  Path of the model checkpoint file. By default, the model file name will be
-                         set to 'train_file' + '.model'. If we set this value to 'none', xLearn will
-                         not dump the model checkpoint after training.
+ -m <model_file>      :  Path of the model dump file. By default, the model file name is 'train_file' + '.model'.
+                         If we set this value to 'none', xLearn will not dump the model checkpoint after training.

- -t <txt_model_file>  :  Path of the txt model checkpoint file. By default, this option is empty
+ -t <txt_model_file>  :  Path of the txt model checkpoint file. By default, we do not set this option
                          and xLearn will not dump the txt model. For now, only the bias and linear term
-                         will dump to the txt model checkpoint file.
+                         will be dumped to the txt model file.
- -l <log_file>        :  Path of the log file. Using '/tmp/xlearn_log/' by default.
+ -l <log_file>        :  Path of the log file. Using '/tmp/xlearn_log.*' by default.
-k <number_of_K>       :  Number of the latent factors used by fm and ffm tasks. Using 4 by default.
                          Note that we get the same model size when setting k to 1 or 4, because we use
                          SSE instructions and the memory needs to be aligned. So even if you assign
                          k = 1, we still fill dummy zeros from k = 2 to 4.
- -r <learning_rate>   :  Learning rate for stochastic gradient descent. Using 0.2 by default.
-                         xLearn uses adaptive gradient descent (AdaGrad) for the optimization problem,
-                         and the learning rate will be changed adaptively.
+ -r <learning_rate>   :  Learning rate for the optimization method. Using 0.2 by default.
+                         xLearn can use adaptive gradient descent (AdaGrad) for optimization;
+                         if you choose the AdaGrad method, the learning rate will be adjusted adaptively.
-b <lambda_for_regu>   :  Lambda for L2 regularization. Using 0.00002 by default. We can disable the
                          regularization term by setting this value to 0.0.
@@ -59,7 +59,9 @@
-e <epoch_number>      :  Number of epochs for training. Using 10 by default. Note that xLearn will
                          perform early stopping by default, so this value is just an upper bound.
-f <fold_number>       :  Number of folds for cross-validation. Using 5 by default.

+ -nthread <thread number> :  Number of threads for multi-threaded training.
--disk                 :  Enable on-disk training for large-scale machine learning problems.
@@ -79,13 +81,13 @@ Options: ::
--quiet                :  Don't print any evaluation information during training;
                          just train the model quietly.

- -alpha               :  Used by ftrl.
+ -alpha               :  Hyperparameter used by ftrl.
- -beta                :  Used by ftrl.
+ -beta                :  Hyperparameter used by ftrl.
- -lambda_1            :  Used by ftrl.
+ -lambda_1            :  Hyperparameter used by ftrl.
- -lambda_2            :  Used by ftrl.
+ -lambda_2            :  Hyperparameter used by ftrl.
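The ftrl options above also map onto the Python API's ``param`` dict. A minimal hedged sketch of selecting the 'ftrl' optimizer and its hyperparameters (the values shown are illustrative assumptions, not recommended settings): ::

import xlearn as xl

ffm_model = xl.create_ffm()
ffm_model.setTrain("./small_train.txt")

# 'opt' selects the optimizer; alpha, beta, lambda_1, and lambda_2 are the
# ftrl hyperparameters listed above (values here are illustrative assumptions).
param = {'task':'binary', 'lr':0.2, 'lambda':0.002,
         'opt':'ftrl', 'alpha':0.002, 'beta':0.8,
         'lambda_1':0.001, 'lambda_2':1.0}
ffm_model.fit(param, "./model.out")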

For Prediction: ::

@@ -98,6 +100,12 @@ Options: ::
-l <log_file_path> : Path of the log file. Using '/tmp/xlearn_log' by default.

+ -nthread <thread number> :  Number of threads for multi-threaded prediction.

+ --sign               :  Convert output to 0 and 1.

+ --sigmoid            :  Convert output to 0~1 (probability).
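A hedged Python-API counterpart of the prediction options above; ``setTest``, ``setSign``, ``setSigmoid``, and ``predict`` follow the patterns used elsewhere in xLearn's documentation, and the file paths are assumptions: ::

import xlearn as xl

ffm_model = xl.create_ffm()
ffm_model.setTest("./small_test.txt")  # Test data
ffm_model.setSigmoid()                 # Convert output to 0~1 (probability), like --sigmoid
# Use ffm_model.setSign() instead to convert output to 0 and 1, like --sign
ffm_model.predict("./model.out", "./output.txt")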

xLearn Python API
------------------------------
