Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 110 lines (100 sloc) 3.993 kb
783f30b Andrew Maas all code now in place. sample dataset included but supporting libraries ...
authored
classdef LblDmParam < handle
    %LBLDMPARAM Parameters for a lbl document model optimization
    %   Holds the model hyper-parameters and provides index accessors that
    %   map each parameter group onto a single flat parameter vector.
    %
    %   Layout of the flat vector, in order:
    %     1. word representation matrix  (RepVecDim * DictSize entries)
    %     2. word bias vector            (DictSize entries)
    %     3. rating softmax weights      ((NumRatingDims-1)*(RepVecDim+1)
    %                                     entries, only if UseRatings)
    %     4. document theta matrix       (NumDocs * RepVecDim entries)
    %
    %   This is a handle class, so methods that assign properties (such as
    %   fromVector) modify the object in place.

    properties
        % dimensionality of word vector representations
        RepVecDim = 100;
        % size of input dictionary (and target dictionary)
        DictSize = 400;
        % number of documents in the training set
        NumDocs = 1000;
        % weighting factors on the regularizers
        LambdaRc = 0.00001;
        LambdaDt = 0.0001;
        % store filenames used in training
        BowFname = '';
        LabelFname = '';
        VocabFname = '';
        % number of documents to process at one time in objective
        % batching for computational purposes only,
        % full batch used always when computing objective
        BatchSize = 100000;
        % if flag is true uses words predicting ratings to train vectors
        % WARNING ratings option not supported by this code
        UseRatings = 0;
        % number of dimensions in rating data. 2 = binary polarity
        NumRatingDims = 2;
        % 0 indicates L2 regularization of doc thetas.
        % otherwise L1 is used, and this sets the logCosh scale
        % WARNING only L2 supported by this code. L1 doesn't work well
        L1Reg = 0;
    end

    methods
        % accessors to get indices of params in single vector

        function pv = toVector(AP)
            %TOVECTOR Return the numeric params as a 1x8 row vector.
            %   Order: RepVecDim, DictSize, NumDocs, LambdaRc, LambdaDt,
            %   BatchSize, UseRatings, NumRatingDims.
            %   The filename properties and L1Reg are not included.
            pv = [AP.RepVecDim, AP.DictSize, AP.NumDocs, ...
                AP.LambdaRc, AP.LambdaDt, AP.BatchSize, ...
                AP.UseRatings, AP.NumRatingDims];
        end

        function fromVector(AP, pv)
            %FROMVECTOR Set params from a vector laid out as in toVector.
            %   pv must have at least 8 elements; only the first 8 are read.
            %   The filename properties and L1Reg are left unchanged.
            AP.RepVecDim = pv(1);
            AP.DictSize = pv(2);
            AP.NumDocs = pv(3);
            AP.LambdaRc = pv(4);
            AP.LambdaDt = pv(5);
            AP.BatchSize = pv(6);
            AP.UseRatings = pv(7);
            AP.NumRatingDims = pv(8);
        end

        function repConInd = repConIndex(AP)
            %REPCONINDEX Indices of the word representation matrix.
            %   HACK lblDmObjAltMF assumes repConMat and wb are first 2 params
            repConInd = 1:(AP.RepVecDim * AP.DictSize);
        end

        function wbInd = wordBiasIndex(AP)
            %WORDBIASINDEX Indices of the word bias vector.
            %   Placed immediately after the word representation matrix.
            startInd = (AP.RepVecDim * AP.DictSize) + 1;
            wbInd = startInd : (startInd + AP.DictSize - 1);
        end

        function rlrInd = ratingLrIndex(AP)
            %RATINGLRINDEX Indices of the softmax layer weights.
            %   weights for softmax layer with biases at the end
            %   we use k-1 definition for softmax
            %   Placed immediately after the word bias vector. Computed
            %   regardless of the UseRatings flag.
            startInd = ((AP.RepVecDim + 1) * AP.DictSize) + 1;
            numWeights = (AP.NumRatingDims - 1) * (AP.RepVecDim + 1);
            rlrInd = startInd : (startInd - 1 + numWeights);
        end

        function thetaMatInd = thetaMatIndex(AP)
            %THETAMATINDEX Indices of all document transform weights.
            %   Covers NumDocs * RepVecDim entries following the rating
            %   weights (if UseRatings) or the word biases (otherwise).
            startInd = AP.thetaOffset() + 1;
            matSize = AP.NumDocs * AP.RepVecDim;
            thetaMatInd = startInd : (startInd + matSize - 1);
        end

        function thetaVecInd = thetaVecIndex(AP, docIndStart, numDocs)
            %THETAVECINDEX Indices of the weights for a run of documents.
            %   docIndStart  1-based index of the first document.
            %   numDocs      number of consecutive documents (default 1).
            %   Returns the RepVecDim * numDocs indices for documents
            %   docIndStart .. docIndStart+numDocs-1.
            if nargin < 3
                numDocs = 1;
            end
            % Each document occupies RepVecDim entries, so the start offset
            % is stepped per document. It must not scale with numDocs, or a
            % multi-document request would skip past the requested documents.
            startInd = AP.thetaOffset() + AP.RepVecDim * (docIndStart - 1) + 1;
            vecSize = AP.RepVecDim * numDocs;
            thetaVecInd = startInd : (startInd + vecSize - 1);
        end

        function numParams = totalNumParams(AP)
            %TOTALNUMPARAMS Total number of entries in the flat vector.
            %   Equals the last index of the theta matrix block.
            numParams = AP.thetaOffset() + AP.NumDocs * AP.RepVecDim;
        end
    end

    methods (Access = private)
        function offset = thetaOffset(AP)
            %THETAOFFSET Number of entries that precede the theta block.
            %   Equals the last index of ratingLrIndex when UseRatings is
            %   set, and the last index of wordBiasIndex otherwise.
            offset = (AP.RepVecDim + 1) * AP.DictSize;
            if AP.UseRatings
                offset = offset + (AP.NumRatingDims - 1) * (AP.RepVecDim + 1);
            end
        end
    end

end

Something went wrong with that request. Please try again.