Skip to content
This repository has been archived by the owner on Nov 19, 2020. It is now read-only.

Commit

Permalink
GH-55: Adding support for computing TF-IDF vector representations.
Browse files Browse the repository at this point in the history
  • Loading branch information
cesarsouza committed Feb 19, 2017
1 parent cb6052e commit 7c84652
Show file tree
Hide file tree
Showing 4 changed files with 808 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Sources/Accord.MachineLearning/Accord.MachineLearning.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
<Compile Include="Clustering\KMeans\BalancedKMeans.cs" />
<Compile Include="DecisionTrees\DecisionTreeHelper.cs" />
<Compile Include="BaseKNearestNeighbors`2.cs" />
<Compile Include="Representations\TFIDF.cs" />
<Compile Include="Representations\BagOfWords.cs" />
<Compile Include="Rules\AssociationRule.cs" />
<Compile Include="Rules\Apriori.cs" />
Expand Down Expand Up @@ -174,6 +175,8 @@
<Compile Include="VectorMachines\Learning\Base\BaseSupportVectorRegression.cs" />
<Compile Include="VectorMachines\Learning\Base\ISupportVectorMachineLearning.cs" />
<Compile Include="VectorMachines\Learning\LeastSquaresLearning.cs" />
<Compile Include="VectorMachines\Learning\AveragedStochasticGradientDescent.cs" />
<Compile Include="VectorMachines\Learning\StochasticGradientDescent.cs" />
<Compile Include="VectorMachines\Learning\LinearCoordinateDescent.cs" />
<Compile Include="VectorMachines\Learning\LinearDualCoordinateDescent.cs" />
<Compile Include="VectorMachines\Learning\LinearNewtonMethod.cs" />
Expand Down
62 changes: 62 additions & 0 deletions Sources/Accord.MachineLearning/Representations/BagOfWords.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ namespace Accord.MachineLearning
///
[Serializable]
public class BagOfWords : ParallelLearningBase, IBagOfWords<string[]>,
ITransform<string[], Sparse<double>>,
IUnsupervisedLearning<BagOfWords, string[], int[]>
{
// TODO: Replace by TwoWayDictionary
Expand Down Expand Up @@ -125,6 +126,7 @@ public IDictionary<int, string> CodeToString
///
/// <param name="texts">The texts to build the bag of words model from.</param>
///
[Obsolete("Please use the default constructor and pass the texts to the Learn() method.")]
public BagOfWords(params string[][] texts)
{
if (texts == null)
Expand All @@ -139,6 +141,7 @@ public BagOfWords(params string[][] texts)
///
/// <param name="texts">The texts to build the bag of words model from.</param>
///
[Obsolete("Please use the default constructor and pass the texts to the Learn() method.")]
public BagOfWords(params string[] texts)
{
if (texts == null)
Expand Down Expand Up @@ -256,6 +259,31 @@ public double[] Transform(string[] input, double[] result)
return result;
}

/// <summary>
///   Accumulates, into a sparse vector, the occurrence counts of the
///   words of a text that are present in this model's word-to-code map.
/// </summary>
/// <param name="input">The words of the text to be transformed.</param>
/// <param name="result">The sparse vector in which the counts are
///   accumulated. It is updated in place.</param>
/// <returns>The same <paramref name="result"/> instance that was
///   passed in, after the counts have been updated.</returns>
public Sparse<double> Transform(string[] input, Sparse<double> result)
{
    for (int position = 0; position < input.Length; position++)
    {
        int code;
        bool isKnownWord = stringToCode.TryGetValue(input[position], out code);

        // Words without a code are ignored; words with a code are
        // counted only while their count is below MaximumOccurance.
        if (isKnownWord && result[code] < MaximumOccurance)
        {
            result[code]++;
        }
    }

    return result;
}

/// <summary>
/// Applies the transformation to an input, producing an associated output.
/// </summary>
Expand Down Expand Up @@ -388,5 +416,39 @@ public BagOfWords Learn(string[][] x, double[] weights = null)

return this;
}





/// <summary>
///   Applies the transformation to a single text, producing its
///   representation as a sparse vector.
/// </summary>
/// <param name="input">The words of the text to be transformed.</param>
/// <returns>The sparse vector built from the result of the
///   single-argument <c>Transform</c> overload.</returns>
Sparse<double> ITransform<string[], Sparse<double>>.Transform(string[] input)
{
    // Compute the regular representation first, then convert it.
    var dense = Transform(input);
    return Sparse.FromDense(dense);
}

/// <summary>
///   Applies the transformation to a set of texts, producing one
///   sparse vector per text.
/// </summary>
/// <param name="input">The texts to be transformed, each given
///   as an array of words.</param>
/// <returns>A newly allocated array with one entry per input text.</returns>
Sparse<double>[] ITransform<string[], Sparse<double>>.Transform(string[][] input)
{
    var output = new Sparse<double>[input.Length];
    return Transform(input, output);
}

/// <summary>
///   Transforms each text of a set into a sparse vector, storing
///   the vectors in a caller-supplied array.
/// </summary>
/// <param name="input">The texts to be transformed, each given
///   as an array of words.</param>
/// <param name="result">The array that receives the output; entry
///   <c>i</c> is overwritten with the vector computed for
///   <c>input[i]</c>.</param>
/// <returns>The same <paramref name="result"/> array that was
///   passed in, after it has been filled.</returns>
public Sparse<double>[] Transform(string[][] input, Sparse<double>[] result)
{
    // The single-text sparse transform is an explicit interface
    // implementation, so it is only reachable through the interface.
    ITransform<string[], Sparse<double>> sparseTransform = this;

    Parallel.For(0, input.Length, ParallelOptions, index =>
    {
        string[] text = input[index];
        result[index] = sparseTransform.Transform(text);
    });

    return result;
}
}
}
Loading

0 comments on commit 7c84652

Please sign in to comment.