
Commit

Working on documentation.
cesarsouza committed Mar 15, 2015
1 parent c4ee453 commit 4c95329
Showing 3 changed files with 130 additions and 23 deletions.
Release notes.txt (13 changes: 9 additions & 4 deletions)
@@ -19,20 +19,25 @@ Version updates and fixes:
- Improving sample generation for Poisson and Rayleigh distributions;
- Updating all univariate distributions to support sample generation;
- Making sure all probability distributions implement IFormattable;
- Adding Generalized Beta distribution with PERT estimation;
- Adding sample generation in Beta and Generalized Beta distributions;
- Adding estimation using the Method-of-moments and Maximum Likelihood;
- Adding support for weighted samples in LogisticRegressionAnalysis;
- Adding a named constructor to create an Analysis from summary data;
- Adding all missing Shapiro-Wilk distribution's methods;
- Adding a common interface for radial basis function kernels;
- Adding a new generic Gaussian kernel for creating composite kernels;
- Adding a Windowing filter in the Statistics filters namespace.

* Accord.Math
-  - Updating Augmented Lagrangian to detect situations where the
-    inner optimization algorithm has diverged more accurately;
+  - Updating Augmented Lagrangian to detect more accurately
+    when the inner optimization algorithm has diverged;
- Adding a new Fast Fourier Transform (FFT) implementation for general
matrices and vectors whose dimensions are not necessarily powers of 2.

* Accord.MachineLearning
-  - Tree inducing algorithms (ID3 and C4.5) can now reuse decision
-    variables multiple times when creating a decision tree;
+  - Updating tree inducing algorithms (ID3 and C4.5) so they can reuse
+    decision variables multiple times when creating a decision tree;
- Correcting Levenberg-Marquardt's chain-rule Jacobian calculation
when there are many output neurons in the learned neural network;
- Updating the way SVM learning algorithms detect whether a machine is linear or not;
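To give a feel for the sample-generation support mentioned in the notes above, the sketch below draws samples from a Rayleigh distribution and fits a fresh distribution to them. The Generate(int) and Fit(double[]) members used here are an assumption borrowed from later releases of the library; the exact member names in this version may differ.

    using System;
    using Accord.Statistics.Distributions.Univariate;

    class SamplingSketch
    {
        static void Main()
        {
            // Rayleigh distribution with scale parameter 2.5
            var rayleigh = new RayleighDistribution(2.5);

            // Draw 10,000 pseudo-random samples from it
            // (the Generate(int) member is an assumption taken from later releases)
            double[] samples = rayleigh.Generate(10000);

            // Fit a second distribution to the generated samples; its estimated
            // mean should land close to the mean of the original distribution
            var estimated = new RayleighDistribution(1.0);
            estimated.Fit(samples);

            Console.WriteLine(rayleigh.Mean);   // about 3.13 (sigma * sqrt(pi/2))
            Console.WriteLine(estimated.Mean);  // should be close to the value above
        }
    }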
Sources/Accord.Statistics/Analysis/LogisticRegressionAnalysis.cs (62 changes: 62 additions & 0 deletions)
@@ -62,6 +62,12 @@ namespace Accord.Statistics.Analysis
/// </remarks>
///
/// <example>
/// <para>
/// The following example shows how to create a logistic regression analysis using a full
/// dataset composed of input vectors and a binary output vector. Each input vector
/// has an associated label (1 or 0) in the output vector, where 1 represents a positive
/// label (yes, or true) and 0 represents a negative label (no, or false).</para>
///
/// <code>
/// // Suppose we have the following data about some patients.
/// // The first variable is continuous and represents patient
@@ -125,6 +131,62 @@ namespace Accord.Statistics.Analysis
///
/// // For those inputs, the answer probability is approximately 75%.
/// </code>
///
/// <para>
/// The analysis can also be created from data given in summary form. Instead of having
/// one input vector associated with a single positive or negative label, each input vector
/// is associated with the counts of positive and negative labels observed in the original dataset.
/// </para>
///
/// <code>
/// // Suppose we have a (fictitious) data set about patients who
/// // underwent cardiac surgery. The first column gives the number
/// // of arterial bypasses performed during the surgery. The second
/// // column gives the number of patients whose surgery went well,
/// // while the third column gives the number of patients who had
/// // at least one complication during the surgery.
/// //
/// int[,] data =
/// {
/// //  # of bypasses   success    complications
/// { 1, 140, 45 },
/// { 2, 130, 60 },
/// { 3, 150, 31 },
/// { 4, 96, 65 }
/// };
///
///
/// double[][] inputs = data.GetColumn(0).ToDouble().ToArray();
///
/// int[] positive = data.GetColumn(1);
/// int[] negative = data.GetColumn(2);
///
/// // Create a new Logistic Regression Analysis from the summary data
/// var regression = LogisticRegressionAnalysis.FromSummary(inputs, positive, negative);
///
/// regression.Compute(); // compute the analysis.
///
/// // Now we can show a summary of the analysis
/// DataGridBox.Show(regression.Coefficients);
///
///
/// // We can also investigate all parameters individually. For
/// // example, the coefficient values will be available in the
/// // following vector:
///
/// double[] coef = regression.CoefficientValues;
///
/// // The first value refers to the model's intercept term. We
/// // can also retrieve the odds ratios and standard errors:
///
/// double[] odds = regression.OddsRatios;
/// double[] stde = regression.StandardErrors;
///
///
/// // Finally, we can use it to estimate risk for a new patient
/// double y = regression.Regression.Compute(new double[] { 4 });
/// </code>
///
/// </example>
///
[Serializable]
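Because the code listing of the first example is collapsed in the diff above, here is a minimal sketch of the full-dataset form it describes. It uses only members that appear elsewhere on this page (the constructor taking inputs and a 0/1 output vector, Compute, and Regression.Compute); the data values are invented purely for illustration.

    using System;
    using Accord.Statistics.Analysis;

    class FullDataSketch
    {
        static void Main()
        {
            // Toy data, made up purely for illustration: one continuous input
            // variable per patient and a binary label (1 = positive, 0 = negative)
            double[][] inputs =
            {
                new double[] { 33.0 },
                new double[] { 38.0 },
                new double[] { 46.0 },
                new double[] { 55.0 },
                new double[] { 62.0 },
                new double[] { 70.0 },
            };

            double[] outputs = { 0, 1, 0, 1, 0, 1 };

            // Create and compute the analysis from the full dataset
            var analysis = new LogisticRegressionAnalysis(inputs, outputs);
            analysis.Compute();

            // Query the fitted model: the result is the estimated probability
            // of a positive label for the given input vector
            double p = analysis.Regression.Compute(new double[] { 50.0 });
            Console.WriteLine(p);
        }
    }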
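The summary form described in the second example is equivalent to expanding each row (input vector, number of positives, number of negatives) into individual labelled observations. The helper below is a minimal sketch of that expansion; the name ExpandSummary is illustrative only, and the library's own Accord.Statistics.Tools.Expand, used in the test further down, serves the same purpose.

    using System.Collections.Generic;

    static class SummarySketch
    {
        // Expand summary rows (input vector, #positive, #negative) into one
        // labelled observation per case: 1 for each positive outcome and
        // 0 for each negative outcome.
        public static void ExpandSummary(double[][] inputs, int[] positive, int[] negative,
            out double[][] expandedInputs, out double[] expandedOutputs)
        {
            var xs = new List<double[]>();
            var ys = new List<double>();

            for (int i = 0; i < inputs.Length; i++)
            {
                for (int j = 0; j < positive[i]; j++) { xs.Add(inputs[i]); ys.Add(1); }
                for (int j = 0; j < negative[i]; j++) { xs.Add(inputs[i]); ys.Add(0); }
            }

            expandedInputs = xs.ToArray();
            expandedOutputs = ys.ToArray();
        }
    }

Fitting an ordinary LogisticRegressionAnalysis on the expanded data should therefore produce the same coefficients as calling FromSummary on the compact form, which is exactly what the test below asserts.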
@@ -224,30 +224,79 @@ public void ComputeTest4()
[TestMethod()]
public void FromSummaryTest1()
{
// Suppose we have a (fictitious) data set about patients who
// underwent cardiac surgery. The first column gives the number
// of arterial bypasses performed during the surgery. The second
// column gives the number of patients whose surgery went well,
// while the third column gives the number of patients who had
// at least one complication during the surgery.
//
int[,] data =
{
-                { 1, 140, 45 },
-                { 2, 130, 60 },
-                { 3, 150, 31 },
-                { 4,  96, 65 }
+                //  # of bypasses  success   complications
+                { 1, 140, 45 },
+                { 2, 130, 60 },
+                { 3, 150, 31 },
+                { 4,  96, 65 }
};


double[][] inputs = data.GetColumn(0).ToDouble().ToArray();

int[] positive = data.GetColumn(1);
int[] negative = data.GetColumn(2);

// Create a new Logistic Regression Analysis from the summary data
var regression = LogisticRegressionAnalysis.FromSummary(inputs, positive, negative);

regression.Compute(); // compute the analysis.

// Now we can show a summary of the analysis
// DataGridBox.Show(regression.Coefficients);


// We can also investigate all parameters individually. For
// example, the coefficient values will be available in the
// following vector:

double[] coef = regression.CoefficientValues;

// The first value refers to the model's intercept term. We
// can also retrieve the odds ratios and standard errors:

double[] odds = regression.OddsRatios;
double[] stde = regression.StandardErrors;


// Finally, we can use it to estimate risk for a new patient
double y = regression.Regression.Compute(new double[] { 4 });




Assert.AreEqual(3.7586367581050162, odds[0], 1e-8);
Assert.AreEqual(0.85772731075090014, odds[1], 1e-8);
Assert.IsFalse(odds.HasNaN());

Assert.AreEqual(0.20884336554629004, stde[0], 1e-8);
Assert.AreEqual(0.075837785246620285, stde[1], 1e-8);
Assert.IsFalse(stde.HasNaN());

Assert.AreEqual(0.67044096045332713, y, 1e-8);
Assert.IsFalse(Double.IsNaN(y));

LogisticRegressionAnalysis expected;
-            LogisticRegressionAnalysis actual;


{
int[] qtr = data.GetColumn(0);
-                int[] positive = data.GetColumn(1);
-                int[] negative = data.GetColumn(2);

var expanded = Accord.Statistics.Tools.Expand(qtr, positive, negative);

-                double[][] inputs = expanded.GetColumn(0).ToDouble().ToArray();
+                double[][] inp = expanded.GetColumn(0).ToDouble().ToArray();
double[] outputs = expanded.GetColumn(1).ToDouble();

-                expected = new LogisticRegressionAnalysis(inputs, outputs);
+                expected = new LogisticRegressionAnalysis(inp, outputs);

expected.Compute();

@@ -260,17 +309,8 @@ public void FromSummaryTest1()
}


-            {
-                double[][] qtr = data.GetColumn(0).ToDouble().ToArray();
-
-                int[] positive = data.GetColumn(1);
-                int[] negative = data.GetColumn(2);
-
-                actual = LogisticRegressionAnalysis.FromSummary(qtr, positive, negative);
-
-                actual.Compute();
-            }

+            var actual = regression;
Assert.AreEqual(expected.Coefficients[0].Value, actual.Coefficients[0].Value, 1e-8);
Assert.AreEqual(expected.Coefficients[1].Value, actual.Coefficients[1].Value, 1e-8);

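As a quick numeric cross-check of the values asserted in the test: the reported odds ratios are the exponentials of the model coefficients, so the risk predicted for a patient with 4 bypasses is the logistic function applied to the corresponding linear predictor. The sketch below reproduces the asserted y of roughly 0.6704 from the two asserted odds ratios alone.

    using System;

    class RiskCheckSketch
    {
        static void Main()
        {
            double interceptOdds = 3.7586367581050162; // odds[0] asserted above
            double slopeOdds = 0.85772731075090014;    // odds[1] asserted above

            // Recover the coefficients from the odds ratios and apply the logistic link
            double score = Math.Log(interceptOdds) + 4 * Math.Log(slopeOdds);
            double risk = 1.0 / (1.0 + Math.Exp(-score));

            Console.WriteLine(risk); // prints approximately 0.6704, matching the asserted y
        }
    }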
