Skip to content

Commit 0178b9e

Browse files
authored
Merge pull request #39 from devfym/develop
add test for correlation and difference
2 parents 25a9164 + ea7236c commit 0178b9e

File tree

7 files changed

+215
-93
lines changed

7 files changed

+215
-93
lines changed

phpunit.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
<directory suffix="Test.php">./tests/Data</directory>
1414
</testsuite>
1515

16+
<testsuite name="Math">
17+
<directory suffix="Test.php">./tests/Math</directory>
18+
</testsuite>
19+
1620
<testsuite name="Regression">
1721
<directory suffix="Test.php">./tests/Regression</directory>
1822
</testsuite>

src/Math/Correlation.php

Lines changed: 41 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Correlation
1313
* @return float
1414
* Get Pearson's Correlation Coefficient.
1515
*/
16-
public static function pearsonCorrelation(DataFrame $df, $xColumn, $yColumn) : float
16+
public static function pearson(DataFrame $df, $xColumn, $yColumn) : float
1717
{
1818
$n = $df->getIndex();
1919
$x = 0; $y = 0; $xy = 0; $x2 = 0; $y2 = 0;
@@ -44,7 +44,7 @@ public static function pearsonCorrelation(DataFrame $df, $xColumn, $yColumn) : f
4444
* @param $yColumn
4545
* @return float
4646
*/
47-
public static function spearmanRankCorrelation(DataFrame $df, $xColumn, $yColumn) : float
47+
public static function spearman(DataFrame $df, $xColumn, $yColumn) : float
4848
{
4949
$xValue = $df->{$xColumn}->all();
5050
$yValue = $df->{$yColumn}->all();
@@ -78,34 +78,53 @@ public static function spearmanRankCorrelation(DataFrame $df, $xColumn, $yColumn
7878

7979
/**
8080
* @param DataFrame $df
81-
* @return array
81+
* @param $xColumn
82+
* @return float
8283
*/
83-
public static function allPearsonCorrelation(DataFrame $df) : array
84+
public static function kendall(DataFrame $df, $xColumn) : float
8485
{
85-
$arr = [];
8686

87-
$columns = $df->getNumericColumns();
87+
$concordant = [];
88+
$discordant = [];
8889

89-
$numeric_count = count($columns);
90+
for ($i = 0; $i < $df->getIndex(); $i++) {
9091

91-
for ($i = 0; $i < $numeric_count; $i++) {
92+
$concordant_count = 0;
93+
$discordant_count = 0;
9294

93-
for ($j = 0; $j < $numeric_count; $j++) {
95+
for ($j = $i + 1; $j < $df->getIndex(); $j++) {
96+
97+
if ($df->{$xColumn}->get($i) < $df->{$xColumn}->get($j)) {
98+
99+
$concordant_count++;
100+
101+
}
102+
103+
if ($df->{$xColumn}->get($i) > $df->{$xColumn}->get($j)) {
94104

95-
$arr[$i][$j] = self::pearsonCorrelation($df, $columns[$i], $columns[$j]);
105+
$discordant_count++;
96106

107+
}
97108
}
98109

110+
$concordant[$i] = $concordant_count;
111+
$discordant[$i] = $discordant_count;
112+
99113
}
100114

101-
return $arr;
115+
$scon = array_sum($concordant);
116+
$sdis = array_sum($discordant);
117+
118+
$t = ($scon - $sdis) / ($df->getIndex() * ($df->getIndex() - 1) / 2);
119+
120+
return round($t, 4);
102121
}
103122

104123
/**
105124
* @param DataFrame $df
106125
* @return array
107126
*/
108-
public static function allSpearmanCorrelation(DataFrame $df) : array
127+
public static function pearsonAll(DataFrame $df) : array
109128
{
110129
$arr = [];
111130

@@ -117,7 +136,7 @@ public static function allSpearmanCorrelation(DataFrame $df) : array
117136

118137
for ($j = 0; $j < $numeric_count; $j++) {
119138

120-
$arr[$i][$j] = self::spearmanRankCorrelation($df, $columns[$i], $columns[$j]);
139+
$arr[$i][$j] = self::pearson($df, $columns[$i], $columns[$j]);
121140

122141
}
123142

@@ -128,67 +147,26 @@ public static function allSpearmanCorrelation(DataFrame $df) : array
128147

129148
/**
130149
* @param DataFrame $df
131-
* @param $xColumn
132-
* @return float
150+
* @return array
133151
*/
134-
public static function kendallCorrelation(DataFrame $df, $xColumn) : float
152+
public static function spearmanAll(DataFrame $df) : array
135153
{
154+
$arr = [];
136155

137-
$concordant = [];
138-
$discordant = [];
139-
140-
for ($i = 0; $i < $df->getIndex(); $i++) {
141-
142-
$concordant_count = 0;
143-
$discordant_count = 0;
144-
145-
for ($j = $i+1; $j < $df->getIndex(); $j++) {
146-
147-
if ($df->{$xColumn}->get($i) < $df->{$xColumn}->get($j)) {
156+
$columns = $df->getNumericColumns();
148157

149-
$concordant_count++;
158+
$numeric_count = count($columns);
150159

151-
}
160+
for ($i = 0; $i < $numeric_count; $i++) {
152161

153-
if ($df->{$xColumn}->get($i) > $df->{$xColumn}->get($j)) {
162+
for ($j = 0; $j < $numeric_count; $j++) {
154163

155-
$discordant_count++;
164+
$arr[$i][$j] = self::spearman($df, $columns[$i], $columns[$j]);
156165

157-
}
158166
}
159167

160-
$concordant[$i] = $concordant_count;
161-
$discordant[$i] = $discordant_count;
162-
163168
}
164169

165-
$scon = array_sum($concordant);
166-
$sdis = array_sum($discordant);
167-
168-
$t = ($scon - $sdis) / ($df->getIndex() * ($df->getIndex() - 1) / 2);
169-
170-
return round($t, 4);
171-
}
172-
173-
/**
174-
* @param $df
175-
* @param $xColumn
176-
* @param $yColumn
177-
* @return float
178-
*/
179-
public static function fTest(DataFrame $df, $xColumn, $yColumn) : float
180-
{
181-
$f = 0;
182-
183-
$xVariance = $df->{$xColumn}->variance();
184-
$yVariance = $df->{$yColumn}->variance();
185-
186-
if ($xVariance > $yVariance) {
187-
$f = $xVariance / $yVariance;
188-
} else {
189-
$f = $yVariance / $xVariance;
190-
}
191-
192-
return round($f, 4);
170+
return $arr;
193171
}
194172
}

src/Math/Difference.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?php
2+
3+
namespace devfym\IntelliPHP\Math;
4+
5+
use devfym\IntelliPHP\Data\DataFrame;
6+
7+
class Difference
8+
{
9+
/**
10+
* @param $df
11+
* @param $xColumn
12+
* @param $yColumn
13+
* @return float
14+
*/
15+
public static function FTest(DataFrame $df, $xColumn, $yColumn) : float
16+
{
17+
$xVariance = $df->{$xColumn}->variance();
18+
$yVariance = $df->{$yColumn}->variance();
19+
20+
if ($xVariance < $yVariance) {
21+
$f = $xVariance / $yVariance;
22+
} else {
23+
$f = $yVariance / $xVariance;
24+
}
25+
26+
return round($f, 4);
27+
}
28+
}

tests/Data/DataFrameTest.php

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
namespace devfym\Tests\Data;
44

55
use devfym\IntelliPHP\Data\DataFrame;
6-
use devfym\IntelliPHP\Math\Correlation;
76
use PHPUnit\Framework\TestCase;
87

98
class DataFrameTest extends TestCase
@@ -92,33 +91,6 @@ public function testExample() : void
9291
$df2->readArray($data2, true);
9392

9493
$this->assertEquals(['a','b','c'], $df2->getColumns());
95-
96-
$this->assertEquals(0.8442, Correlation::pearsonCorrelation($df, 'height_cm', 'weight_kg'));
97-
98-
$data3 = [
99-
'height_cm' => [172, 168, 172, 180, 178],
100-
'weight_kg' => [78, 56, 36, 36, 46],
101-
'age' => [14.5, 12.2, 15, 14, 12.2]
102-
];
103-
104-
$df3 = new DataFrame();
105-
106-
$df3->readArray($data3);
107-
108-
$this->assertEquals(-0.4778, Correlation::pearsonCorrelation($df3, 'height_cm', 'weight_kg'));
109-
$this->assertEquals(0.05, Correlation::spearmanRankCorrelation($df3, 'height_cm', 'weight_kg'));
110-
111-
$data4 = [
112-
'student_value' => [1, 4, 3, 5, 2]
113-
];
114-
115-
$df4 = new DataFrame();
116-
117-
$df4->readArray($data4);
118-
119-
$this->assertEquals(0.2, Correlation::kendallCorrelation($df4, 'student_value'));
120-
121-
$this->assertEquals(12.7833, Correlation::fTest($df3, 'height_cm', 'weight_kg'));
12294
}
12395
}
12496

tests/Data/SeriesTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class SeriesTest extends TestCase
1313
*/
1414
protected $data = [
1515
'name' => ['aaron', 'bambi', 'celine', 'dennise', 'edwin'],
16-
'height_cm' => [150, 168, 172, 178, 180],
16+
'height_cm' => [168, 150, 172, 178, 180],
1717
'weight_kg' => [36, NULL, 56, 60, 78],
1818
'location' => ['makati', 'manila', NULL, 'pasay', 'pasig']
1919
];
@@ -61,7 +61,7 @@ public function testNumericSeries() : void
6161
$this->assertEquals($this->data['height_cm'], $series->all());
6262

6363
// Return data within indices.
64-
$this->assertEquals([168, 172, 178], $series->withinIndexOf(1, 3));
64+
$this->assertEquals([150, 172, 178], $series->withinIndexOf(1, 3));
6565

6666
// Return single data in Series.
6767
$this->assertEquals(172, $series->get(2));

tests/Math/CorrelationTest.php

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
<?php
2+
3+
namespace devfym\Tests\Math;
4+
5+
use devfym\IntelliPHP\Data\DataFrame;
6+
use devfym\IntelliPHP\Math\Correlation;
7+
use PHPUnit\Framework\TestCase;
8+
9+
class CorrelationTest extends TestCase
10+
{
11+
/**
12+
* Sample Data
13+
*/
14+
protected $data = [
15+
'student_id' => [1, 2, 3, 4, 5],
16+
'name' => ['aaron', 'bambi', 'celine', 'dennise', 'edwin'],
17+
'age' => [14.5, 12.2, 15, 14, 12.2],
18+
'height_cm' => [162, 158, 162, 170, 168],
19+
'weight_kg' => [68, 58, 56, 56, 52],
20+
'gpa' => [1.25, 4.0, 2.75, 4.0, 2.25]
21+
];
22+
23+
/**
24+
* @var DataFrame
25+
*/
26+
protected $df;
27+
28+
/**
29+
* CorrelationTest constructor.
30+
* @param null|string $name
31+
* @param array $data
32+
* @param string $dataName
33+
*/
34+
public function __construct(?string $name = null, array $data = [], string $dataName = '')
35+
{
36+
parent::__construct($name, $data, $dataName);
37+
38+
$this->df = new DataFrame();
39+
40+
$this->df->readArray($this->data);
41+
}
42+
43+
/**
44+
* Test: Pearson Correlation.
45+
*/
46+
public function testPearson() : void
47+
{
48+
$expected_r = -0.4423;
49+
50+
$this->assertEquals($expected_r, Correlation::pearson($this->df, 'height_cm', 'weight_kg'));
51+
52+
$expected_r_array = [
53+
[1, -0.3384, 0.7746, -0.896, 0.2679],
54+
[-0.3384, 1, 0.0234, 0.4395, -0.3059],
55+
[0.7746, 0.0234, 1, -0.4423, 0.0432],
56+
[-0.896, 0.4395, -0.4423, 1, -0.5118],
57+
[0.2679, -0.3059, 0.0432, -0.5118, 1]
58+
];
59+
60+
$this->assertEquals($expected_r_array, Correlation::pearsonAll($this->df));
61+
}
62+
63+
/**
64+
* Test: Spearman Rank Correlation.
65+
*/
66+
public function testSpearman() : void
67+
{
68+
$expected_p = 0.2;
69+
70+
$this->assertEquals($expected_p, Correlation::spearman($this->df, 'height_cm', 'weight_kg'));
71+
72+
$expected_p_array = [
73+
[1, -0.15, 0.7, -0.5, 0.2],
74+
[-0.15, 1, 0.35, 0.55, 0.05],
75+
[0.7, 0.35, 1, 0.2, 0.4],
76+
[-0.5, 0.55, 0.2, 1, 0.2],
77+
[0.2, 0.05, 0.4, 0.2, 1]
78+
];
79+
80+
$this->assertEquals($expected_p_array, Correlation::spearmanAll($this->df));
81+
}
82+
83+
/**
84+
* Test: Kendall Rank Correlation
85+
*/
86+
public function testKendall() : void
87+
{
88+
$expected_t = 0.1;
89+
90+
$this->assertEquals($expected_t, Correlation::kendall($this->df, 'gpa'));
91+
}
92+
}

0 commit comments

Comments
 (0)