Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 82 additions & 5 deletions danfojs-browser/src/preprocessing/scalers.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const utils = new Utils();
export class MinMaxScaler {
/**
* Fit minmax scaler on data, to obtain their min and max value
* @param {data} data [DataRame | Series | Array]
* @param {data} data [DataFrame | Series | Array]
* @returns Array
*/
fit(data) {
Expand Down Expand Up @@ -77,15 +77,58 @@ export class MinMaxScaler {
.arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supoorted");
throw Error("Value Error: Data type not supported");
}
}

/**
* Restore a transformed array to their original values,
* using the min and max generated from the fitting on data
* @param {Series|Array|DataFrame} data
* @returns Series|DataFrame
*/
inverse_transform(data) {
if (data instanceof Series) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tensor(data.values);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
return new Series(output_data);
} else if (Array.isArray(data)) {
let tensor_data = tensor(data);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
if (utils.__is_1D_array(data)) {
return new Series(output_data);
} else {
return new DataFrame(output_data);
}
} else if (data instanceof DataFrame) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tensor(data.values);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supported");
}
}
}

export class StandardScaler {
/**
*
* @param {data} data [DataRame | Series | Array]
* @param {data} data [DataFrame | Series | Array]
* @returns Array
*/
fit(data) {
Expand Down Expand Up @@ -140,7 +183,41 @@ export class StandardScaler {
let output_data = tensor_data.sub(this.mean).div(this.std).arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supoorted");
throw Error("Value Error: Data type not supported");
}
}

/**
* Restore a transformed array to their original values,
* using the mean and std generated from the fitting on data
* @param {Series|Array|DataFrame} data
* @returns Series|DataFrame
*/
inverse_transform(data) {
if (data instanceof Series) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tensor(data.values);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
return new Series(output_data);
} else if (Array.isArray(data)) {
let tensor_data = tensor(data);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
if (utils.__is_1D_array(data)) {
return new Series(output_data);
} else {
return new DataFrame(output_data);
}
} else if (data instanceof DataFrame) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tensor(data.values);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supported");
}
}
}
Expand Down Expand Up @@ -237,7 +314,7 @@ export class StandardScaler {

// /**
// * Fit robust scalar on data to obtain the first quantile and third quantile
// * @param {data} data [DataRame | Series | Array]
// * @param {data} data [DataFrame | Series | Array]
// * @returns Array
// */
// fit(data){
Expand Down
28 changes: 28 additions & 0 deletions danfojs-browser/tests/preprocessing/scaler.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ describe("Preprocessing", function(){
assert.deepEqual(scaler.transform(new dfd.Series(data)).values, result);
assert.deepEqual(scaler.transform([ 2, 2 ]).values, transform_data);
});
// Inverse of a min-max scaled Series must reproduce the data the scaler was fitted on.
it("should be able to inverse the normalization of a Series", function() {
  const original = [ -1, 2, -0.5, 60, 101, 18 ];
  const scaled = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
  const minmax = new dfd.MinMaxScaler();
  minmax.fit(new dfd.Series(original));
  const restored = minmax.inverse_transform(new dfd.Series(scaled));
  assert.deepEqual(restored.values, original);
});
// Same round trip, column-wise, for a two-column DataFrame.
it("should be able to inverse the normalization of a DataFrame", function(){
  const original = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
  const scaled = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
  const minmax = new dfd.MinMaxScaler();
  minmax.fit(new dfd.DataFrame(original));
  const restored = minmax.inverse_transform(new dfd.DataFrame(scaled));
  assert.deepEqual(restored.values, original);
});
});

describe("StandardScaler", function(){
Expand All @@ -34,6 +48,20 @@ describe("Preprocessing", function(){
assert.deepEqual(scaler.fit(new dfd.DataFrame(data)).round().values, fit_data);
assert.deepEqual(scaler.transform([ [ 2, 2 ] ]).round().values, transform_data);
});
// These cases live in the StandardScaler suite, so they must exercise
// StandardScaler (the previous version instantiated MinMaxScaler and never
// touched StandardScaler.inverse_transform). The check is a round trip
// transform -> inverse_transform, compared with a small tolerance because
// the tensor arithmetic is not exact.
it("should be able to inverse the normalization of a Series", function() {
  const data = [ -1, 2, -0.5, 60, 101, 18 ];
  const scaler = new dfd.StandardScaler();
  scaler.fit(new dfd.Series(data));
  const scaled = scaler.transform(new dfd.Series(data));
  const restored = scaler.inverse_transform(scaled).values;
  assert.equal(restored.length, data.length);
  restored.forEach(function(value, i) {
    assert.ok(Math.abs(value - data[i]) < 1e-3, "index " + i + ": got " + value + ", expected " + data[i]);
  });
});
it("should be able to inverse the normalization of a DataFrame", function(){
  const data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
  const scaler = new dfd.StandardScaler();
  scaler.fit(new dfd.DataFrame(data));
  const scaled = scaler.transform(new dfd.DataFrame(data));
  const restored = scaler.inverse_transform(scaled).values;
  assert.equal(restored.length, data.length);
  restored.forEach(function(row, r) {
    assert.equal(row.length, data[r].length);
    row.forEach(function(value, c) {
      assert.ok(Math.abs(value - data[r][c]) < 1e-3, "cell [" + r + "][" + c + "]: got " + value + ", expected " + data[r][c]);
    });
  });
});
});


Expand Down
2 changes: 2 additions & 0 deletions danfojs-browser/types/preprocessing/scalers.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class MinMaxScaler {
* @returns array
*/
transform(data?: any): Series | DataFrame;
/**
 * Restore transformed values to their original range, using the min and
 * max generated from fitting.
 * @param data Series, DataFrame or array of scaled values
 * @returns Series | DataFrame
 */
inverse_transform(data?: any): Series | DataFrame;
}
export class StandardScaler {
/**
Expand All @@ -24,6 +25,7 @@ export class StandardScaler {
std?: any;
mean?: any;
transform(data?: any): Series | DataFrame;
/**
 * Restore transformed values to their original scale, using the mean and
 * std generated from fitting.
 * @param data Series, DataFrame or array of standardized values
 * @returns Series | DataFrame
 */
inverse_transform(data?: any): Series | DataFrame;
}
import { Series } from "../core/series";
import { DataFrame } from "../core/frame";
78 changes: 78 additions & 0 deletions danfojs-node/src/preprocessing/scalers.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,50 @@ export class MinMaxScaler {
throw Error("Value Error: Data type not supoorted");
}
}


/**
* Restore a transformed array to their original values,
* using the min and max generated from the fitting on data
* @param {Series|Array|DataFrame} data
* @returns Series|DataFrame
*/
inverse_transform(data) {
if (data instanceof Series) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tf.tensor(data.values);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
return new Series(output_data);
} else if (Array.isArray(data)) {
let tensor_data = tf.tensor(data);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
if (utils.__is_1D_array(data)) {
return new Series(output_data);
} else {
return new DataFrame(output_data);
}
} else if (data instanceof DataFrame) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tf.tensor(data.values);
let output_data = tensor_data
.mul(this.max.sub(this.min))
.add(this.min)
.arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supoorted");
}
}
}

export class StandardScaler {
Expand Down Expand Up @@ -143,6 +187,40 @@ export class StandardScaler {
throw Error("Value Error: Data type not supoorted");
}
}

/**
* Restore a transformed array to their original values,
* using the mean and std generated from the fitting on data
* @param {Series|Array|DataFrame} data
* @returns Series|DataFrame
*/
inverse_transform(data) {
if (data instanceof Series) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tf.tensor(data.values);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
return new Series(output_data);
} else if (Array.isArray(data)) {
let tensor_data = tf.tensor(data);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
if (utils.__is_1D_array(data)) {
return new Series(output_data);
} else {
return new DataFrame(output_data);
}
} else if (data instanceof DataFrame) {
if (data.dtypes.includes("string")) {
throw Error("Dtype Error: Cannot perform operation on string dtypes");
}
let tensor_data = tf.tensor(data.values);
let output_data = tensor_data.mul(this.std).add(this.mean).arraySync();
return new DataFrame(output_data);
} else {
throw Error("Value Error: Data type not supoorted");
}
}
}

// export class RobustScaler{
Expand Down
28 changes: 28 additions & 0 deletions danfojs-node/tests/preprocessing/scaler.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,20 @@ describe("Preprocessing", function(){
assert.deepEqual(scaler.transform(new Series(data)).values, result);
assert.deepEqual(scaler.transform([ 2, 2 ]).values, transform_data);
});
// Inverse of a min-max scaled Series must reproduce the data the scaler was fitted on.
it("should be able to inverse the normalization of a Series", function() {
  const original = [ -1, 2, -0.5, 60, 101, 18 ];
  const scaled = [ 0, 0.029411764815449715, 0.0049019609577953815, 0.5980392098426819, 1, 0.18627451360225677 ];
  const minmax = new MinMaxScaler();
  minmax.fit(new Series(original));
  const restored = minmax.inverse_transform(new Series(scaled));
  assert.deepEqual(restored.values, original);
});
// Same round trip, column-wise, for a two-column DataFrame.
it("should be able to inverse the normalization of a DataFrame", function(){
  const original = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
  const scaled = [ [ 0, 0 ], [ 0.25, 0.25 ], [ 0.5, 0.5 ], [ 1, 1 ] ];
  const minmax = new MinMaxScaler();
  minmax.fit(new DataFrame(original));
  const restored = minmax.inverse_transform(new DataFrame(scaled));
  assert.deepEqual(restored.values, original);
});
});

describe("StandardScaler", function(){
Expand All @@ -40,6 +54,20 @@ describe("Preprocessing", function(){
assert.deepEqual(scaler.fit(new DataFrame(data)).round().values, fit_data);
assert.deepEqual(scaler.transform([ [ 2, 2 ] ]).round().values, transform_data);
});
// These cases live in the StandardScaler suite, so they must exercise
// StandardScaler (the previous version instantiated MinMaxScaler and never
// touched StandardScaler.inverse_transform). The check is a round trip
// transform -> inverse_transform, compared with a small tolerance because
// the tensor arithmetic is not exact.
it("should be able to inverse the normalization of a Series", function() {
  const data = [ -1, 2, -0.5, 60, 101, 18 ];
  const scaler = new StandardScaler();
  scaler.fit(new Series(data));
  const scaled = scaler.transform(new Series(data));
  const restored = scaler.inverse_transform(scaled).values;
  assert.equal(restored.length, data.length);
  restored.forEach(function(value, i) {
    assert.ok(Math.abs(value - data[i]) < 1e-3, "index " + i + ": got " + value + ", expected " + data[i]);
  });
});
it("should be able to inverse the normalization of a DataFrame", function(){
  const data = [ [ -1, 2 ], [ -0.5, 6 ], [ 0, 10 ], [ 1, 18 ] ];
  const scaler = new StandardScaler();
  scaler.fit(new DataFrame(data));
  const scaled = scaler.transform(new DataFrame(data));
  const restored = scaler.inverse_transform(scaled).values;
  assert.equal(restored.length, data.length);
  restored.forEach(function(row, r) {
    assert.equal(row.length, data[r].length);
    row.forEach(function(value, c) {
      assert.ok(Math.abs(value - data[r][c]) < 1e-3, "cell [" + r + "][" + c + "]: got " + value + ", expected " + data[r][c]);
    });
  });
});
});


Expand Down
2 changes: 2 additions & 0 deletions danfojs-node/types/preprocessing/scalers.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export class MinMaxScaler {
* @returns array
*/
transform(data?: any): Series | DataFrame;
/**
 * Restore transformed values to their original range, using the min and
 * max generated from fitting.
 * @param data Series, DataFrame or array of scaled values
 * @returns Series | DataFrame
 */
inverse_transform(data?: any): Series | DataFrame;
}
export class StandardScaler {
/**
Expand All @@ -24,6 +25,7 @@ export class StandardScaler {
std?: any;
mean?: any;
transform(data?: any): Series | DataFrame;
/**
 * Restore transformed values to their original scale, using the mean and
 * std generated from fitting.
 * @param data Series, DataFrame or array of standardized values
 * @returns Series | DataFrame
 */
inverse_transform(data?: any): Series | DataFrame;
}
import { Series } from "../core/series";
import { DataFrame } from "../core/frame";