Skip to content

Commit

Permalink
[UPDATE] refactor to use strided arrays.
Browse files Browse the repository at this point in the history
  • Loading branch information
kgryte committed Feb 28, 2015
1 parent a7385dc commit 7104769
Show file tree
Hide file tree
Showing 14 changed files with 290 additions and 166 deletions.
11 changes: 10 additions & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,16 @@ TODO
- row/col names
- only getters!!
- See [link](http://pandas.pydata.org/pandas-docs/dev/comparison_with_sql.html#select)
89.
89. instead of `data` --> `toArray`?
90. allow column-order (order option)
- this has deep implications for the rest of the module, with possibly significant increased complexity
91. deep copy TODOs only apply to generic array values; if the dtype is known, then a deep copy is not necessary
- will probably need to use code generation to accommodate all the various array types
92. Creating new linear arrays
- may want to create a routine for zero arrays for pre-initialization to avoid creating a sparse array
- use `push`, rather than `new`
- this routine should replace all `new` instances in the code when creating new strided arrays
93.



Expand Down
77 changes: 54 additions & 23 deletions lib/addCols.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,12 @@ var isSize = require( 'validate.io-size' ),
*/
function addCols( arr, options ) {
/* jshint validthis:true */
var nRows = this._nRows,
var data = this._data,
nRows = this._nRows,
nCols = this._nCols,
chash = this._chash,
rs = this._rStride,
cs = this._cStride,
opts = {},
hash,
names,
Expand Down Expand Up @@ -98,21 +102,17 @@ function addCols( arr, options ) {
if ( idx.length !== len ) {
throw new Error( 'addCols()::invalid option. Number of column indices must equal the number of added columns.' );
}
N = this._nCols + len - 1;
N = nCols + len - 1;
if ( !hasMax( idx, N ) ) {
throw new RangeError( 'addCols()::invalid option. A column index cannot exceed the total number of columns.' );
}
}
// Add the columns to the data array...
this._nCols += len;
if ( idx ) {
N = this._nCols;
d = new Array( nRows );
for ( i = 0; i < nRows; i++ ) {
d[ i ] = new Array( N );
}
n = new Array( N );
hash = {};

// Build a new data array...
d = new Array( nRows*N );
k = 0;
for ( j = 0; j < N; j++ ) {
flg = false;
Expand All @@ -123,16 +123,33 @@ function addCols( arr, options ) {
}
}
if ( flg ) {
name = names[ i ];
// TODO: deep copy
col = arr[ i ];
for ( i = 0; i < nRows; i++ ) {
d[ i ][ j ] = col[ i ];
// TODO: deep copy
d[ i*N + j*cs ] = col[ i ];
}
} else {
for ( i = 0; i < nRows; i++ ) {
d[ i ][ j ] = this._data[ i ][ k ];
d[ i*N + j*cs ] = data[ i*rs + k*cs ];
}
k++;
}
}
// Build a new column names array and an associated hash...
n = new Array( N );
hash = {};
k = 0;
for ( j = 0; j < N; j++ ) {
flg = false;
for ( i = 0; i < len; i++ ) {
if ( idx[ i ] === j ) {
flg = true;
break;
}
}
if ( flg ) {
name = names[ i ];
} else {
name = this._colnames[ k ];
k++;
}
Expand All @@ -143,27 +160,41 @@ function addCols( arr, options ) {
hash[ name ]= [ j ];
}
}
this._data = d;
this._colnames = n;
this._chash = hash;
} else {
for ( i = 0; i < len; i++ ) {
name = names[ i ];
N = this._nCols;
d = new Array( nRows*N );

// TODO: deep copy
col = arr[ i ];
for ( j = 0; j < nRows; j++ ) {
this._data[ j ].push( col[ j ] );
// Build a new data array...
k = 0;
for ( j = 0; j < N; j++ ) {
if ( j < nCols ) {
for ( i = 0; i < nRows; i++ ) {
d[ i*N + j*cs ] = data[ i*rs + j*cs ];
}
} else {
col = arr[ k ];
for ( i = 0; i < nRows; i++ ) {
// TODO: deep copy
d[ i*N + j*cs ] = col[ i ];
}
k++;
}
}
// Append the new column names to the existing column name array and update the column name hash...
for ( i = 0; i < len; i++ ) {
name = names[ i ];
this._colnames.push( name );
N = this._colnames.length;
if ( chash.hasOwnProperty( name ) ) {
chash[ name ].push( N );
chash[ name ].push( nCols+i );
} else {
chash[ name ] = [ N ];
chash[ name ] = [ nCols+i ];
}
}
}
this._data = d;
this._rStride = this._nCols;
return this;
} // end FUNCTION addCols()

Expand Down
58 changes: 44 additions & 14 deletions lib/addRows.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,17 @@ var isSize = require( 'validate.io-size' ),
*/
function addRows( arr, options ) {
/* jshint validthis:true */
var nCols = this._nCols,
var data = this._data,
nRows = this._nRows,
nCols = this._nCols,
rhash = this._rhash,
rs = this._rStride,
cs = this._cStride,
opts = {},
hash,
names,
name,
row,
idx,
len,
flg,
Expand Down Expand Up @@ -98,16 +103,40 @@ function addRows( arr, options ) {
if ( idx.length !== len ) {
throw new Error( 'addRows()::invalid option. Number of row indices must equal the number of added rows.' );
}
N = this._nRows + len - 1;
N = nRows + len - 1;
if ( !hasMax( idx, N ) ) {
throw new RangeError( 'addRows()::invalid option. A row index cannot exceed the total number of rows.' );
}
}
// Add the rows to the data array...
this._nRows += len;
if ( idx ) {
N = this._nRows;
d = new Array( N );

// Build a new data array...
d = new Array( N*nCols );
k = 0;
for ( i = 0; i < N; i++ ) {
flg = false;
for ( j = 0; j < len; j++ ) {
if ( idx[ j ] === i ) {
flg = true;
break;
}
}
if ( flg ) {
row = arr[ j ];
for ( j = 0; j < nCols; j++ ) {
// TODO: deep copy
d[ i*rs + j*cs ] = row[ j ];
}
} else {
for ( j = 0; j < nCols; j++ ) {
d[ i*rs + j*cs ] = data[ k*rs + j*cs ];
}
k++;
}
}
// Build a new row names array and an associated hash...
n = new Array( N );
hash = {};
k = 0;
Expand All @@ -120,11 +149,8 @@ function addRows( arr, options ) {
}
}
if ( flg ) {
// TODO: deep copy
d[ i ] = arr[ j ].slice();
name = names[ j ];
} else {
d[ i ] = this._data[ k ];
name = this._rownames[ k ];
k++;
}
Expand All @@ -139,18 +165,22 @@ function addRows( arr, options ) {
this._rownames = n;
this._rhash = hash;
} else {
// Append the row data to the existing data array...
for ( i = 0; i < len; i++ ) {
row = arr[ i ];
for ( j = 0; j < nCols; j++ ) {
// TODO: deep copy
data.push( row[ j ] );
}
}
// Append the new row names to the existing row name array and update the row name hash...
for ( i = 0; i < len; i++ ) {
name = names[ i ];

// TODO: deep copy
this._data.push( arr[ i ].slice() );

this._rownames.push( name );
N = this._rownames.length;
if ( rhash.hasOwnProperty( name ) ) {
rhash[ name ].push( N );
rhash[ name ].push( nRows+i );
} else {
rhash[ name ] = [ N ];
rhash[ name ] = [ nRows+i ];
}
}
}
Expand Down
54 changes: 31 additions & 23 deletions lib/ctor.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,11 @@ function DataFrame( arr, options ) {
names,
name,
data,
len,
N,
i;
nRows,
nCols,
row,
i, j;

if ( !nArgs ) {
throw new Error( 'DataFrame()::insufficient input arguments. Must provide a data array.' );
} else if ( nArgs === 2 ) {
Expand All @@ -70,60 +72,64 @@ function DataFrame( arr, options ) {
if ( !isArray( arr ) ) {
throw new TypeError( 'DataFrame()::invalid input argument. Must provide a data array.' );
}
len = arr.length;
nRows = arr.length;
// [1] Dirty check...
if ( !isArray( arr[ 0 ] ) ) {
throw new TypeError( 'DataFrame()::invalid input argument. Data must be an array of arrays.' );
}
// [2] All first-level arrays must be equal length...
N = arr[ 0 ].length;
if ( !isSize( arr, [ null, N ] ) ) {
nCols = arr[ 0 ].length;
if ( !isSize( arr, [ null, nCols ] ) ) {
throw new Error( 'DataFrame()::invalid input argument. Input data array must contain equal length arrays.' );
}

// TODO: deep copy; this is a shallow copy. Need to acct for Boolean, Date, RegExp, Number, circular references, etc.
data = new Array( len );
// TODO: allow for dtype; e.g., Uint8Array, etc

// [3] Copy the input data...
for ( i = 0; i < len; i++ ) {
data[ i ] = arr[ i ].slice();
// [3] Copy the input data into a 1D array...
data = new Array( nRows*nCols );
for ( i = 0; i < nRows; i++ ) {
row = arr[ i ];
for ( j = 0; j < nCols; j++ ) {
// TODO: deep copy. Need to acct for Boolean, Date, RegExp, Number, String, circular refs, etc.
data[ i*nCols + j ] = row[ j ];
}
}
// [4] Number of row names must equal number of rows...
if ( !opts.hasOwnProperty( 'rownames' ) ) {
this._rownames = new Array( len );
for ( i = 0; i < len; i++ ) {
this._rownames = new Array( nRows );
for ( i = 0; i < nRows; i++ ) {
this._rownames[ i ] = i.toString();
}
} else {
names = opts.rownames;
if ( !isStringArray( names ) ) {
throw new TypeError( 'DataFrame()::invalid option. Row names must be an array of strings.' );
}
if ( names.length !== len ) {
if ( names.length !== nRows ) {
throw new Error( 'DataFrame()::invalid option. Number of row names must equal the number of rows.' );
}
this._rownames = names.slice();
}
// [5] Number of column names must equal number of columns...
if ( !opts.hasOwnProperty( 'colnames' ) ) {
this._colnames = new Array( N );
for ( i = 0; i < N; i++ ) {
this._colnames = new Array( nCols );
for ( i = 0; i < nCols; i++ ) {
this._colnames[ i ] = i.toString();
}
} else {
names = opts.colnames;
if ( !isStringArray( names ) ) {
throw new TypeError( 'DataFrame()::invalid option. Column names must be an array.' );
}
if ( opts.colnames.length !== N ) {
if ( opts.colnames.length !== nCols ) {
throw new Error( 'DataFrame()::invalid option. Number of column names does not equal the number of columns.' );
}
this._colnames = names.slice();
}
// [5] Create lookup tables for row and column names...
this._rhash = {};
names = this._rownames;
for ( i = 0; i < names.length; i++ ) {
for ( i = 0; i < nRows; i++ ) {
name = names[ i ];
if ( this._rhash.hasOwnProperty( name ) ) {
this._rhash[ name ].push( i );
Expand All @@ -133,7 +139,7 @@ function DataFrame( arr, options ) {
}
this._chash = {};
names = this._colnames;
for ( i = 0; i < names.length; i++ ) {
for ( i = 0; i < nCols; i++ ) {
name = names[ i ];
if ( this._chash.hasOwnProperty( name ) ) {
this._chash[ name ].push( i );
Expand All @@ -142,10 +148,12 @@ function DataFrame( arr, options ) {
}
}
this._data = data;
this._nRows = len;
this._nCols = N;
this._rid = len;
this._cid = N;
this._nRows = nRows;
this._nCols = nCols;
this._rStride = nCols;
this._cStride = 1;
this._rid = nRows;
this._cid = nCols;
return this;
} // end FUNCTION DataFrame()

Expand Down

0 comments on commit 7104769

Please sign in to comment.