Skip to content
Browse files

METAMODL #126 - Update to handle some non-ASCII encodings.

BAMFCSV now handles UTF-8 and Latin-1 input, and should also work with
any other encoding that is compatible with 7-bit ASCII.
  • Loading branch information...
1 parent 2b8c506 commit bfbfedcc9070a83075f4ef99d87a7d649759344e Sam Umbach and Craig Andera committed Jul 7, 2011
Showing with 17 additions and 14 deletions.
  1. +11 −12 ext/bamfcsv/bamfcsv_ext.c
  2. +6 −2 spec/lib/bamfcsv_spec.rb
View
23 ext/bamfcsv/bamfcsv_ext.c
@@ -1,11 +1,12 @@
#include <ruby/ruby.h>
+#include <ruby/encoding.h>
#include <stdlib.h>
#include <stdio.h>
VALUE BAMFCSV_module;
VALUE BAMFCSV_MalformedCSVError_class;
-VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
+VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count, rb_encoding *enc) {
if (*cell_end == '\r')
cell_end--;
@@ -17,12 +18,16 @@ VALUE bamfcsv_finalize_cell(char *cell_start, char *cell_end, int quote_count) {
cell_end--;
}
- VALUE cell_str = rb_str_new(cell_start, cell_end-cell_start+1);
+ VALUE cell_str = rb_enc_str_new(cell_start, cell_end-cell_start+1, enc);
return cell_str;
}
-VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
+VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
+ char *buf = RSTRING_PTR(string);
+ long bufsize = RSTRING_LEN(string);
+ rb_encoding *enc = rb_enc_from_index(ENCODING_GET(string));
+
unsigned long num_rows = 1, cell_count = 1;
int quote_count = 0, quotes_matched = 1;
@@ -57,7 +62,7 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
if (quote_count && *(cur-1) != '"')
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu.", num_rows, cell_count);
- VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
+ VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
if (quote_count)
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
@@ -72,7 +77,7 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
if (quote_count && !(*(cur-1) == '"' || *(cur-1) == '\r' && *(cur-2) == '"'))
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOL", num_rows, cell_count);
- VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
+ VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
if (quote_count)
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
@@ -98,7 +103,7 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
else if (quote_count && *(cur-1) != '"') /* Quotes closed before end of final cell */
rb_raise(BAMFCSV_MalformedCSVError_class, "Unclosed quoted field on line %lu, cell %lu: EOF", num_rows, cell_count);
- VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count);
+ VALUE cell_str = bamfcsv_finalize_cell(cell_start, cur-1, quote_count, enc);
if (quote_count)
rb_funcall(cell_str, gsub_bang, 2, dbl_dquote, dquote);
/* Completely blank lines don't even get a nil. This matches CSV's behavior. */
@@ -110,12 +115,6 @@ VALUE bamfcsv_build_matrix(char *buf, unsigned long bufsize) {
}
-VALUE bamfcsv_parse_string(VALUE self, VALUE string) {
-
- return bamfcsv_build_matrix(RSTRING_PTR(string), NUM2ULONG(rb_str_length(string)));
-
-}
-
void Init_bamfcsv() {
BAMFCSV_module = rb_define_module("BAMFCSV");
View
8 spec/lib/bamfcsv_spec.rb
@@ -56,7 +56,7 @@
BAMFCSV.parse("1,2").should == [["1","2"]]
end
- it 'correctly escaptes ""' do
+ it 'correctly escapes ""' do
BAMFCSV.parse(%Q|1,"""2"""\n|).should == [["1", '"2"']]
end
@@ -74,6 +74,10 @@
BAMFCSV.parse("1\r\n2").should == [["1"],["2"]]
end
+ it "parses data outside the 7-bit range" do
+ BAMFCSV.parse("age \u226540 years").should == [["age \u226540 years"]]
+ end
+
describe "default CSV module compatibility" do
it "adds a nil cell after a trailing comma with no newline" do
BAMFCSV.parse("1,2,").should == [["1","2",nil]]
@@ -144,7 +148,7 @@
table.kind_of?(Array).should be_true
table.is_a?(Array).should be_true
end
-
+
it "Array === table" do
pending { (Array === table).should be_true }
end

0 comments on commit bfbfedc

Please sign in to comment.
Something went wrong with that request. Please try again.