Skip to content

Commit

Permalink
changed to fasterCSV for ruby < 1.9
Browse files Browse the repository at this point in the history
added simple check for uri and integer
  • Loading branch information
benjab committed Sep 19, 2011
1 parent 0baa607 commit 7539585
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 14 deletions.
8 changes: 5 additions & 3 deletions README
Expand Up @@ -6,7 +6,9 @@
# Date: 15.09.2011

## FILES ##
csv2rdf.rb -- main ruby script to convert NORMARC file to RDF
csv2rdf.rb -- main ruby script to convert CSV file to RDF
example.csv -- example table
example.rdf -- example output

## FEATURES ##
takes csv and uses column headers as properties
Expand All @@ -23,7 +25,7 @@ rdf-n3.rb (for n3 output)
## UBUNTU INSTALL ##
(for rdf-xml support)
sudo apt-get install libxml2-dev libxslt1-dev
gem install rdf rdf-rdfxml rdf-n3
gem install rdf rdf-rdfxml rdf-n3 (also fastercsv if ruby < 1.9)

## USAGE ##
csv2rdf.rb -i input_file.csv -o output_file -b base_uri -t rdf_type [-r recordlimit]
Expand All @@ -35,4 +37,4 @@ csv2rdf.rb -i input_file.csv -o output_file -b base_uri -t rdf_type [-r recordl

## EXAMPLE ##

ruby csv2rdf.rb -i input.csv -b http://example.com/bookreviews/ -t http://purl.org/stuff/rev#Review -o output.rdf
ruby csv2rdf.rb -i example.csv -b http://example.com/bookreviews/ -t http://purl.org/stuff/rev#Review -o example.rdf
29 changes: 18 additions & 11 deletions csv2rdf.rb
@@ -1,7 +1,13 @@
#!/usr/bin/env ruby

require 'rubygems'
require 'csv'
if RUBY_VERSION < "1.9"
require "rubygems"
require "faster_csv"
CSV = FCSV
else
require "csv"
end
require 'rdf'
require 'rdf/rdfxml'
require 'rdf/n3'
Expand Down Expand Up @@ -49,8 +55,6 @@ def initialize(record)
def construct_uri
@uri = RDF::URI.intern($base_uri)
id = "#{@record}"

#id = "#{@record[headers[0]]}"
id.gsub!(/[^\w\s\-ÆØÅæøå]/,"")
id.gsub!(/\s/,"_")
@uri += id
Expand All @@ -74,24 +78,27 @@ def write_record

count = 0

csv = CSV.read($input_file)
headers = csv.shift.map {|i| i.to_s }
string_data = csv.map {|row| row.map {|cell| cell.to_s } }
array_of_hashes = string_data.map {|row| Hash[*headers.zip(row).flatten] }
csv = CSV.read($input_file, {:headers => true, :encoding => 'UTF-8'})
# start writer handle
RDF::Writer.open($output_file) do | writer |
@@writer = writer

array_of_hashes.each do | record |
csv.each do | record |
count += 1
if $recordlimit then break if count > $recordlimit end
# take the content of the first column to make record id
rdfrecord = RDFModeler.new(record["#{headers[0]}"])
rdfrecord = RDFModeler.new(record[0])
rdfrecord.set_type(RDF::URI($rdf_type))
record.each do |k,v|
unless v.empty?
unless v.nil?
v.strip_leading_and_trailing_punct
rdfrecord.assert(RDF::URI(k), RDF::Literal("#{v}"))
if v =~ /^http/
rdfrecord.assert(RDF::URI(k), RDF::URI("#{v}"))
elsif v =~ /^[\d]+$/
rdfrecord.assert(RDF::URI(k), RDF::Literal("#{v}", :datatype => RDF::XSD.integer))
else
rdfrecord.assert(RDF::URI(k), v)
end
end
end

Expand Down

0 comments on commit 7539585

Please sign in to comment.