Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

case: when only get content

  • Loading branch information...
commit 166699356cf6a830b25b93638f21fb9b8e82eab5 1 parent 1765807
@j3nnn1 authored
Showing with 4 additions and 3 deletions.
  1. +4 −3 textmining/remove_html_tags/remove_html.rb
View
7 textmining/remove_html_tags/remove_html.rb
@@ -2,7 +2,7 @@
require 'rubygems'
require 'sanitize'
-require 'CSV'
+require 'csv'
# Replacement table handed to removeaccent (see its signature: removeaccent(word, terms)).
# The empty Hash created on the first line is discarded immediately by the literal below.
terms = Hash.new
# Keys are single-quoted, so each one is the literal four-character text
# backslash-x-hex-hex (e.g. \xe1), not a one-byte escape; values are the accented letters.
# NOTE(review): presumably the input data contains these escapes as plain text — confirm.
terms = {'\xe1' => 'á', '\xe9' => 'é', '\xed' => 'í', '\xfa' => 'ú', '\xf3' => 'ó'}
@@ -14,15 +14,16 @@ def removeaccent(word, terms)
return word
end
# Main loop of the script, shown as a diff: the commit switches the output and input
# paths from the lanacion.com.ar files to the infobae_finanza files.
# The output file is opened for writing ("w"); it is not closed in the lines visible here.
-file_clean = File.open("lanacion.com.ar.csv.data.clean", "w")
+file_clean = File.open("infobae_finanza.csv.data.clean", "w")
# Read the input CSV row by row as UTF-8.
-CSV.foreach("lanacion.com.ar.csv.data", encoding: 'UTF-8' ) do |row|
+CSV.foreach("infobae_finanza.csv", encoding: 'UTF-8' ) do |row|
# title: second column (row[1]) passed through Sanitize.clean, labelled as UTF-8,
# then run through removeaccent with the terms table
title = removeaccent(Sanitize.clean(row[1]).force_encoding('UTF-8'), terms)
# content: first column (row[0]), processed the same way as the title
content = removeaccent(Sanitize.clean(row[0]).force_encoding('UTF-8'), terms)
# serialize the cleaned pair as one CSV line, title first and content second
csv_string = [title, content].to_csv
# Alternative added by this commit, left commented out: write only the content column.
+ #csv_string = [content].to_csv
# append the serialized row to the cleaned output file
file_clean.write(csv_string)
end
Please sign in to comment.
Something went wrong with that request. Please try again.