From 0f3e97e48cc0a4b42b60c2160b9a7049440fc9fa Mon Sep 17 00:00:00 2001 From: aboutaaron Date: Sun, 19 Aug 2012 15:15:39 -0700 Subject: [PATCH] combined old code to one file --- keywords.rb | 17 ++++++++++++--- refactor.rb | 63 ----------------------------------------------------- 2 files changed, 14 insertions(+), 66 deletions(-) delete mode 100644 refactor.rb diff --git a/keywords.rb b/keywords.rb index 0272bb5..514d20e 100755 --- a/keywords.rb +++ b/keywords.rb @@ -15,10 +15,12 @@ a = Mechanize.new # Login +username = "" +password = "" x = a.get(base_url).forms.first p "Submitting login info..." -x["name"] = "jevon.phillips" -x["password"] = "marvel" +x["name"] = username +x["password"] = password x.submit # open keyword section w/ appropriate parameters @@ -34,7 +36,11 @@ image_page = base_url + link['href'] a.get(image_page) # Grab Descriptions, remove whitespace, store in variable photo_info and write to photo.txt - file_name = a.page.link_with(:href => /\-sxga.jpg/).text + begin + file_name = a.page.link_with(:href => /\-sxga.jpg/).text + rescue StandardError => e + puts "Error: #{e}" + end photo_info = a.page.search("#image_description, #image_caption, #image_title").map(&:text).map(&:strip) open("captions_keywords.txt", "a") do |f| @@ -82,8 +88,13 @@ end end end + exec "figlet 'Finished keywords'" + puts + +a.get("http://laiac1b5z1-int.latimes.com/images?content_partial=global%2Fcontent_list&filter_by=title&filter_status=all&filter_val=&limit=1000&offset=20&search_published=false&sort_asc=desc&sort_by=modified") + p "Opening up the 'Images' section with all images. This will take a while..." # URL with maximum photo size a.get("http://laiac1b5z1-int.latimes.com/images?content_partial=global%2Fcontent_list&filter_by=title&filter_status=all&filter_val=&limit=1000&offset=20&search_published=false&sort_asc=desc&sort_by=modified") diff --git a/refactor.rb b/refactor.rb deleted file mode 100644 index f9c6380..0000000 --- a/refactor.rb +++ /dev/null @@ -1,63 +0,0 @@ -require "rubygems" -require "mechanize" -require "open-uri" -require "fileutils" - -# Files -IMAGES = "images" -Dir.mkdir(IMAGES) unless File.exists?(IMAGES) -File.open("captions.txt", "w"){ |somefile| somefile.puts "TOP"} - - - -# Structure -base_url = "http://laiac1b5z1-int.latimes.com" - -# Initialize Mechanize -a = Mechanize.new - -# Login -x = a.get("http://laiac1b5z1-int.latimes.com/").forms.first -p "Submitting login info..." -x["name"] = "jevon.phillips" -x["password"] = "marvel" -x.submit - -# Click on Image page -p "Opening up the 'Images' page with all images. This will take a while..." -puts -puts -# URL with maximum photo size -a.get("http://laiac1b5z1-int.latimes.com/images?content_partial=global%2Fcontent_list&filter_by=title&filter_status=all&filter_val=&limit=1000&offset=20&search_published=false&sort_asc=desc&sort_by=modified") - -# Boom -p "Iterating through links..." -a.page.search("td:nth-child(3) a").each do |link| - image_page = base_url + link['href'] - p "Here's your full url..." - p image_page - puts - a.get(image_page) - # Grab Descriptions, remove whitespace, store in variable photo_info and write to photo.txt - photo_info = a.page.search("#image_description, #image_caption, #image_title").map(&:text).map(&:strip) - - open("captions.txt", "a") do |f| - f << photo_info - f.puts "\n" - f.puts "The file name is " + a.page.link_with(:href => /\-sxga.jpg/).text - f.puts "\n"*2 - end - - # Click largest image - a.page.link_with(:href => /\-sxga.jpg/).click - - # Setting variables for download - img_url = a.page.search("img")[0].attributes['src'].text - - # Change to Images Directory and download file - Dir.chdir("images") do - a.get(base_url+img_url).save - # Sleepy time for the request - - end -end \ No newline at end of file