Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Initial commit

  • Loading branch information...
commit 4a9e955189efba430a30441272404289ceb1f0a9 0 parents
@hasanein authored
1  README
@@ -0,0 +1 @@
+Scrapes exchange rate information from the Yahoo Finance website.
67 src/com/ehilla/webscrapping/HttpScrapper.java
@@ -0,0 +1,67 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package com.ehilla.webscrapping;
+
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ *
+ * @author Hasanein Khafaji
+ */
+public class HttpScrapper implements HttpScrapperInterface
+{
+ private URL urlAddress;
+ private Pattern firstPattern, secondPattern;
+
+ public HttpScrapper(URL urlAddress, Pattern firstPattern, Pattern secondPattern)
+ {
+ this.urlAddress = urlAddress;
+ this.firstPattern = firstPattern;
+ this.secondPattern = secondPattern;
+ }
+
+ public String scrapIt()
+ {
+ try
+ {
+ String line = "";
+ String htmlPage = "";
+ BufferedReader urlReader = new BufferedReader(new InputStreamReader(urlAddress.openStream()));
+ BufferedReader br = new BufferedReader(new InputStreamReader(urlAddress.openStream()));
+ while ((line = urlReader.readLine()) != null)
+ {
+ htmlPage = htmlPage + line + "\n";
+ }
+ Matcher m1 = firstPattern.matcher(htmlPage);
+ int numMatches = 0;
+ String lastMatch = null;
+ while (m1.find())
+ {
+ numMatches++;
+ Matcher m2 = secondPattern.matcher(m1.group());
+ while (m2.find())
+ {
+ lastMatch = m2.group().trim();
+ }
+ }
+ if (numMatches == 1)
+ {
+ return lastMatch;
+ } else
+ {
+ return null;
+ }
+ } catch (Exception e)
+ {
+ System.out.println("Error Detected...Printing the stack trace");
+ e.printStackTrace();
+ return null;
+ }
+ }
+}
82 src/com/ehilla/webscrapping/HttpScrapperDemo.java
@@ -0,0 +1,82 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package com.ehilla.webscrapping;
+
+import java.net.URL;
+import java.util.regex.Pattern;
+
+/**
+ * Configurable front end for {@link HttpScrapper}: holds a URL and two
+ * patterns that can be set after construction, and delegates the actual
+ * scraping to a freshly created {@code HttpScrapper}. Its {@code main}
+ * method demonstrates the usage.
+ *
+ * @author hasanein
+ */
+public class HttpScrapperDemo implements HttpScrapperInterface
+{
+    private URL theURL;
+    private Pattern pattern_01;
+    private Pattern pattern_02;
+
+    /**
+     * Creates an unconfigured instance; the URL and both patterns are
+     * {@code null} until the corresponding setters are called.
+     */
+    public HttpScrapperDemo()
+    {
+        this(null, null, null);
+    }
+
+    /**
+     * Creates a fully configured instance.
+     *
+     * @param theURL     address of the page to scrape
+     * @param pattern_01 first (region) pattern
+     * @param pattern_02 second (value) pattern
+     */
+    public HttpScrapperDemo(URL theURL, Pattern pattern_01, Pattern pattern_02)
+    {
+        this.theURL = theURL;
+        this.pattern_01 = pattern_01;
+        this.pattern_02 = pattern_02;
+    }
+
+    /**
+     * Sets the address of the page to scrape.
+     *
+     * @param siteURL the page address
+     */
+    public void setSiteUrl(URL siteURL)
+    {
+        theURL = siteURL;
+    }
+
+    /**
+     * Returns the configured page address.
+     *
+     * @return the external form of the URL, or {@code null} if none is set
+     */
+    public String getURL()
+    {
+        return (theURL == null) ? null : theURL.toExternalForm();
+    }
+
+    /**
+     * Sets the first (region) pattern.
+     *
+     * @param firstPattern the pattern to use
+     */
+    public void setFirstPattern(Pattern firstPattern)
+    {
+        this.pattern_01 = firstPattern;
+    }
+
+    /**
+     * Returns the first (region) pattern as text.
+     *
+     * @return the pattern's string form, or {@code null} if none is set
+     */
+    public String getFirstPattern()
+    {
+        return (pattern_01 == null) ? null : pattern_01.toString();
+    }
+
+    /**
+     * Sets the second (value) pattern.
+     *
+     * @param secondPattern the pattern to use
+     */
+    public void setSecondPattern(Pattern secondPattern)
+    {
+        this.pattern_02 = secondPattern;
+    }
+
+    /**
+     * Returns the second (value) pattern as text.
+     *
+     * @return the pattern's string form, or {@code null} if none is set
+     */
+    public String getSecondPattern()
+    {
+        return (pattern_02 == null) ? null : pattern_02.toString();
+    }
+
+    /**
+     * Demo entry point: configures an instance through its setters and
+     * prints the page address followed by the scraped value.
+     *
+     * @param args ignored
+     * @throws Exception if the hard-coded URL cannot be parsed
+     */
+    public static void main(String args[])
+        throws Exception
+    {
+        HttpScrapperDemo httpDemo = new HttpScrapperDemo();
+        URL siteURL = new URL("http://finance.yahoo.com");
+        // "GBP/USD", the rest of that line, then "<td>" and a decimal number
+        // on the next line. The decimal point is escaped so that it matches
+        // a literal '.' only, not an arbitrary character.
+        Pattern pattern_01 = Pattern.compile("GBP/USD.*\n<td>\\d+\\.\\d+");
+        // The decimal number at the very end of the region matched above.
+        Pattern pattern_02 = Pattern.compile("\\d+\\.\\d+$");
+        httpDemo.setFirstPattern(pattern_01);
+        httpDemo.setSecondPattern(pattern_02);
+        httpDemo.setSiteUrl(siteURL);
+        System.out.println(httpDemo.getURL() + ":" + httpDemo.scrapIt());
+    }
+
+    /**
+     * Scrapes the configured page by delegating to a new {@link HttpScrapper}
+     * built from the current URL and patterns.
+     *
+     * @return whatever {@code HttpScrapper.scrapIt()} returns for the current
+     *         configuration
+     */
+    public String scrapIt()
+    {
+        HttpScrapper httpScrapper = new HttpScrapper(theURL, pattern_01, pattern_02);
+        return httpScrapper.scrapIt();
+    }
+}
14 src/com/ehilla/webscrapping/HttpScrapperInterface.java
@@ -0,0 +1,14 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package com.ehilla.webscrapping;
+
+/**
+ * Contract shared by the scraper classes in this package: a single
+ * no-argument operation that produces the scraped text.
+ *
+ * @author hasanein
+ */
+public interface HttpScrapperInterface
+{
+    /**
+     * Performs the scrape.
+     *
+     * @return the scraped text as a string
+     */
+    String scrapIt();
+}
41 src/com/ehilla/webscrapping/ScrapeYahooFinance.java
@@ -0,0 +1,41 @@
+package com.ehilla.webscrapping;
+import java.net.*;
+import java.io.*;
+import java.util.regex.*;
+public class ScrapeYahooFinance
+{
+ public static void main(String args[])
+ throws Exception
+ {
+ String urlAddress = "http://finance.yahoo.com";
+ URL url = new URL(urlAddress);
+ BufferedReader urlReader = new BufferedReader(new InputStreamReader(url.openStream()));
+ BufferedReader br = new BufferedReader(new InputStreamReader(url.openStream()));
+ String line = "";
+ Pattern pattern_01 = Pattern.compile("GBP/USD.*\n<td>\\d+.\\d+");
+ Pattern pattern_02 = Pattern.compile("\\d+.\\d+$");
+ String htmlPage = "";
+ while ((line = urlReader.readLine()) != null)
+ {
+ htmlPage = htmlPage + line + "\n";
+ }
+ Matcher m1 = pattern_01.matcher(htmlPage);
+ int numMatches = 0;
+ String lastMatch = null;
+ while(m1.find())
+ {
+ numMatches ++;
+ Matcher m2 = pattern_02.matcher(m1.group());
+ while(m2.find())
+ {
+ lastMatch = m2.group().trim();
+ }
+ }
+ if (numMatches == 1)
+ {
+ System.out.println("The matched result is: " + lastMatch);
+ }
+ else
+ System.out.println("There are more than one match, please review your regex...");
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.