Skip to content
Browse files

fix title matching bug, support https

  • Loading branch information...
1 parent c1d1492 commit 1de23f355895e6af971aa3ac929c529138352d31 @paulalesius paulalesius committed Dec 6, 2011
View
2 misc/example-configuration.properties
@@ -14,6 +14,6 @@ forskbot.channels=#testchannel63546364,#testchannel12312
# Optional properties
#---
# bot nick
-forskbot.nick=g11k
+forskbot.nick=g13k
# bot name
forskbot.name=gordon
View
4 pom.xml
@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>forskbot</groupId>
<artifactId>forskbot</artifactId>
- <version>0.0.3-SNAPSHOT</version>
+ <version>0.0.4-SNAPSHOT</version>
<name>Gordon9k</name>
<contributors>
@@ -65,4 +65,4 @@
</dependency>
</dependencies>
-</project>
+</project>
View
97 src/main/java/forskbot/irc/IrcBot.java
@@ -10,11 +10,18 @@
import java.net.URI;
import java.net.URLConnection;
import java.net.UnknownHostException;
+import java.security.SecureRandom;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.net.SocketFactory;
+import javax.net.ssl.HttpsURLConnection;
+import javax.net.ssl.SSLContext;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
import org.apache.log4j.Logger;
@@ -27,7 +34,7 @@
*/
public class IrcBot {
- public static final Pattern URL_PATTERN = Pattern.compile("^((?i:http://.*)|(?i:www\\..*)|([a-zA-Z0-9\\-]+?(\\.[a-zA-Z0-9\\-]+?)+?/.*))$");
+ public static final Pattern URL_PATTERN = Pattern.compile("^((?i:https?://.*)|(?i:www\\..*)|([a-zA-Z0-9\\-]+?(\\.[a-zA-Z0-9\\-]+?)+?/.*))$");
public static final Pattern TITLE_PATTERN = Pattern.compile("^.*(<[\\s+]?(?i:title)[\\s+]?>(.*?)<[\\s+]?/[\\s+]?(?i:title)[\\s+]?>).*$");
public static final int CONNECT_TIMEOUT_MS = 2000;
public static final int PING_TIMEOUT_MS = 350000;
@@ -110,12 +117,11 @@ public void rwLoop() throws IOException {
synchronized (this) {
String line = reader.readLine();
- if (line == null) {
+ if (line == null || line.isEmpty()) {
continue;
}
- if (line != null && line.length() > MAX_SERVER_LINE_LENGTH && !line.isEmpty()) {
- // Maybe warn?
+ if (line != null && line.length() > MAX_SERVER_LINE_LENGTH) {
continue;
}
@@ -148,7 +154,26 @@ public void rwLoop() throws IOException {
break;
}
} else {
- detectParseUrls(chanOrNick, messageParts);
+ log.debug("Detecing url from: " + Arrays.toString(messageParts));
+ int currMatches = 0;
+ for (int i = 0; i < messageParts.length; i++) {
+ String part = messageParts[i];
+ if (i == 0) {
+ part = part.substring(1);
+ }
+
+ if (URL_PATTERN.matcher(part).matches()) {
+ if (currMatches++ >= MAX_URLMATCHES_PERLINE) {
+ break;
+ }
+
+ try {
+ new Thread(new TitleHandler(chanOrNick, part)).start();
+ } catch (Throwable t) {
+ log.error(t);
+ }
+ }
+ }
}
}
@@ -164,7 +189,7 @@ public void rwLoop() throws IOException {
}
}
- log.warn("Unhandled: " + Arrays.toString(parts));
+ log.info("Unhandled: " + Arrays.toString(parts));
}
}
} finally {
@@ -174,42 +199,17 @@ public void rwLoop() throws IOException {
}
}
- /**
- * May be flooded if in multiple channels
- */
- private void detectParseUrls(String chanOrNick, String[] messageParts) {
-
- int currMatches = 0;
- for (int i = 0; i < messageParts.length; i++) {
- String part = messageParts[i];
- if (i == 0) {
- part = part.substring(1);
- }
-
- if (URL_PATTERN.matcher(part).matches()) {
- if (currMatches++ >= MAX_URLMATCHES_PERLINE) {
- break;
- }
-
- try {
- new Thread(new TitleHandler(chanOrNick, part)).start();
- } catch (Throwable t) {
- log.error(t);
- }
- }
- }
- }
-
private class TitleHandler implements Runnable {
private String chanOrNick;
private URI uri;
public TitleHandler(String chanOrNick, String uriStr) throws IllegalArgumentException {
- if (!uriStr.toLowerCase().startsWith("http://")) {
+ if (!uriStr.toLowerCase().startsWith("http://") && !uriStr.toLowerCase().startsWith("https://")) {
uriStr = "http://" + uriStr;
}
+
uri = URI.create(uriStr);
this.chanOrNick = chanOrNick;
@@ -226,6 +226,31 @@ public void run() {
try {
URLConnection conn = uri.toURL().openConnection();
+ if (uri.getScheme().equals("https")) {
+ HttpsURLConnection https = (HttpsURLConnection) conn;
+ SSLContext sc = SSLContext.getInstance("TLS");
+ TrustManager[] tm = new TrustManager[] { new X509TrustManager() {
+
+ @Override
+ public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
+
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
+
+ }
+
+ @Override
+ public X509Certificate[] getAcceptedIssuers() {
+
+ return null;
+ }
+
+ } };
+ sc.init(null, tm, new SecureRandom());
+ https.setSSLSocketFactory(sc.getSocketFactory());
+ }
conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
conn.setReadTimeout(CONNECT_TIMEOUT_MS);
conn.setUseCaches(false);
@@ -240,10 +265,14 @@ public void run() {
urlReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
String line = null;
+ StringBuffer buffer = new StringBuffer();
// Could use a max_nr_lines_read limit
while ((line = urlReader.readLine()) != null) {
- Matcher matcher = TITLE_PATTERN.matcher(line);
+ buffer.append(line);
+ line = null;
+
+ Matcher matcher = TITLE_PATTERN.matcher(buffer);
if (matcher.find()) {
String pageTitle = matcher.group(2).replaceAll("\\s+", " ").trim();
TitleSimilarity ts = new TitleSimilarity(uri.toASCIIString(), pageTitle);
View
1 src/main/java/forskbot/irc/TitleSimilarity.java
@@ -23,7 +23,6 @@ public TitleSimilarity(String uri, String title) {
private void matchFilteredContained(String uri, String title) {
for (String word : title.split("\\s+")) {
- System.err.println(word);
if (word.length() >= WORDLEN_THRESH && word.matches("\\w+")) {
keywordsTotal++;
if (uri.contains(word)) {
View
5 src/test/java/forskbot/TestBot.java
@@ -20,7 +20,8 @@
@Test
public void testUrlMatch() {
- String[] toMatch = { "www.google.coM", "www.google.com/one?two=three", "http://www.google.com", "http://www.google.com/hellothere", "google.com/hello" };
+ String[] toMatch = { "www.google.coM", "www.google.com/one?two=three", "http://www.google.com", "http://www.google.com/hellothere", "google.com/hello",
+ "http://www.youtube.com/watch?v=aAAAAaAAaAA" };
for (String match : toMatch) {
Assert.assertTrue(IrcBot.URL_PATTERN.matcher(match).matches());
@@ -41,7 +42,7 @@ public void testConnectAndDoStuff() throws Exception {
props.setProperty(Configuration.PROP_CHANNELS, "#nine13132"); // nine1238
props.setProperty(Configuration.PROP_HOST, "irc.freenode.org");
props.setProperty(Configuration.PROP_PORT, "6667");
- props.setProperty(Configuration.PROP_NICK, "Gordon16k");
+ props.setProperty(Configuration.PROP_NICK, "g99k");
Configuration config = Configuration.getSelf();
config.parseRawConfig(props);

0 comments on commit 1de23f3

Please sign in to comment.
Something went wrong with that request. Please try again.