Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix title matching bug; support HTTPS

  • Loading branch information...
commit 1de23f355895e6af971aa3ac929c529138352d31 1 parent c1d1492
paulalesius authored
2  misc/example-configuration.properties
@@ -14,6 +14,6 @@ forskbot.channels=#testchannel63546364,#testchannel12312
14 14 # Optional properties
15 15 #---
16 16 # bot nick
17   -forskbot.nick=g11k
  17 +forskbot.nick=g13k
18 18 # bot name
19 19 forskbot.name=gordon
4 pom.xml
@@ -5,7 +5,7 @@
5 5 <modelVersion>4.0.0</modelVersion>
6 6 <groupId>forskbot</groupId>
7 7 <artifactId>forskbot</artifactId>
8   - <version>0.0.3-SNAPSHOT</version>
  8 + <version>0.0.4-SNAPSHOT</version>
9 9 <name>Gordon9k</name>
10 10
11 11 <contributors>
@@ -65,4 +65,4 @@
65 65 </dependency>
66 66 </dependencies>
67 67
68   -</project>
  68 +</project>
97 src/main/java/forskbot/irc/IrcBot.java
@@ -10,11 +10,18 @@
10 10 import java.net.URI;
11 11 import java.net.URLConnection;
12 12 import java.net.UnknownHostException;
  13 +import java.security.SecureRandom;
  14 +import java.security.cert.CertificateException;
  15 +import java.security.cert.X509Certificate;
13 16 import java.util.Arrays;
14 17 import java.util.regex.Matcher;
15 18 import java.util.regex.Pattern;
16 19
17 20 import javax.net.SocketFactory;
  21 +import javax.net.ssl.HttpsURLConnection;
  22 +import javax.net.ssl.SSLContext;
  23 +import javax.net.ssl.TrustManager;
  24 +import javax.net.ssl.X509TrustManager;
18 25
19 26 import org.apache.log4j.Logger;
20 27
@@ -27,7 +34,7 @@
27 34 */
28 35 public class IrcBot {
29 36
30   - public static final Pattern URL_PATTERN = Pattern.compile("^((?i:http://.*)|(?i:www\\..*)|([a-zA-Z0-9\\-]+?(\\.[a-zA-Z0-9\\-]+?)+?/.*))$");
  37 + public static final Pattern URL_PATTERN = Pattern.compile("^((?i:https?://.*)|(?i:www\\..*)|([a-zA-Z0-9\\-]+?(\\.[a-zA-Z0-9\\-]+?)+?/.*))$");
31 38 public static final Pattern TITLE_PATTERN = Pattern.compile("^.*(<[\\s+]?(?i:title)[\\s+]?>(.*?)<[\\s+]?/[\\s+]?(?i:title)[\\s+]?>).*$");
32 39 public static final int CONNECT_TIMEOUT_MS = 2000;
33 40 public static final int PING_TIMEOUT_MS = 350000;
@@ -110,12 +117,11 @@ public void rwLoop() throws IOException {
110 117 synchronized (this) {
111 118 String line = reader.readLine();
112 119
113   - if (line == null) {
  120 + if (line == null || line.isEmpty()) {
114 121 continue;
115 122 }
116 123
117   - if (line != null && line.length() > MAX_SERVER_LINE_LENGTH && !line.isEmpty()) {
118   - // Maybe warn?
  124 + if (line != null && line.length() > MAX_SERVER_LINE_LENGTH) {
119 125 continue;
120 126 }
121 127
@@ -148,7 +154,26 @@ public void rwLoop() throws IOException {
148 154 break;
149 155 }
150 156 } else {
151   - detectParseUrls(chanOrNick, messageParts);
  157 + log.debug("Detecing url from: " + Arrays.toString(messageParts));
  158 + int currMatches = 0;
  159 + for (int i = 0; i < messageParts.length; i++) {
  160 + String part = messageParts[i];
  161 + if (i == 0) {
  162 + part = part.substring(1);
  163 + }
  164 +
  165 + if (URL_PATTERN.matcher(part).matches()) {
  166 + if (currMatches++ >= MAX_URLMATCHES_PERLINE) {
  167 + break;
  168 + }
  169 +
  170 + try {
  171 + new Thread(new TitleHandler(chanOrNick, part)).start();
  172 + } catch (Throwable t) {
  173 + log.error(t);
  174 + }
  175 + }
  176 + }
152 177 }
153 178 }
154 179
@@ -164,7 +189,7 @@ public void rwLoop() throws IOException {
164 189 }
165 190 }
166 191
167   - log.warn("Unhandled: " + Arrays.toString(parts));
  192 + log.info("Unhandled: " + Arrays.toString(parts));
168 193 }
169 194 }
170 195 } finally {
@@ -174,32 +199,6 @@ public void rwLoop() throws IOException {
174 199 }
175 200 }
176 201
177   - /**
178   - * May be flooded if in multiple channels
179   - */
180   - private void detectParseUrls(String chanOrNick, String[] messageParts) {
181   -
182   - int currMatches = 0;
183   - for (int i = 0; i < messageParts.length; i++) {
184   - String part = messageParts[i];
185   - if (i == 0) {
186   - part = part.substring(1);
187   - }
188   -
189   - if (URL_PATTERN.matcher(part).matches()) {
190   - if (currMatches++ >= MAX_URLMATCHES_PERLINE) {
191   - break;
192   - }
193   -
194   - try {
195   - new Thread(new TitleHandler(chanOrNick, part)).start();
196   - } catch (Throwable t) {
197   - log.error(t);
198   - }
199   - }
200   - }
201   - }
202   -
203 202 private class TitleHandler implements Runnable {
204 203
205 204 private String chanOrNick;
@@ -207,9 +206,10 @@ private void detectParseUrls(String chanOrNick, String[] messageParts) {
207 206
208 207 public TitleHandler(String chanOrNick, String uriStr) throws IllegalArgumentException {
209 208
210   - if (!uriStr.toLowerCase().startsWith("http://")) {
  209 + if (!uriStr.toLowerCase().startsWith("http://") && !uriStr.toLowerCase().startsWith("https://")) {
211 210 uriStr = "http://" + uriStr;
212 211 }
  212 +
213 213 uri = URI.create(uriStr);
214 214
215 215 this.chanOrNick = chanOrNick;
@@ -226,6 +226,31 @@ public void run() {
226 226
227 227 try {
228 228 URLConnection conn = uri.toURL().openConnection();
  229 + if (uri.getScheme().equals("https")) {
  230 + HttpsURLConnection https = (HttpsURLConnection) conn;
  231 + SSLContext sc = SSLContext.getInstance("TLS");
  232 + TrustManager[] tm = new TrustManager[] { new X509TrustManager() {
  233 +
  234 + @Override
  235 + public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
  236 +
  237 + }
  238 +
  239 + @Override
  240 + public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
  241 +
  242 + }
  243 +
  244 + @Override
  245 + public X509Certificate[] getAcceptedIssuers() {
  246 +
  247 + return null;
  248 + }
  249 +
  250 + } };
  251 + sc.init(null, tm, new SecureRandom());
  252 + https.setSSLSocketFactory(sc.getSocketFactory());
  253 + }
229 254 conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
230 255 conn.setReadTimeout(CONNECT_TIMEOUT_MS);
231 256 conn.setUseCaches(false);
@@ -240,10 +265,14 @@ public void run() {
240 265
241 266 urlReader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
242 267 String line = null;
  268 + StringBuffer buffer = new StringBuffer();
243 269 // Could use a max_nr_lines_read limit
244 270 while ((line = urlReader.readLine()) != null) {
245 271
246   - Matcher matcher = TITLE_PATTERN.matcher(line);
  272 + buffer.append(line);
  273 + line = null;
  274 +
  275 + Matcher matcher = TITLE_PATTERN.matcher(buffer);
247 276 if (matcher.find()) {
248 277 String pageTitle = matcher.group(2).replaceAll("\\s+", " ").trim();
249 278 TitleSimilarity ts = new TitleSimilarity(uri.toASCIIString(), pageTitle);
1  src/main/java/forskbot/irc/TitleSimilarity.java
@@ -23,7 +23,6 @@ public TitleSimilarity(String uri, String title) {
23 23 private void matchFilteredContained(String uri, String title) {
24 24
25 25 for (String word : title.split("\\s+")) {
26   - System.err.println(word);
27 26 if (word.length() >= WORDLEN_THRESH && word.matches("\\w+")) {
28 27 keywordsTotal++;
29 28 if (uri.contains(word)) {
5 src/test/java/forskbot/TestBot.java
@@ -20,7 +20,8 @@
20 20 @Test
21 21 public void testUrlMatch() {
22 22
23   - String[] toMatch = { "www.google.coM", "www.google.com/one?two=three", "http://www.google.com", "http://www.google.com/hellothere", "google.com/hello" };
  23 + String[] toMatch = { "www.google.coM", "www.google.com/one?two=three", "http://www.google.com", "http://www.google.com/hellothere", "google.com/hello",
  24 + "http://www.youtube.com/watch?v=aAAAAaAAaAA" };
24 25
25 26 for (String match : toMatch) {
26 27 Assert.assertTrue(IrcBot.URL_PATTERN.matcher(match).matches());
@@ -41,7 +42,7 @@ public void testConnectAndDoStuff() throws Exception {
41 42 props.setProperty(Configuration.PROP_CHANNELS, "#nine13132"); // nine1238
42 43 props.setProperty(Configuration.PROP_HOST, "irc.freenode.org");
43 44 props.setProperty(Configuration.PROP_PORT, "6667");
44   - props.setProperty(Configuration.PROP_NICK, "Gordon16k");
  45 + props.setProperty(Configuration.PROP_NICK, "g99k");
45 46
46 47 Configuration config = Configuration.getSelf();
47 48 config.parseRawConfig(props);

0 comments on commit 1de23f3

Please sign in to comment.
Something went wrong with that request. Please try again.