Permalink
Browse files

Added !ignore <regex> command for urls

  • Loading branch information...
1 parent 1de23f3 commit 71ecd3d6e79f2a6ed50bb9ccdb99a5c2f58f5fab @paulalesius paulalesius committed Dec 8, 2011
@@ -14,6 +14,6 @@ forskbot.channels=#testchannel63546364,#testchannel12312
# Optional properties
#---
# bot nick
-forskbot.nick=g13k
+forskbot.nick=g14k
# bot name
forskbot.name=gordon
View
@@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>forskbot</groupId>
<artifactId>forskbot</artifactId>
- <version>0.0.4-SNAPSHOT</version>
+ <version>0.0.5-SNAPSHOT</version>
<name>Gordon9k</name>
<contributors>
@@ -14,12 +14,18 @@
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
import javax.net.SocketFactory;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
+import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
@@ -38,6 +44,8 @@
public static final Pattern TITLE_PATTERN = Pattern.compile("^.*(<[\\s+]?(?i:title)[\\s+]?>(.*?)<[\\s+]?/[\\s+]?(?i:title)[\\s+]?>).*$");
public static final int CONNECT_TIMEOUT_MS = 2000;
public static final int PING_TIMEOUT_MS = 350000;
+ public static final int MAX_SERVER_LINE_LENGTH = 5000;
+ public static final int MAX_URLMATCHES_PERLINE = 3;
private Logger log = Logger.getLogger(IrcBot.class);
private String host;
@@ -47,21 +55,46 @@
private Socket socket;
private BufferedReader reader;
private BufferedWriter writer;
- private static final int MAX_SERVER_LINE_LENGTH = 5000;
- private static final int MAX_URLMATCHES_PERLINE = 3;
+ private ExecutorService es;
private boolean reconnect = true;
//
private String nl = "\r\n";
+ private SSLSocketFactory sf;
+ private Set<String> ignores;
+ private Set<String> quitVoters;
- public IrcBot() {
+ public IrcBot() throws Exception { // now declares a checked exception; presumably for the SSLContext setup below (getInstance/init throw checked exceptions)
Configuration config = Configuration.getSelf();
this.host = config.getHost();
this.port = config.getPort();
this.nick = config.getNick();
this.name = config.getName();
+ this.es = Executors.newSingleThreadExecutor(); // one worker thread; rwLoop submits TitleHandler tasks here instead of spawning a Thread per url
+ SSLContext sc = SSLContext.getInstance("TLS");
+ TrustManager[] tm = new TrustManager[] { new X509TrustManager() { // NOTE(review): both check methods are empty, so no certificate chain is ever rejected
+ @Override
+ public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // empty body: never throws
+
+ }
+
+ @Override
+ public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { // empty body: never throws, i.e. any server certificate is accepted
+
+ }
+
+ @Override
+ public X509Certificate[] getAcceptedIssuers() {
+
+ return null; // NOTE(review): the X509TrustManager contract asks for a non-null (possibly empty) array — confirm null is tolerated here
+ }
+ } };
+ sc.init(null, tm, new SecureRandom()); // null key managers: only the trust manager above is installed
+ this.sf = sc.getSocketFactory(); // built once and reused by TitleHandler.run() for https connections
+ this.ignores = new HashSet<String>(); // regex strings registered through the "!ignore" bot command; urls matching one are skipped
+ this.quitVoters = new HashSet<String>(); // distinct senders (parts[0]) that issued "!quit"
}
private void setRequestProperties(URLConnection conn) {
@@ -102,6 +135,14 @@ private void write(String line) throws IOException {
}
}
+ private void privmsg(String msg, String target) throws IOException { // sends "PRIVMSG <target> :<msg>" terminated by nl ("\r\n") to the server
+
+ synchronized (writer) { // same lock TitleHandler.run() takes before writing a title, so the two writers do not interleave
+ writer.write("PRIVMSG " + target + " :" + msg + nl); // msg and target are concatenated as-is; no escaping is applied here
+ writer.flush(); // push the line out immediately rather than leaving it in the BufferedWriter
+ }
+ }
+
/**
* Loop over reads from server. The lines received will be in the form of
* "<server id> <irc protocol command> <the rest ...>"
@@ -146,15 +187,42 @@ public void rwLoop() throws IOException {
// Handle only public
if (chanOrNick.startsWith("#")) {
- // Bot command
if (messageParts.length >= 2 && messageParts[0].matches(":" + Configuration.getSelf().getNick() + ".?")) {
+ // Bot commands
if (messageParts[1].equals("!quit")) {
- write("QUIT :Byte");
- reconnect = false;
- break;
+ log.info("Quit vote from: " + parts[0]);
+ quitVoters.add(parts[0]);
+ int left = 3 - quitVoters.size();
+ if (left == 0) {
+ write("QUIT :Byte");
+ reconnect = false;
+ break;
+ } else {
+ privmsg(left + " more", chanOrNick);
+ }
+
+ continue;
+ } else if (messageParts[1].equals("!ignore")) {
+
+ try {
+ if (!messageParts[2].endsWith("$")) {
+ messageParts[2] = messageParts[2] + "$";
+ }
+ if (!messageParts[2].startsWith("^")) {
+ messageParts[2] = "^" + messageParts[2];
+ }
+ ignores.add(messageParts[2].trim());
+ } catch (PatternSyntaxException pse) {
+ log.error("Invalid pattern: " + pse.getPattern() + " from: " + chanOrNick);
+ privmsg("fail", chanOrNick);
+ }
+ privmsg("word", chanOrNick);
+ continue;
+ } else {
+ privmsg("fail", chanOrNick);
+ continue;
}
} else {
- log.debug("Detecing url from: " + Arrays.toString(messageParts));
int currMatches = 0;
for (int i = 0; i < messageParts.length; i++) {
String part = messageParts[i];
@@ -168,9 +236,19 @@ public void rwLoop() throws IOException {
}
try {
- new Thread(new TitleHandler(chanOrNick, part)).start();
- } catch (Throwable t) {
- log.error(t);
+ boolean ign = false;
+ for (String ignore : ignores) {
+ if (part.matches(ignore)) {
+ log.info("Ignore: " + part + " with pattern: " + ignore);
+ ign = true;
+ break;
+ }
+ }
+ if (!ign) {
+ es.execute(new TitleHandler(chanOrNick, part));
+ }
+ } catch (IllegalArgumentException iae) {
+ log.error(iae);
}
}
}
@@ -199,7 +277,7 @@ public void rwLoop() throws IOException {
}
}
- private class TitleHandler implements Runnable {
+ public class TitleHandler implements Runnable {
private String chanOrNick;
private URI uri;
@@ -211,9 +289,7 @@ public TitleHandler(String chanOrNick, String uriStr) throws IllegalArgumentExce
}
uri = URI.create(uriStr);
-
this.chanOrNick = chanOrNick;
-
}
@Override
@@ -228,28 +304,7 @@ public void run() {
URLConnection conn = uri.toURL().openConnection();
if (uri.getScheme().equals("https")) {
HttpsURLConnection https = (HttpsURLConnection) conn;
- SSLContext sc = SSLContext.getInstance("TLS");
- TrustManager[] tm = new TrustManager[] { new X509TrustManager() {
-
- @Override
- public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
-
- }
-
- @Override
- public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException {
-
- }
-
- @Override
- public X509Certificate[] getAcceptedIssuers() {
-
- return null;
- }
-
- } };
- sc.init(null, tm, new SecureRandom());
- https.setSSLSocketFactory(sc.getSocketFactory());
+ https.setSSLSocketFactory(sf);
}
conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
conn.setReadTimeout(CONNECT_TIMEOUT_MS);
@@ -275,16 +330,18 @@ public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws Certi
Matcher matcher = TITLE_PATTERN.matcher(buffer);
if (matcher.find()) {
String pageTitle = matcher.group(2).replaceAll("\\s+", " ").trim();
- TitleSimilarity ts = new TitleSimilarity(uri.toASCIIString(), pageTitle);
- if (!ts.isSimilar()) {
+ float score = isSimilar(uri.toASCIIString().toLowerCase(), pageTitle);
+ if (score <= 0.3f) {
+ log.info("Title contained in url (" + score + "): " + uri.toASCIIString() + " title: " + pageTitle);
log.info("Writeback of url to " + chanOrNick + " : " + chanOrNick);
synchronized (writer) {
- writer.write("PRIVMSG " + chanOrNick + " :" + pageTitle + "\n");
+ writer.write("PRIVMSG " + chanOrNick + " :" + pageTitle + nl);
writer.flush();
}
break;
} else {
+ log.info("Title not contained in url (" + score + "): " + uri.toASCIIString() + " title: " + pageTitle);
break;
}
}
@@ -300,6 +357,28 @@ public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws Certi
log.error(e);
}
}
+
+ public float isSimilar(String uri, String title) { // despite the name, returns a score: the fraction of title words (length >= 2) that occur in uri, or -1
+
+ int keywordsTotal = 0; // title words that passed the length filter
+ int keywordsContained = 0; // of those, how many appear as a substring of uri
+ float score = 0f;
+
+ for (String word : title.split("\\s+")) { // whitespace-separated title tokens
+ if (word.length() >= 2) { // one-character tokens are not counted
+ keywordsTotal++;
+ if (uri.contains(word.toLowerCase())) { // only the word is lower-cased here; the visible caller passes an already lower-cased uri
+ keywordsContained++;
+ }
+ }
+ }
+
+ if (keywordsTotal == 0 || keywordsContained == 0) {
+ return -1; // sentinel for "no usable words" or "no word found"; the caller's score <= 0.3f test treats it like a low score
+ }
+ score = (float) keywordsContained / keywordsTotal; // cast first so the division is not integer division
+ return score;
+ }
}
public boolean isReconnect() {
@@ -1,53 +0,0 @@
-
-package forskbot.irc;
-
-/**
- *
- * @author interhack
- *
- */
-public class TitleSimilarity {
-
- public static final int WORDLEN_THRESH = 2;
- //
- private int keywordsTotal = 0;
- private int keywordsContained = 0;
- //
- private float score = 0f;
-
- public TitleSimilarity(String uri, String title) {
-
- matchFilteredContained(uri.toLowerCase(), title.toLowerCase());
- }
-
- private void matchFilteredContained(String uri, String title) {
-
- for (String word : title.split("\\s+")) {
- if (word.length() >= WORDLEN_THRESH && word.matches("\\w+")) {
- keywordsTotal++;
- if (uri.contains(word)) {
- keywordsContained++;
- }
- }
- }
- }
-
- public boolean isSimilar() {
-
- if (keywordsTotal == 0 || keywordsContained == 0) {
- return false;
- }
-
- score = (float) keywordsContained / keywordsTotal;
- if (score >= 0.3f) {
- return true;
- } else {
- return false;
- }
- }
-
- public float getScore() {
-
- return score;
- }
-}
@@ -1,29 +0,0 @@
-
-package forskbot;
-
-import org.apache.log4j.ConsoleAppender;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.log4j.PatternLayout;
-import org.junit.BeforeClass;
-
-/**
- *
- * @author interhack
- *
- */
-public class AbstractTest {
-
- @BeforeClass
- public static void init() throws Exception {
-
- Logger.getRootLogger().setLevel(Level.INFO);
- Logger.getRootLogger().addAppender(new ConsoleAppender(new PatternLayout(PatternLayout.TTCC_CONVERSION_PATTERN)));
-
- Logger.getRootLogger().getLoggerRepository().getLogger("forskbot").setLevel(Level.ALL);
- Logger.getRootLogger().getLoggerRepository().getLogger("forskbot").setAdditivity(false);
- Logger.getRootLogger().getLoggerRepository().getLogger("forskbot")
- .addAppender(new ConsoleAppender(new PatternLayout(PatternLayout.TTCC_CONVERSION_PATTERN)));
- }
-
-}
Oops, something went wrong.

0 comments on commit 71ecd3d

Please sign in to comment.