Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

add bti --shrink-urls

This patch adds a bti-shrink-urls script which uses http://2tu.us/ to
convert any URLs to something more sensible for a tweet.

bti, with the --shrink-urls option, now uses the above to convert any
URLs it finds.

'shrink-urls' can also be set to 'true' or 'yes' in the config file.

Signed-off-by: Bart Trojanowski <bart@jukie.net>
  • Loading branch information...
commit 3f148399e9109e25741fcd4a32e17317c573a5ea 1 parent 60a751f
@bartman bartman authored
Showing with 376 additions and 4 deletions.
  1. +6 −4 Makefile
  2. +96 −0 bti-shrink-urls
  3. +274 −0 bti.c
View
10 Makefile
@@ -19,6 +19,7 @@
VERSION = 015
PROGRAM = bti
+SCRIPTS = bti-shrink-urls
CORE_OBJS = \
bti.o
@@ -35,7 +36,8 @@ LD = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
XML2_CFLAGS = `xml2-config --cflags`
-override CFLAGS += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS)
+PCRE_CFLAGS = `pcre-config --cflags`
+override CFLAGS += -g -Wall -pipe -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -O2 $(XML2_CFLAGS) $(PCRE_CFLAGS)
WARNINGS = -Wstrict-prototypes -Wsign-compare -Wshadow \
-Wchar-subscripts -Wmissing-declarations -Wnested-externs \
@@ -68,14 +70,14 @@ export E Q
#LIB_OBJS = -lcurl -lnsl -lssl -lcrypto
LIB_OBJS = -lcurl -lnsl -lreadline
LIB_XML2 = `xml2-config --libs`
+LIB_PCRE = `pcre-config --libs`
all: $(PROGRAM) $(MAN_PAGES)
# "Static Pattern Rule" to build all programs
bti: %: $(HEADERS) $(GEN_HEADERS) $(CORE_OBJS)
$(E) " LD " $@
- $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2)
-
+ $(Q) $(LD) $(LDFLAGS) $(CORE_OBJS) -o $@ $(LIB_OBJS) $(LIB_XML2) $(LIB_PCRE)
# build the objects
%.o: %.c $(HEADERS) $(GEN_HEADERS)
@@ -119,6 +121,6 @@ release:
install: all
$(E) " INSTALL " ${DEST}${PREFIX}
- ${Q} ${INSTALL_BIN} ${PROGRAM}
+ ${Q} ${INSTALL_BIN} ${PROGRAM} ${SCRIPTS}
${Q} ${INSTALL_MAN} ${MAN_PAGES}
.PHONY: install
View
96 bti-shrink-urls
@@ -0,0 +1,96 @@
+#!/bin/bash
+# Copyright (C) 2009 Bart Trojanowski <bart@jukie.net>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+needs_escape=true
+
+while test -n "$1" ; do
+ word="$1"
+ shift
+ case "$word" in
+ --escaped)
+ needs_escape=
+ ;;
+ --help|-h)
+ cat <<END
+bti-shrink-urls - convert URLs to a shorter form using a web service
+
+ $0 [--escaped] [<url>]
+
+Currently only http://2tu.us/ is supported.
+END
+ exit 0
+ ;;
+ *)
+ URL=$word
+ ;;
+ esac
+done
+
+function convert_url() {
+ local url=$1
+ test -n "$url" || return 1
+ test "${url%:*}" = 'http' || return 1
+
+ local urllen="${#url}"
+
+ # http://en.wikipedia.org/wiki/Percent-encoding
+ if test -n "$needs_escape" ; then
+ url=$(echo "$url" | sed -e 's/\%/%25/g' \
+ -e 's/!/%21/g' \
+ -e 's/*/%2A/g' \
+ -e "s/'/%27/g" \
+ -e 's/(/%28/g' \
+ -e 's/)/%29/g' \
+ -e 's/;/%3B/g' \
+ -e 's/:/%3A/g' \
+ -e 's/@/%40/g' \
+ -e 's/&/%26/g' \
+ -e 's/=/%3D/g' \
+ -e 's/+/%2B/g' \
+ -e 's/\$/%24/g' \
+ -e 's/,/%2C/g' \
+ -e 's,/,%2F,g' \
+ -e 's/?/%3F/g' \
+ -e 's/#/%23/g' \
+ -e 's/\[/%5B/g' \
+ -e 's/]/%5D/g')
+ fi
+
+ # http://2tu.us/
+ local submit="http://2tu.us/?save=y&url=$url"
+
+ local res=$(wget -q -O - "$submit" | awk -F"'" '/Your tight URL is:/ { print $2 }')
+ if test "${res%:*}" = 'http' -a "${#res}" -lt "$urllen" ; then
+ echo $res
+ return 0
+ fi
+ return 1
+}
+
+function die() {
+ echo >&2 $@
+ exit 1
+}
+
+if test -n "$URL" ; then
+ convert_url "$URL" || die "Failed to shrink '$URL'"
+ exit $?
+fi
+
+test -t 0 && echo >&2 "Type in some urls and I'll try to shrink them for you..."
+while read line ; do
+ convert_url "$line" || echo $line
+done
View
274 bti.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2008 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2009 Bart Trojanowski <bart@jukie.net>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -26,11 +27,14 @@
#include <unistd.h>
#include <time.h>
#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include <curl/curl.h>
#include <readline/readline.h>
#include <libxml/xmlmemory.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
+#include <pcre.h>
#include "bti_version.h"
@@ -69,6 +73,7 @@ struct session {
char *logfile;
char *user;
int bash;
+ int shrink_urls;
enum host host;
enum action action;
};
@@ -94,6 +99,7 @@ static void display_help(void)
fprintf(stdout, " --proxy PROXY:PORT\n");
fprintf(stdout, " --host HOST\n");
fprintf(stdout, " --logfile logfile\n");
+ fprintf(stdout, " --shrink-urls\n");
fprintf(stdout, " --bash\n");
fprintf(stdout, " --debug\n");
fprintf(stdout, " --version\n");
@@ -427,6 +433,7 @@ static void parse_configfile(struct session *session)
char *action = NULL;
char *user = NULL;
char *file;
+ int shrink_urls = 0;
/* config file is ~/.bti */
file = alloca(strlen(session->homedir) + 7);
@@ -492,6 +499,12 @@ static void parse_configfile(struct session *session)
c += 5;
if (c[0] != '\0')
user = strdup(c);
+ } else if (!strncasecmp(c, "shrink-urls", 11) &&
+ (c[11] == '=')) {
+ c += 12;
+ if (!strncasecmp(c, "true", 4) ||
+ !strncasecmp(c, "yes", 3))
+ shrink_urls = 1;
}
} while (!feof(config_file));
@@ -531,6 +544,7 @@ static void parse_configfile(struct session *session)
if (user) {
session->user = user;
}
+ session->shrink_urls = shrink_urls;
/* Free buffer and close file. */
free(line);
@@ -615,6 +629,259 @@ static char *get_string_from_stdin(void)
return string;
}
+static int find_urls(const char *tweet, int **pranges)
+{
+ // magic obtained from http://www.geekpedia.com/KB65_How-to-validate-an-URL-using-RegEx-in-Csharp.html
+ static const char *re_magic =
+ "(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)/{1,3}"
+ "[0-9a-zA-Z;/~?:@&=+$\\.\\-_'()%]+)"
+ "(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?";
+ pcre *re;
+ const char *errptr;
+ int erroffset;
+ int ovector[10] = {0,};
+ const size_t ovsize = sizeof(ovector)/sizeof(*ovector);
+ int startoffset, tweetlen;
+ int i, rc;
+ int rbound = 10;
+ int rcount = 0;
+ int *ranges = malloc(sizeof(int) * rbound);
+
+ re = pcre_compile(re_magic,
+ PCRE_NO_AUTO_CAPTURE,
+ &errptr, &erroffset, NULL);
+ if (!re) {
+ fprintf(stderr, "pcre_compile @%u: %s\n", erroffset, errptr);
+ exit (1);
+ }
+
+ tweetlen = strlen(tweet);
+ for (startoffset=0; startoffset<tweetlen; ) {
+
+ rc = pcre_exec(re, NULL, tweet, strlen(tweet), startoffset, 0,
+ ovector, ovsize);
+ if (rc == PCRE_ERROR_NOMATCH)
+ break;
+
+ if (rc<0) {
+ fprintf(stderr, "pcre_exec @%u: %s\n", erroffset, errptr);
+ exit (1);
+ }
+
+ for (i=0; i<rc; i+=2) {
+ if ((rcount+2) == rbound) {
+ rbound *= 2;
+ ranges = realloc(ranges, sizeof(int) * rbound);
+ }
+
+ ranges[rcount++] = ovector[i];
+ ranges[rcount++] = ovector[i+1];
+ }
+
+ startoffset = ovector[1];
+ }
+
+ pcre_free(re);
+
+ *pranges = ranges;
+ return rcount;
+}
+
+/**
+ * bidirectional popen() call
+ *
+ * @param rwepipe - int array of size three
+ * @param exe - program to run
+ * @param argv - argument list
+ * @return pid or -1 on error
+ *
+ * The caller passes in an array of three integers (rwepipe), on successful
+ * execution it can then write to element 0 (stdin of exe), and read from
+ * element 1 (stdout) and 2 (stderr).
+ */
+static int popenRW(int *rwepipe, const char *exe, const char *const argv[])
+{
+ int in[2];
+ int out[2];
+ int err[2];
+ int pid;
+ int rc;
+
+ rc = pipe(in);
+ if (rc<0)
+ goto error_in;
+
+ rc = pipe(out);
+ if (rc<0)
+ goto error_out;
+
+ rc = pipe(err);
+ if (rc<0)
+ goto error_err;
+
+ pid = fork();
+ if (pid > 0) { // parent
+ close(in[0]);
+ close(out[1]);
+ close(err[1]);
+ rwepipe[0] = in[1];
+ rwepipe[1] = out[0];
+ rwepipe[2] = err[0];
+ return pid;
+ } else if (pid == 0) { // child
+ close(in[1]);
+ close(out[0]);
+ close(err[0]);
+ close(0);
+ dup(in[0]);
+ close(1);
+ dup(out[1]);
+ close(2);
+ dup(err[1]);
+
+ execvp(exe, (char**)argv);
+ exit(1);
+ } else
+ goto error_fork;
+
+ return pid;
+
+error_fork:
+ close(err[0]);
+ close(err[1]);
+error_err:
+ close(out[0]);
+ close(out[1]);
+error_out:
+ close(in[0]);
+ close(in[1]);
+error_in:
+ return -1;
+}
+
+static int pcloseRW(int pid, int *rwepipe)
+{
+ int rc, status;
+ close(rwepipe[0]);
+ close(rwepipe[1]);
+ close(rwepipe[2]);
+ rc = waitpid(pid, &status, 0);
+ return status;
+}
+
+static char *shrink_one_url(int *rwepipe, char *big)
+{
+ int biglen = strlen(big);
+ char *small;
+ int smalllen;
+ int rc;
+
+ rc = dprintf(rwepipe[0], "%s\n", big);
+ if (rc < 0)
+ return big;
+
+ smalllen = biglen + 128;
+ small = malloc(smalllen);
+ if (!small)
+ return big;
+
+ rc = read(rwepipe[1], small, smalllen);
+ if (rc < 0 || rc > biglen)
+ goto error_free_small;
+
+ if (strncmp(small, "http://", 7))
+ goto error_free_small;
+
+ smalllen = rc;
+ while (smalllen && isspace(small[smalllen-1]))
+ small[--smalllen] = 0;
+
+ free (big);
+ return small;
+
+error_free_small:
+ free(small);
+ return big;
+}
+
+static char *shrink_urls(char *text)
+{
+ int *ranges;
+ int rcount;
+ int i;
+ int inofs = 0;
+ int outofs = 0;
+ const char *const shrink_args[] = {
+ "bti-shrink-urls",
+ NULL
+ };
+ int shrink_pid;
+ int shrink_pipe[2];
+ int inlen = strlen(text);
+
+ dbg("before len=%u\n", inlen);
+
+ shrink_pid = popenRW(shrink_pipe, shrink_args[0], shrink_args);
+ if (shrink_pid < 0)
+ return text;
+
+ rcount = find_urls(text, &ranges);
+
+ for (i=0; i<rcount; i+=2) {
+ int url_start = ranges[i];
+ int url_end = ranges[i+1];
+ int long_url_len = url_end - url_start;
+ char *url = strndup(text + url_start, long_url_len);
+ int short_url_len;
+ int not_url_len = url_start - inofs;
+
+ dbg("long url[%u]: %s\n", long_url_len, url);
+ url = shrink_one_url(shrink_pipe, url);
+ short_url_len = url ? strlen(url) : 0;
+ dbg("short url[%u]: %s\n", short_url_len, url);
+
+ if (!url || short_url_len >= long_url_len) {
+ // the short url ended up being too long or unavailable
+ if (inofs) {
+ strncpy(text + outofs, text + inofs,
+ not_url_len + long_url_len);
+ }
+ inofs += not_url_len + long_url_len;
+ outofs += not_url_len + long_url_len;
+
+ } else {
+ // copy the unmodified block
+ strncpy(text + outofs, text + inofs, not_url_len);
+ inofs += not_url_len;
+ outofs += not_url_len;
+
+ // copy the new url
+ strncpy(text + outofs, url, short_url_len);
+ inofs += long_url_len;
+ outofs += short_url_len;
+ }
+
+ free (url);
+ }
+
+ // copy the last block after the last match
+ if (inofs) {
+ int tail = inlen - inofs;
+ if (tail) {
+ strncpy(text + outofs, text + inofs, tail);
+ outofs += tail;
+ }
+ }
+
+ free(ranges);
+
+ (void)pcloseRW(shrink_pid, shrink_pipe);
+
+ text[outofs] = 0;
+ dbg("after len=%u\n", outofs);
+ return text;
+}
+
int main(int argc, char *argv[], char *envp[])
{
static const struct option options[] = {
@@ -626,6 +893,7 @@ int main(int argc, char *argv[], char *envp[])
{ "action", 1, NULL, 'A' },
{ "user", 1, NULL, 'u' },
{ "logfile", 1, NULL, 'L' },
+ { "shrink-urls", 0, NULL, 's' },
{ "help", 0, NULL, 'h' },
{ "bash", 0, NULL, 'b' },
{ "version", 0, NULL, 'v' },
@@ -723,6 +991,9 @@ int main(int argc, char *argv[], char *envp[])
session->logfile = strdup(optarg);
dbg("logfile = %s\n", session->logfile);
break;
+ case 's':
+ session->shrink_urls = 1;
+ break;
case 'H':
if (strcasecmp(optarg, "twitter") == 0)
session->host = HOST_TWITTER;
@@ -771,6 +1042,9 @@ int main(int argc, char *argv[], char *envp[])
return -1;
}
+ if (session->shrink_urls)
+ tweet = shrink_urls(tweet);
+
session->tweet = zalloc(strlen(tweet) + 10);
if (session->bash)
sprintf(session->tweet, "$ %s", tweet);
Please sign in to comment.
Something went wrong with that request. Please try again.