Permalink
Browse files

Initial commit

  • Loading branch information...
barnabywalters committed Oct 16, 2012
1 parent 7247241 commit 8fa31395b94c0221c53bcd0380593d39ba4e31a4
Showing with 628 additions and 0 deletions.
  1. BIN BarnabyWalters/.DS_Store
  2. +352 −0 BarnabyWalters/Helpers.php
  3. +16 −0 composer.json
  4. +260 −0 tests/HelpersTest.php
View
Binary file not shown.
View
@@ -0,0 +1,352 @@
+<?php
+
+namespace BarnabyWalters\Helpers;
+
+/**
+ * Functional Helpers
+ *
+ * A collection of small, stateless utility functions exposed as static methods.
+ * The namespacing is probably redundant, and may be reduced in future versions.
+ *
+ * Some of these are borrowed from elsewhere; credit is given where due.
+ */
+class Helpers {
+
+    /**
+     * Return the first argument that is not empty().
+     *
+     * Accepts any number of arguments and evaluates them left to right. This is a dirty shortcut.
+     *
+     * @return mixed The first non-empty argument, or null if every argument is empty
+     */
+    public static function truest()
+    {
+        foreach (func_get_args() as $candidate)
+        {
+            if (!empty($candidate))
+            {
+                return $candidate;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Shortcut for pretty printing
+     *
+     * Arrays and objects are echoed via print_r() wrapped in <pre> tags; anything else goes through var_dump().
+     *
+     * @param mixed $var The information to print
+     */
+    public static function pp($var)
+    {
+        if (is_array($var) or is_object($var))
+        {
+            echo '<pre>';
+            print_r($var);
+            echo '</pre>';
+        }
+        else
+        {
+            var_dump($var);
+        }
+    }
+
+    /**
+     * Parse a representation of an author out of a URI
+     *
+     * Given a URI, returns a plaintext representation of the author of that URI,
+     * nice and ready to be processed/auto linked in whatever way you see fit.
+     * Currently enabled for the following services:
+     *
+     * * Twitter (profile URLs as well as anything below them, e.g. status URLs)
+     * * Indiewebsite domain (sans protocol) — assumed if the URI doesn’t fit anything else
+     *
+     * @param string $uri The URI to parse
+     * @return string|null|false The parsed author representation, e.g. @barnabywalters or waterpigs.co.uk.
+     *                           For non-twitter URIs this is whatever parse_url() reports as the host.
+     */
+    public static function authorFromUri($uri)
+    {
+        $matches = array();
+
+        // The dot is escaped so only twitter.com matches, and the username may be followed by
+        // a slash, a query string, a fragment or the end of the URI (plain profile URLs have no trailing slash)
+        if (preg_match('|^https?://twitter\.com/([a-zA-Z0-9_]{1,20})(?:[/?#]|$)|', $uri, $matches))
+        {
+            // It’s a twitter URI, $matches[1] contains the @name of the user in question
+            return '@' . strtolower($matches[1]);
+        }
+
+        // Assume it’s an indieweb URL, so the domain is the name
+        return parse_url($uri, PHP_URL_HOST);
+    }
+
+    /**
+     * Fetch Open Graph Image at URL
+     *
+     * Fetches the page at $uri (e.g. an OpenPhoto image page) and parses the open graph
+     * og:image property out of its markup. The lookup is a regex over the raw HTML, not a DOM parse,
+     * and handles the property and content attributes in either order.
+     *
+     * file_get_contents() raises a PHP warning if the page cannot be retrieved; in that case false is returned.
+     *
+     * @param string $uri The URI of the photo page to retrieve
+     * @return string|bool The URI of the image represented by $uri or false if none found
+     */
+    public static function FetchOpenGraphImage($uri)
+    {
+        if (empty($uri))
+        {
+            return false;
+        }
+
+        $html = file_get_contents($uri);
+
+        if ($html === false)
+        {
+            return false;
+        }
+
+        $patterns = array(
+            // <meta property="og:image" content="...">
+            '/<meta[^>]+property=["\']og:image["\'][^>]*\scontent=["\']([^"\']+)["\']/i',
+            // <meta content="..." property="og:image">
+            '/<meta[^>]+content=["\']([^"\']+)["\'][^>]*\sproperty=["\']og:image["\']/i'
+        );
+
+        foreach ($patterns as $pattern)
+        {
+            $matches = array();
+            if (preg_match($pattern, $html, $matches))
+            {
+                // Attribute values are HTML-escaped (e.g. &amp; in query strings), so decode them
+                return html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8');
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * ~ THE TRUNCENATOR ~
+     *
+     * Takes a string (tweet-like note) and some config params, produces a truncated version to spec.
+     *
+     * If the text (with URLs counted as $urilen characters when $urilen is an int) does not fit in
+     * $length minus the appended ellipsis/URI, it is cut back to the end of the last whole word that fits
+     * and $ellipsis plus ' ' . $uri are appended. Otherwise the text is returned with the canonical URI
+     * appended, if one was given.
+     *
+     * Note: this sets mb_internal_encoding('UTF-8') as a side effect.
+     *
+     * @param string $string The string to be truncated
+     * @param int $length The maximum length of the output
+     * @param string $uri The canonical URI of the post, to be added to the end
+     * @param int $urilen Treat any URLs as if they were this length
+     * @param bool $parens If truncation is not required, surround the canonical link with parens (())
+     * @param string $ellipsis The string to append in the case of truncation
+     * @param int $hastags The number of hashtags present in the text to preserve if truncation occurs
+     *                     (currently unused)
+     *
+     * @return string The truncated string
+     * @todo A lot of this functionality is not properly implemented
+     */
+    public static function truncate($string, $length=140, $uri=null, $urilen=null, $parens=true, $ellipsis='', $hastags=1)
+    {
+        mb_internal_encoding('UTF-8');
+
+        // Figure out what gets appended if truncation occurs
+        $append = $ellipsis;
+        if (!empty($uri))
+        {
+            $append .= ' ' . $uri;
+        }
+
+        // if $urilen is set, swap URIs within the text for dummy words of $urilen chars, remembering the originals
+        $uris = array();
+        if (is_int($urilen))
+        {
+            foreach (self::findUrls($string, false) as $key => $url)
+            {
+                $dummy = 'URL' . $key;
+                // max() guards against a negative repeat count when $urilen is shorter than the dummy prefix
+                $dummy .= str_repeat('X', max(0, $urilen - mb_strlen($dummy)));
+                $uris[$dummy] = $url;
+                $string = str_replace($url, $dummy, $string);
+            }
+        }
+
+        // Collect every word with its *character* offset.
+        // PREG_OFFSET_CAPTURE reports byte offsets, so each one is converted before being compared
+        // against character lengths; the u modifier keeps multibyte letters inside their word.
+        $words = array();
+        $matches = array();
+        if (preg_match_all('/\b\w+\b/u', $string, $matches, PREG_OFFSET_CAPTURE))
+        {
+            foreach ($matches[0] as $match)
+            {
+                $words[] = array(mb_strlen(substr($string, 0, $match[1])), $match[0]);
+            }
+        }
+        // $words = {[offset, 'string'], [offset, 'string'] •••}
+
+        $maxplainlen = $length - self::uriMbStrlen($append, $urilen);
+
+        // See if truncation will happen
+        if (self::uriMbStrlen($string, $urilen) > $maxplainlen)
+        {
+            // Fallback when no word crosses the limit (e.g. trailing punctuation does): hard cut at the limit
+            $plaintargetlen = max(0, $maxplainlen);
+            $previousEnd = 0;
+
+            foreach ($words as $word)
+            {
+                $wordEnd = $word[0] + mb_strlen($word[1]);
+
+                // Is the current word the first to cross $maxplainlen?
+                if ($wordEnd > $maxplainlen)
+                {
+                    // Yes. The current word and all words after it must be removed.
+                    // If it is the very first word, nothing fits and the plain text is cut to zero length.
+                    $plaintargetlen = $previousEnd;
+                    break;
+                }
+
+                $previousEnd = $wordEnd;
+            }
+
+            // Truncate string and add the append
+            $trunc = mb_substr($string, 0, $plaintargetlen) . $append;
+        }
+        elseif (empty($uri))
+        {
+            // Nothing to append, so the text goes out untouched rather than with an empty ' ()'
+            $trunc = $string;
+        }
+        else
+        {
+            // If no truncation required, just append the URL
+            // TODO: if adding the space and brackets will push over the edge, remove enough words to compensate
+            // TODO: write edge-case test to cover that scenario
+            $trunc = $parens ? $string . ' (' . $uri . ')' : $string . ' ' . $uri;
+        }
+
+        // expand dummies back into full URIs (no-op unless $urilen was set)
+        foreach ($uris as $dummy => $original)
+        {
+            $trunc = str_replace($dummy, $original, $trunc);
+        }
+
+        return $trunc;
+    }
+
+    /**
+     * Replace <img> elements with their @src
+     *
+     * Finds all img elements and replaces each with the value of its src attribute. Very useful for
+     * content syndication to services which do not allow HTML. Each tag is matched on its own, so several
+     * images in one string are all expanded. img elements without a quoted src are left as they are.
+     *
+     * @param string $str The string to process
+     * @return string The original $str with all <img> tags replaced by their @src value
+     */
+    public static function expandImg($str)
+    {
+        // [^>] keeps the match inside a single tag; the (?:…\s)? prefix requires whitespace directly
+        // before src so attributes such as data-src are not mistaken for it
+        return preg_replace('/<img\s+(?:[^>]*?\s)?src\s*=\s*(["\'])(.*?)\1[^>]*>/i', '$2', $str);
+    }
+
+    /**
+     * Find the length a string would be if all URLs were a certain length
+     *
+     * @param string $string The string to process
+     * @param int $urilen The length to treat all URIs in $string as. If this is not an int
+     *                    the URIs are counted at their real length.
+     * @return int The length $string would be if all URIs were $urilen long
+     */
+    public static function uriMbStrlen($string, $urilen)
+    {
+        if (is_int($urilen))
+        {
+            // Stand-in of exactly $urilen chars for every URL found
+            $placeholder = str_repeat('X', $urilen);
+
+            foreach (self::findUrls($string, false) as $url)
+            {
+                $string = str_replace($url, $placeholder, $string);
+            }
+        }
+
+        return mb_strlen($string, 'UTF-8');
+    }
+
+    /**
+     * DateTime to <time>
+     *
+     * Generates a <time> element given a PHP DateTime object
+     * Currently only supports a resolution of YYYY-MM-DD. The title attribute carries
+     * the 'Y-z' format (year and zero-based day of the year).
+     *
+     * @todo Add support for more precise times
+     * @todo Add support for string dates using strtotime()
+     *
+     * @param DateTime $datetime The datetime to turn into a <time> element
+     * @return string A <time> element representing $datetime
+     */
+    public static function timeElement($datetime)
+    {
+        $date = $datetime->format('Y-m-d');
+        $ordinal = $datetime->format('Y-z');
+
+        return '<time datetime="' . $date . '" title="' . $ordinal . '">' . $date . '</time>';
+    }
+
+    /**
+     * Slugify
+     *
+     * The ultimate safe URL generator, courtesy of http://cubiq.org/the-perfect-php-clean-url-generator
+     * Given a string, makes it uber-readable and URI safe
+     *
+     * Note: this calls setlocale(LC_ALL, 'en_US.UTF8') as a side effect.
+     *
+     * @param string $str The string to process
+     * @param array $replace An array of characters to replace with whitespace
+     * @param string $delimiter The character to use to separate words, defaulting to '-'
+     * @return string The cleaned string
+     */
+    public static function toAscii($str, $replace=array(), $delimiter='-')
+    {
+        setlocale(LC_ALL, 'en_US.UTF8');
+
+        if (!empty($replace))
+        {
+            $str = str_replace((array) $replace, ' ', $str);
+        }
+
+        // Transliterate to ASCII, drop everything that is neither alphanumeric nor a separator,
+        // then collapse each run of separators into a single delimiter
+        $clean = iconv('UTF-8', 'ASCII//TRANSLIT', $str);
+        $clean = preg_replace("/[^a-zA-Z0-9\/_|+ -]/", '', $clean);
+        $clean = strtolower(trim($clean, '-'));
+        $clean = preg_replace("/[\/_|+ -]+/", $delimiter, $clean);
+
+        return $clean;
+    }
+
+    /**
+     * Tagstring to Array
+     *
+     * Takes a comma delimited tag string, returns an array of the tags contained within.
+     * Each tag is trimmed and passed through htmlspecialchars() with ENT_QUOTES.
+     *
+     * @param string $tagstring The comma delimited string to process
+     * @return array An array of the tags contained within $tagstring
+     */
+    public static function tagstringToArray($tagstring)
+    {
+        return array_map(function($tag) {
+            return htmlspecialchars(trim($tag), ENT_QUOTES);
+        }, explode(',', trim($tagstring)));
+    }
+
+    /**
+     * Clean Tagstring
+     *
+     * Normalises a tag string by converting it to an array, then collapsing the array into a string.
+     *
+     * @param string $tagstring The comma-delimited string to clean
+     * @return string The cleaned string
+     */
+    public static function tagstringClean($tagstring)
+    {
+        return implode(',', self::tagstringToArray($tagstring));
+    }
+
+    /**
+     * Date to ATOM Date
+     *
+     * @param string $date A string representing the date to process, in any format strtotime() understands
+     * @return string $date formatted as an ATOM date
+     *
+     * @todo Allow $date to be a DateTime object
+     */
+    public static function atomDate($date)
+    {
+        return date(DATE_ATOM, strtotime($date));
+    }
+
+    /**
+     * Find URLs
+     *
+     * @param string $text The string to find URLs in
+     * @param bool $tidy Whether or not to tidy the URLs with cassis web_address_to_uri(, true)
+     * @return array An array containing all the URLs found in $text
+     */
+    public static function findUrls($text, $tidy=true)
+    {
+        // Pattern is from cassis.js, slightly modified to not look for twitter names
+        // E.G. beforehand it would return @tantek for @tantek.com. This function is just interested in addresses, not twitter stuff
+        $pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/';
+
+        $found = array();
+
+        // preg_match_all gives 0 for no matches and false on error; both mean there is nothing to return
+        if (!preg_match_all($pattern, $text, $found))
+        {
+            return array();
+        }
+
+        // Normalise
+        $links = array_values($found[0]);
+
+        // Anything echoed while tidying is captured by the output buffer and thrown away
+        ob_start();
+        $links = array_map(function($link) use ($tidy) {
+            return $tidy ? web_address_to_uri($link, true) : $link;
+        }, $links);
+        ob_end_clean();
+
+        // $links = ['http://someurl.tld', •••]
+
+        return $links;
+    }
+}
+
+// EOF Helpers.php
View
@@ -0,0 +1,16 @@
+{
+ "name": "barnabywalters/helpers",
+ "type": "library",
+ "description": "Yet another library of those damned helper functions.",
+ "autoload": {
+ "psr-0": {
+ "BarnabyWalters": ""
+ }
+ },
+ "require": {
+ "tantek/cassis": "*"
+ },
+ "suggest": {
+ "dflydev/markdown": "Required in order for processText to work properly"
+ }
+}
Oops, something went wrong.

0 comments on commit 8fa3139

Please sign in to comment.