Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
barnabywalters committed Oct 16, 2012
1 parent 7247241 commit 8fa3139
Show file tree
Hide file tree
Showing 4 changed files with 628 additions and 0 deletions.
Binary file added BarnabyWalters/.DS_Store
Binary file not shown.
352 changes: 352 additions & 0 deletions BarnabyWalters/Helpers.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,352 @@
<?php

namespace BarnabyWalters\Helpers;

/**
* Functional Helpers
*
* In which I define a load of helpful little functions, nicely namespaced.
* Actually, the namespaceing is probably redundant, and may be reduced in future versions.
*
* Some of these are stolen from elsewhere, credit given where due.
*
*/
class Helpers {

/**
* Returns the truest of the args presented. This is a dirty shortcut.
*/
static function truest()
{
foreach (func_get_args() as $arg)
{
if (!empty($arg))
{
return $arg;
}
}
}

/**
* Shortcut for pretty printing
*
* @param mixed $var The information to print
*/
static function pp($var)
{
if (is_array($var) or is_object($var))
{
echo '<pre>';
print_r($var);
echo '</pre>';
}
else
{
var_dump($var);
}
}

/**
* Parse a representation of an author out of a URI
*
* Given a URI, returns a plaintext representation of the author of that URI,
* nice and ready to be processed/auto linked in whatever way you see fit.
* Currently enabled for the following services:
*
* * Twitter
* * Indiewebsite domain (sans protocol) — assumed if doesn’t fit anything else
*
* @param string $uri The URI to parse
* @return string The parsed author representation, e.g. @barnabywalters or waterpigs.co.uk
*/
static function authorFromUri($uri)
{
$matches = array();
if (preg_match('|^https?://twitter.com/([a-zA-Z0-9_]{1,20})/|', $uri, $matches))
{
// It’s a twitter URI, $matches[1] contains the @name of the user in question
return '@' . strtolower($matches[1]);
}
else
{
// Assume it’s an indieweb URL, so the domain is the name
return parse_url($uri, PHP_URL_HOST);
}
}

/**
* Fetch Open Graph Image at URL
*
* Given a valid OpenPhoto image page URI, this function fetches the page and parses the open graph
* og:image property out of it
*
* @param string $uri The URI of the photo page to retrieve
* @param string $host The openphoto host to use (no trailing slash)
* @return string|bool The URI of the image represented by $uri or false if none found
* @todo implement
*/
static function FetchOpenGraphImage($uri)
{
// Find all occurances of $host . '/p/:id'

// For each, retrieve the page and look for
$html = file_get_contents('http://photos.waterpigs.co.uk/p/oh');
preg_match('/<meta property\=\"og:image\" content\=\"(\W)\"/', $html);
}

/**
* ~ THE TRUNCENATOR ~
*
* Takes a string (tweet-like note) and some config params, produces a truncated version to spec.
*
* @param string $string The string to be truncated
* @param int $length The maximum length of the output
* @param string $ellipsis The string to append in the case of truncation
* @param string $uri The canonical URI of the post, to be added to the end
* @param int $urilen Treat any URLs as if they were this length
* @param bool $parens If trucation is not required, surround the canon. link with parens (())
* @param int $hashtags The number of hashtags present in the text to preserve if trucation occurs
*
* @return string The truncated string
* @todo A lot of this functionality is not properly implemented
*/
static function truncate($string, $length=140, $uri=null, $urilen=null, $parens=true, $ellipsis='', $hastags=1)
{
mb_internal_encoding('UTF-8');

// Figure out total append length if truncation occurs
$append = $ellipsis;
if (!empty($uri)) $append .= ' ' . $uri;

// if $urilen is set, create array of URIs within the text and replace them with dummy text @ $urilen chars
if (is_int($urilen))
{
$uris = array();
foreach (Helpers::findUrls($string, $tidy=false) as $key => $url)
{
$dummy = 'URL' . $key;
$dummy .= str_repeat('X', $urilen - mb_strlen($dummy));
$uris[$dummy] = $url;
$string = str_replace($url, $dummy, $string);
}
}

// Truncate string to nearest WB below that length
$matches = array();
$words = array();
preg_match_all('/\b\w+\b/', $string, $matches, PREG_OFFSET_CAPTURE);
foreach ($matches[0] as $match)
{
// For each match
$words[] = array($match[1], $match[0]);
}
// $words = {[offset, 'string'], [offset, 'string'] •••}

$maxplainlen = $length - Helpers::uriMbStrlen($append, $urilen);

// See if truncation will happen
if (Helpers::uriMbStrlen($string, $urilen) > $maxplainlen)
{
foreach ($words as $key => $word)
{
// Is the current word the first to cross $maxplainlen?
if ($word[0] > $maxplainlen or $word[0] + mb_strlen($word[1]) > $maxplainlen)
{
// Yes. The current word and all words after it must be removed
$plaintargetlen = $words[$key-1][0] + mb_strlen($words[$key-1][1]);
break;
}
}

if (!isset($plaintargetlen)) $plaintargetlen = $maxplainlen;

// Truncate string
$truncatedplain = mb_substr($string, 0, $plaintargetlen);

// Add the append
$trunc = $truncatedplain . $append;
}
else
{
// If no trucation required, just append the URL
// TODO: if adding the space and brackets will push over the edge, remove enough words to compensate
// TODO: write edge-case test to cover that scenario
$trunc = $string . ' (' . $uri . ')';
}

// if $urilen set, expand dummies into full URIs
if (is_int($urilen))
{
foreach ($uris as $dummy => $uri)
{
$trunc = str_replace($dummy, $uri, $trunc);
}
}

return $trunc;
}

/**
* Replace <img> elements with their @href
*
* Finds all img elements and replaces them with the value of their @href. Very useful for content syndication
* to services which do not allow HTML
*
* @param string $str The string to process
* @return string The original $str with all <img> tags replaced by their @href value
*/
static function expandImg($str)
{
return preg_replace('/<img .*src\=\"(\S*)\"+ .* ?\/?>/i', '$1', $str);
}

/**
* Find the length a string would be if all URLs were a certain length
*
* @param string $string The string to process
* @param int $urilen The length to treat all URIs in $string as
* @return int The length $string would be if all URIs were $urilen long
*/
static function uriMbStrlen($string, $urilen)
{
// Find all urls
$urls = Helpers::findUrls($string, $tidy=false);

// Replace them with $urllen chars
if (is_int($urilen))
{
foreach ($urls as $url)
{
$string = str_replace($url, str_repeat('X', $urilen), $string);
}
}

// Return strlen
return mb_strlen($string, 'UTF-8');
}

/**
* DateTime to <time>
*
* Generates a <time> element given a PHP DateTime object
* Currently only supports a resolution of YYYY-MM-DD
*
* @todo Add support for more precise times
* @todo Add support for string dates using strtotime()
*
* @param DateTime $datetime The datetime to turn into a <time> element
* @return string A <time> element representing $datetime
*/
static function timeElement($datetime)
{
$t = '<time datetime="' . $datetime -> format('Y-m-d') . '" title="' . $datetime -> format('Y-z') . '">' . $datetime -> format('Y-m-d') . '</time>';
return $t;
}

/**
* Slugify
*
* The ultimate safe URL generator, courtesy of http://cubiq.org/the-perfect-php-clean-url-generator
* Given a string, makes it uber-readable and URI safe
*
* @param string $str The string to process
* @param array $replace An array of characters to replace with whitespace
* @param string $delimiter The character to use to separate words, defaulting to '-'
* @return string The cleaned string
*/
static function toAscii($str, $replace=array(), $delimiter='-')
{
setlocale(LC_ALL, 'en_US.UTF8');

if(!empty($replace)) {
$str = str_replace((array)$replace, ' ', $str);
}

$clean = iconv('UTF-8', 'ASCII//TRANSLIT', $str);
$clean = preg_replace("/[^a-zA-Z0-9\/_|+ -]/", '', $clean);
$clean = strtolower(trim($clean, '-'));
$clean = preg_replace("/[\/_|+ -]+/", $delimiter, $clean);

return $clean;
}

/**
* Tagstring to Array
*
* Takes a comma delimited tag string, returns an array of the tags contained within.
*
* @param string $tagstring The comma delimited string to process
* @return array An array of the tags contained within $tagstring
*/
static function tagstringToArray($tagstring)
{
$tags = explode(',', trim($tagstring));
$tags = array_map(function($string) {
return htmlspecialchars(trim($string), ENT_QUOTES);
}, $tags);
return $tags;
}

/**
* Clean Tagstring
*
* Normalises a tag string by converting it to an array, then collapsing the array into a string.
*
* @param string $tagstring The comma-delimited string to clean
* @return string The cleaned string
*/
static function tagstringClean($tagstring)
{
$tags = Helpers::tagstringToArray($tagstring);
return implode(',', $tags);
}

/**
* Date to ATOM Date
*
* @param string $date A string representing the date to process
* @param string $date formatted as an ATOM date
*
* @todo Allow $date to be a DateTime object
*/
static function atomDate($date)
{
return date(DATE_ATOM, strtotime($date));
}

/**
* Find URLs
*
* @param string $text The string to find URLs in
* @param bool $tidy Whether or not to tidy the URLs with cassis web_address_to_uri(, true)
* @return array An array containing all the URLs found in $text
*/
static function findUrls($text, $tidy=true)
{
// Pattern is from 1 cassis.js, slightly modified to not look for twitter names
// E.G. beforehand it would return @tantek for @tantek.com. This function is just interested in addresses, not twitter stuff
$pattern = '/(?:(?:(?:(?:http|https|irc)?:\\/\\/(?:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+(?:\\:(?:[!$&-.0-9;=?A-Z_a-z]|(?:\\%[a-fA-F0-9]{2}))+)?\\@)?)?(?:(?:(?:[a-zA-Z0-9][-a-zA-Z0-9]*\\.)+(?:(?:aero|arpa|asia|a[cdefgilmnoqrstuwxz])|(?:biz|b[abdefghijmnorstvwyz])|(?:cat|com|coop|c[acdfghiklmnoruvxyz])|d[ejkmoz]|(?:edu|e[cegrstu])|f[ijkmor]|(?:gov|g[abdefghilmnpqrstuwy])|h[kmnrtu]|(?:info|int|i[delmnoqrst])|j[emop]|k[eghimnrwyz]|l[abcikrstuvy]|(?:mil|museum|m[acdeghklmnopqrstuvwxyz])|(?:name|net|n[acefgilopruz])|(?:org|om)|(?:pro|p[aefghklmnrstwy])|qa|r[eouw]|s[abcdeghijklmnortuvyz]|(?:tel|travel|t[cdfghjklmnoprtvwz])|u[agkmsyz]|v[aceginu]|w[fs]|y[etu]|z[amw]))|(?:(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])\\.(?:25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[0-9])))(?:\\:\\d{1,5})?)(?:\\/(?:(?:[!#&-;=?-Z_a-z~])|(?:\\%[a-fA-F0-9]{2}))*)?)(?=\\b|\\s|$)/';

$c = preg_match_all($pattern, $text, $m);

if($c)
{
// Normalise
$links = array_values($m[0]);

ob_start();
$links = array_map(function($value) use ($tidy) {
return $tidy ? web_address_to_uri($value, true) : $value;
}, $links);
ob_end_clean();

// $links = ['http://someurl.tld', •••]

return $links;
}

return array();
}
}

// EOF Helpers.php
16 changes: 16 additions & 0 deletions composer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "barnabywalters/helpers",
"type": "library",
"description": "Yet another library of those damned helper functions.",
"autoload": {
"psr-0": {
"BarnabyWalters": ""
}
}
"require": {
"tantek/cassis": "*"
}
"suggest": {
"dflydev/markdown": "Required in order for processText to work properly"
}
}
Loading

0 comments on commit 8fa3139

Please sign in to comment.