Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 794 lines (624 sloc) 21.354 kb
4fe1102 @benbalter initial commit
authored
1 <?php
fb4ef4e @benbalter comment goodness
authored
2 /**
3 * Site Inspector Class
4 *
5 * @author Benjamin J. Balter
6 * @version 0.1
7 * @package siteinspector
8 * @license GPL2
9 */
4fe1102 @benbalter initial commit
authored
10
fb4ef4e @benbalter comment goodness
authored
11 class SiteInspector {
4fe1102 @benbalter initial commit
authored
12
cf8448c @benbalter basic class structure
authored
13 static $instance;
14
6f4c3ff @benbalter caching, cms, scripts, analytics
authored
15 public $cachelife = 3600;
16
fb4ef4e @benbalter comment goodness
authored
//defaults to look for; can be overridden by user
//format [search] => [label]
//'cloud', 'cdn' and 'gapps' are compared as plain, case-insensitive substrings
//(see find_needles_in_haystack()); 'cms', 'analytics' and 'scripts' are used as
//regular-expression fragments delimited by "/" (see check_apps()), so "/" and "."
//have to be escaped in those three groups
public $searches = array(

    'cloud' => array(
        'amazon' => 'Amazon',
        'rackspace' => 'Rackspace'
    ),

    'cdn' => array(
        'akamai' => 'Akamai',
        'edgekey.net' => 'Akamai',
        'akam.net' => 'Akamai',
        'akadns.net' => 'Akamai',
    ),

    'cms' => array(
        'joomla' => 'Joomla',
        'wordpress' => 'WordPress',
        'wp-content' => 'WordPress',
        'drupal' => 'Drupal',
        'sites\/default\/' => 'Drupal',
        'sites\/all\/' => 'Drupal',
        'xoops' => 'Xoops',
        'mediawiki' => 'MediaWiki',
        'php-nuke' => 'PHP-Nuke',
        'typepad' => 'Typepad',
        'movable type' => 'Movable Type',
        'bbpress' => 'BBPress',
        'blogger' => 'Blogger',
        'sharepoint' => 'Sharepoint',
        'zencart' => 'Zencart',
        'phpbb' => 'PhpBB',
        'tumblr' => 'Tumblr',
        'liferay' => 'Liferay',
    ),

    'analytics' => array(
        'google-analytics' => 'Google Analytics',
        'ga\.js' => 'Google Analytics',
        'ua-' => 'Google Analytics',
        '_gaq' => 'Google Analytics',
        'quantcast' => 'Quantcast',
        'disqus' => 'Disqus',
        'GetSatisfaction' => 'GetSatisfaction',
        'AdSense' => 'AdSense',
        'AddThis' => 'AddThis',
    ),

    'scripts' => array(
        'prototype' => 'Prototype',
        'jquery' => 'jQuery',
        'mootools' => 'Mootools',
        'dojo' => 'Dojo',
        'scriptaculous' => 'Scriptaculous',
    ),

    'gapps' => array (
        'ghs.google.com' => 'Google Docs',
        'aspmx.l.google.com' => 'GMail',
        'googlemail.com' => 'GMail'
    ),
);
fb4ef4e @benbalter comment goodness
authored
80
81 //user agent to identify as
cf8448c @benbalter basic class structure
authored
82 public $ua = 'Site Inspector';
83
705a34e @benbalter bug fixes
authored
84 //maximum number of location headers (redirects) to follow; 0 disables following
85 public $follow = 5;
86
cf8448c @benbalter basic class structure
authored
87 public $data = null;
4fe1102 @benbalter initial commit
authored
88
fb4ef4e @benbalter comment goodness
authored
/**
 * Constructor
 *
 * Stores the object that was just created in the static $instance property.
 * @since 0.1
 */
function __construct() {
    //keep a handle on the most recently constructed inspector
    self::$instance = $this;
}
96
fb4ef4e @benbalter comment goodness
authored
/**
 * Magic setter: writes to undeclared properties end up in the data array
 * @since 0.1
 * @param string $name key to store the value under
 * @param mixed $value value to store
 */
function __set( $name, $value ) {
    //undeclared properties live in $this->data rather than on the object itself
    $this->data[ $name ] = $value;
}
106
fb4ef4e @benbalter comment goodness
authored
/**
 * Returns property from data array
 *
 * Raises an E_USER_NOTICE naming the calling file and line when the key
 * does not exist.
 * @since 0.1
 * @param string $name data key
 * @return mixed the value requested, or null if it is not set
 */
function __get( $name ) {

    //$this->data is null until the first value is stored, so make sure
    //it is an array before searching it
    if ( is_array( $this->data ) && array_key_exists( $name, $this->data ) )
        return $this->data[ $name ];

    $trace = debug_backtrace();
    trigger_error(
        'Undefined property via __get(): ' . $name .
        ' in ' . $trace[0]['file'] .
        ' on line ' . $trace[0]['line'],
        E_USER_NOTICE);
    return null;
}
70b54af @benbalter https
authored
126
/**
 * Checks whether the domain can be requested over https
 *
 * Builds an https:// url for the domain and requests it through
 * maybe_remote_get() with the 'sslverify' argument set to false.
 * @param string $domain the domain; $this->domain is used when empty
 * @return bool false if the request resulted in a WP_Error, otherwise true
 */
function check_https( $domain = '' ) {

    $url = 'https://' . $this->remove_http( $this->get_domain( $domain ) );

    $response = $this->maybe_remote_get( $url, array( 'user-agent' => $this->ua, 'sslverify' => false ) );

    return !is_wp_error( $response );
}
4fe1102 @benbalter initial commit
authored
141
70b54af @benbalter https
authored
/**
 * Searches a page, and the external scripts it references, for known apps
 *
 * Every key of $apps is used as a case-insensitive regular-expression
 * fragment delimited by "/"; the matching value is the label that gets reported.
 * For a page three places are searched: the source of each external script,
 * the <link> tags and the <script> tags of the page itself.
 * @param string $body markup of the page, or the source of a script if $script is true
 * @param array $apps searches in the format [search] => [label]
 * @param bool $script true if $body is a javascript file that should be searched as a whole
 * @return array labels of the apps that were found
 */
function check_apps( $body, $apps, $script = false ) {

    $output = array();

    //this is a javascript file, just check the whole thing
    if ( $script ) {

        foreach ( $apps as $search => $app ) {
            if ( preg_match( "/$search/i", $body ) )
                $output[] = $app;
        }
        return $output;
    }

    //grab external scripts; the url itself may not contain a quote, otherwise
    //the capture would run on into the attributes that follow src
    preg_match_all( '/<script[^>]* src=(\"|\')([^\"\'>]*)(\"|\')[^>]*>/i', $body, $matches );

    $args = array( 'user-agent' => $this->ua );
    foreach ( $matches[2] as $url ) {

        //url_to_absolute() returns FALSE for urls it can not resolve
        $absolute = $this->url_to_absolute( $this->domain, $url );
        if ( !$absolute )
            continue;

        $data = wp_remote_retrieve_body( $this->maybe_remote_get( $absolute, $args ) );
        if ( $data )
            $output = array_merge( $output, $this->check_apps( $data, $apps, true ) );
    }

    //the script tags of the page are the same for every app, so collect them once
    preg_match_all( '#<script[\s\S]*?>[\s\S]*?</script>#si', $body, $script_tags );

    //loop and regex
    foreach ( $apps as $search => $app ) {

        //look inside link attributes to find CSS files with app names in path
        if ( preg_match( '/<link[^>]+' . $search . '[^>]+>/i', $body ) ) {
            $output[] = $app;
            continue;
        }

        //look inside script tags; one hit is enough
        foreach ( $script_tags[0] as $tag ) {
            if ( preg_match( '/' . $search . '/i', $tag ) ) {
                $output[] = $app;
                break;
            }
        }
    }

    //drop duplicates and reindex so the result is a plain list
    return array_values( array_unique( $output ) );
}
195
25b4a42 @benbalter cleaned up helper functions
authored
/**
 * Checks a domain to see if there's a CNAME or A record on the non-www domain
 *
 * Updates $this->domain to www. if there's no non-www support
 * @since 0.1
 * @param string $domain the domain; $this->domain is used when empty
 * @return bool true if non-www works, otherwise false
 */
function check_nonwww( $domain = '' ) {

    $domain = $this->get_domain( $domain );

    //grab the DNS
    $dns = $this->get_dns_record( $domain );

    //check for CNAME or A record on non-www
    if ( is_array( $dns ) ) {

        foreach ( $dns as $entry ) {

            if ( !is_array( $entry ) )
                continue;

            //get_dns_record() hands back a flat list of records; an entry that
            //is itself a list of records is accepted as well
            $records = isset( $entry['type'] ) ? array( $entry ) : $entry;

            foreach ( $records as $record ) {
                if ( is_array( $record ) && isset( $record['type'] ) && ( $record['type'] == 'A' || $record['type'] == 'CNAME' ) )
                    return true;
            }
        }
    }

    //if there's no non-www, subsequent actions should be taken on www. instead
    $this->domain = $this->maybe_add_www( $domain );

    return false;
}
25b4a42 @benbalter cleaned up helper functions
authored
227
/**
 * Callback for array_walk_recursive(): looks for a category's search strings in one value
 *
 * The search strings of $this->searches[ $needle ] are compared against the
 * value as case-insensitive substrings. On the first hit the label of that
 * search string is stored in $this->data under the category name.
 * @since 0.1
 * @param mixed $haystack the value to search in
 * @param mixed $key key of the value as passed by array_walk_recursive(), not used
 * @param string $needle name of the category in $this->searches, e.g. 'cdn'
 * @return string|bool label if found, otherwise false
 */
function find_needles_in_haystack( $haystack, $key, $needle ) {

    //only scalars can be searched, and the category has to exist
    if ( !is_scalar( $haystack ) || !isset( $this->searches[ $needle ] ) || !is_array( $this->searches[ $needle ] ) )
        return false;

    $haystack = (string) $haystack;

    foreach ( $this->searches[ $needle ] as $n => $label ) {

        if ( stripos( $haystack, (string) $n ) !== FALSE ) {
            $this->data[ $needle ] = $label;
            return $label;
        }
    }

    return false;
}
255
dbbee58 @benbalter actually works now
authored
256
1314f75 @benbalter cleanup
authored
/**
 * Checks for an AAAA record on a domain
 * @since 0.1
 * @param array $dns the DNS records, either a list of records or a list of
 *                   record lists (e.g. keyed by domain); looked up via
 *                   get_dns_record() when no array is passed
 * @return bool true if ipv6, otherwise false
 */
function check_ipv6 ( $dns = '' ) {

    if ( !is_array( $dns ) )
        $dns = $this->get_dns_record();

    //the lookup itself may have failed
    if ( !is_array( $dns ) )
        return false;

    foreach ( $dns as $entry ) {

        if ( !is_array( $entry ) )
            continue;

        //an entry is either a single record or a list of records
        $records = isset( $entry['type'] ) ? array( $entry ) : $entry;

        foreach ( $records as $record ) {
            if ( is_array( $record ) && isset( $record['type'] ) && $record['type'] == 'AAAA' )
                return true;
        }
    }

    return false;
}
280
fb4ef4e @benbalter comment goodness
authored
/**
 * Resolves the domain a helper should work on
 *
 * A non-empty argument is returned as it is. Otherwise the domain stored on
 * the object is returned; if that is empty as well the script is terminated.
 * @since 0.1
 * @param string $domain the domain
 * @return string the domain to use
 */
function get_domain( $domain ) {

    if ( $domain == '' ) {

        //nothing passed and nothing stored: there is nothing to work with
        if ( $this->domain == '' )
            die('No Domain Supplied.');

        return $this->domain;
    }

    return $domain;
}
300
/**
 * Retrieves DNS record and caches to $this->data
 *
 * The records are stored in $this->data['dns'] under the domain with its
 * http:// prefix removed. All record types except PTR are requested.
 * @param string $domain the domain; $this->domain is used when empty
 * @return array list of DNS records, empty if the lookup failed
 * @since 0.1
 */
function get_dns_record( $domain = '' ) {

    $domain = $this->remove_http( $this->get_domain( $domain ) );

    if ( !isset( $this->data['dns'][ $domain ] ) ) {

        $records = dns_get_record( $domain, DNS_ALL - DNS_PTR );

        //dns_get_record() returns false on failure; store an empty list
        //instead so that callers can always loop over the result
        $this->data['dns'][ $domain ] = is_array( $records ) ? $records : array();
    }

    return $this->data['dns'][ $domain ];
}
317
/**
 * Main function of the class; populates the data array
 *
 * Normalizes the domain, then collects DNS records, IP and reverse lookups,
 * the page itself and the results of the cdn / cloud / google apps / cms /
 * analytics / scripts searches.
 * @since 0.1
 * @param string $domain domain to inspect; a domain set on the object beforehand is used when empty
 * @return array|bool data array, or false if there is no domain or the page could not be retrieved
 */
function inspect ( $domain = '' ) {

    //a domain assigned to the object beforehand lives in the data array,
    //so remember it before the array is wiped
    if ( $domain == '' && is_array( $this->data ) && isset( $this->data['domain'] ) )
        $domain = $this->data['domain'];

    //cleanup data of a previous run
    $this->data = array();

    //if we don't have a domain, kick
    if ( $domain == '' )
        return false;

    //cleanup domain
    $this->domain = trim( strtolower( $domain ) );
    $this->maybe_add_http( );
    $this->remove_www( );

    //check nonwww
    $this->nonwww = $this->check_nonwww( );
    $this->https = $this->check_https( );

    //get DNS
    $this->get_dns_record( $this->domain );

    //IP & Host
    $host = $this->remove_http( $this->domain );
    $this->ip = gethostbyname( $host );

    //gethostbynamel() returns false if the host can not be resolved
    $hosts = array();
    $ips = gethostbynamel( $host );
    if ( is_array( $ips ) ) {
        foreach ( $ips as $ip )
            $hosts[ $ip ] = gethostbyaddr( $ip );
    }
    $this->hosts = $hosts;

    //grab the page
    $data = $this->remote_get( $this->domain );

    //if there was an error, kick
    if ( !$data ) {
        $this->status = 'unreachable';
        return false;
    }
    $this->status = 'live';

    $body = isset( $data['body'] ) ? $data['body'] : '';
    $this->body = $body;
    $this->md5 = md5( $body );
    $this->headers = isset( $data['headers'] ) ? $data['headers'] : array();

    if ( isset( $data['headers']['server'] ) )
        $this->server_software = $data['headers']['server'];

    //merge DNS and hosts from reverse DNS lookup
    $dns = ( isset( $this->data['dns'] ) && is_array( $this->data['dns'] ) ) ? $this->data['dns'] : array();
    $haystack = array_merge( $dns, $hosts );

    //IPv6
    $this->ipv6 = $this->check_ipv6( $dns );

    //check CDN, cloud and google apps
    foreach ( array( 'cdn', 'cloud', 'gapps' ) as $category )
        array_walk_recursive( $haystack, array( $this, 'find_needles_in_haystack' ), $category );

    $this->cms = $this->check_apps( $body, $this->searches['cms'] );
    $this->analytics = $this->check_apps( $body, $this->searches['analytics'] );
    $this->scripts = $this->check_apps( $body, $this->searches['scripts'] );

    //NOTE(review): this orders the entries by value, not by key - confirm that is intended
    asort( $this->data );

    return $this->data;
}
400
fb4ef4e @benbalter comment goodness
authored
/**
 * Retrieves a page without letting the HTTP layer follow redirects
 *
 * The request goes through maybe_remote_get() with 'redirection' set to 0;
 * location headers are handled by maybe_follow_location_header() instead.
 * The DNS records of the domain are looked up along the way.
 * @param string $domain site to retrieve; $this->domain is used when empty
 * @return array|bool response as returned by the request (or by the last redirect followed), false on error
 * @since 0.1
 */
function remote_get( $domain = '' ) {

    $domain = $this->get_domain( $domain );

    $this->get_dns_record( $this->remove_trailing_slash( $domain ) );

    $args = array( 'redirection' => 0, 'user-agent' => $this->ua );

    $data = $this->maybe_remote_get( $domain, $args );

    //a WP_Error carries neither headers nor body, so there is nothing to follow
    if ( is_wp_error( $data ) )
        return false;

    return $this->maybe_follow_location_header( $data );
}
cf8448c @benbalter basic class structure
authored
430
6f4c3ff @benbalter caching, cms, scripts, analytics
authored
/**
 * Cached wrapper around wp_remote_get()
 *
 * Responses are kept in a transient for $this->cachelife seconds. Errors are
 * returned to the caller but not cached.
 * @param string $url url to retrieve
 * @param array $args arguments for wp_remote_get()
 * @return array|WP_Error the response, or the error of the request
 */
function maybe_remote_get( $url, $args ) {

    //transient names are limited in length, and the same url may be requested
    //with different arguments, so hash both into a short key
    $key = 'si_' . md5( $url . serialize( $args ) );

    $data = get_transient( $key );

    if ( !$data ) {

        $data = wp_remote_get( $url, $args );

        //do not keep a failed request around for the whole cache lifetime
        if ( !is_wp_error( $data ) )
            set_transient( $key, $data, $this->cachelife );
    }

    return $data;
}
440
705a34e @benbalter bug fixes
authored
/**
 * Follows the location header of a response, if there is one
 *
 * Every redirect seen is appended to $this->data['redirect'] with its
 * response code and destination. The destination is only requested while
 * the number of recorded redirects is below $this->follow.
 * @param array $data response to inspect
 * @return mixed the response passed in, or the result of remote_get() for the destination
 */
function maybe_follow_location_header ( $data ) {

    //following is switched off, or there is no location header
    if ( !$this->follow || !isset ( $data['headers']['location'] ) )
        return $data;

    //store the redirect
    $destination = $data['headers']['location'];
    $this->data['redirect'][] = array( 'code' => wp_remote_retrieve_response_code( $data ), 'destination' => $destination );

    //limit reached, hand back the redirect response itself
    if ( sizeof( $this->data['redirect'] ) >= $this->follow )
        return $data;

    return $this->remote_get( $destination );
}
459
fb4ef4e @benbalter comment goodness
authored
/**
 * Conditionally prepends http:// to a string
 *
 * Strings that already start with http:// or https:// are left alone.
 * @since 0.1
 * @param string $input domain to modify; $this->domain is used and updated when empty
 * @return string modified domain
 */
function maybe_add_http( $input = '' ) {

    $domain = $this->get_domain( $input );

    //an https:// url already has a scheme and must not get a second one
    if ( !preg_match( '#^https?://#i', $domain ) )
        $domain = 'http://' . $domain;

    //if no domain was passed, assume we should update the class
    if ( $input == '' )
        $this->domain = $domain;

    return $domain;
}
480
9edbbcc @benbalter it works!
authored
/**
 * Removes http:// and https:// from a domain
 * @since 0.1
 * @param string $input domain to modify; $this->domain is used and updated when empty
 * @return string domain without the scheme
 */
function remove_http ( $input = '' ) {

    $domain = $this->get_domain( $input );

    //kill the scheme; https has to go as well, otherwise it ends up in DNS
    //lookups and gets a second scheme prepended later on
    $domain = str_ireplace( array( 'https://', 'http://' ), '', $domain );

    //if no domain arg was passed, update the class
    if ( $input == '' )
        $this->domain = $domain;

    return $domain;
}
494
fb4ef4e @benbalter comment goodness
authored
/**
 * Strips a leading www. from a domain
 *
 * The domain is run through maybe_add_http() first, so the result always
 * carries a scheme.
 * @since 0.1
 * @param string $input domain; $this->domain is used and updated when empty
 * @return string domain with www removed
 */
function remove_www( $input = '' ) {

    //force http so the replacement below has something to anchor on
    $with_scheme = $this->maybe_add_http( $this->get_domain( $input ) );

    //kill the www
    $stripped = str_ireplace('http://www.', 'http://', $with_scheme);

    //if no domain arg was passed, update the class
    if ( $input == '' )
        $this->domain = $stripped;

    return $stripped;
}
25b4a42 @benbalter cleaned up helper functions
authored
518
/**
 * Adds www. to a domain unless it is already there
 *
 * The domain is run through maybe_add_http() first, so the result always
 * carries a scheme.
 * @since 0.1
 * @param string $input the domain; $this->domain is used when empty
 * @return string the domain with www.
 */
function maybe_add_www ( $input = '' ) {

    //force http so check will work
    $url = $this->maybe_add_http( $this->get_domain( $input ) );

    if ( strpos( $url, 'http://www.' ) === FALSE ) {

        //add the www
        $url = str_ireplace('http://', 'http://www.', $url);

        //if no domain arg was passed, update the class
        if ( $input == '' )
            $this->domain = $url;
    }

    //a domain that already had its www is handed back without touching the class
    return $url;
}
dbbee58 @benbalter actually works now
authored
545
/**
 * Removes a single trailing slash from a string
 * @param string $domain the domain or url
 * @return string the input without its last character if that was a "/"
 */
function remove_trailing_slash( $domain ) {

    //only ever one slash is removed
    return ( substr( $domain, -1, 1) == '/' ) ? substr( $domain, 0, -1) : $domain;
}
6f4c3ff @benbalter caching, cms, scripts, analytics
authored
/**
 * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar)
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $baseUrl Directory of linking page
 * @param string $relativeUrl URL to convert to absolute
 * @return string|bool Absolute URL, FALSE if one of the URLs can not be parsed
 *                     or the base URL has no scheme or host
 */
function url_to_absolute( $baseUrl, $relativeUrl ) {
    // If relative URL has a scheme, clean path and return.
    $r = $this->split_url( $relativeUrl );
    if ( $r === FALSE )
        return FALSE;
    if ( !empty( $r['scheme'] ) )
    {
        if ( !empty( $r['path'] ) && $r['path'][0] == '/' )
            $r['path'] = $this->url_remove_dot_segments( $r['path'] );
        return $this->join_url( $r );
    }

    // Make sure the base URL is absolute.
    $b = $this->split_url( $baseUrl );
    if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
        return FALSE;
    $r['scheme'] = $b['scheme'];

    // If relative URL has an authority (//host/path), clean path and return.
    if ( isset( $r['host'] ) )
    {
        if ( !empty( $r['path'] ) )
            $r['path'] = $this->url_remove_dot_segments( $r['path'] );
        return $this->join_url( $r );
    }
    unset( $r['port'] );
    unset( $r['user'] );
    unset( $r['pass'] );

    // Copy base authority.
    $r['host'] = $b['host'];
    if ( isset( $b['port'] ) ) $r['port'] = $b['port'];
    if ( isset( $b['user'] ) ) $r['user'] = $b['user'];
    if ( isset( $b['pass'] ) ) $r['pass'] = $b['pass'];

    // If relative URL has no path, use base path
    if ( empty( $r['path'] ) )
    {
        if ( !empty( $b['path'] ) )
            $r['path'] = $b['path'];
        if ( !isset( $r['query'] ) && isset( $b['query'] ) )
            $r['query'] = $b['query'];
        return $this->join_url( $r );
    }

    // If relative URL path doesn't start with /, merge with base path
    if ( $r['path'][0] != '/' )
    {
        // a base like http://www.foo.com has no path at all
        $base = isset( $b['path'] ) ? mb_strrchr( $b['path'], '/', TRUE, 'UTF-8' ) : FALSE;
        if ( $base === FALSE ) $base = '';
        $r['path'] = $base . '/' . $r['path'];
    }
    $r['path'] = $this->url_remove_dot_segments( $r['path'] );
    return $this->join_url( $r );
}
619
/**
 * Required function of URL to absolute
 *
 * Resolves "." and ".." segments and collapses empty segments of a path.
 * A leading and a trailing slash of the input are kept.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $path path to clean
 * @return string cleaned path
 */
function url_remove_dot_segments( $path ) {
    // multi-byte character explode
    $inSegs = preg_split( '!/!u', $path );
    $outSegs = array( );
    foreach ( $inSegs as $seg )
    {
        if ( $seg == '' || $seg == '.')
            continue;
        if ( $seg == '..' )
            array_pop( $outSegs );
        else
            array_push( $outSegs, $seg );
    }
    $outPath = implode( '/', $outSegs );
    if ( $path !== '' && $path[0] == '/' )
        $outPath = '/' . $outPath;
    // compare last multi-byte character against '/'; the encoding is the
    // fourth argument of mb_strrpos(), and the strict comparison keeps a
    // "not found" (false) from being taken for position 0
    if ( $outPath != '/' &&
        mb_strrpos( $path, '/', 0, 'UTF-8' ) === mb_strlen( $path, 'UTF-8' ) - 1 )
        $outPath .= '/';
    return $outPath;
}
649
/**
 * Required function of URL to absolute
 *
 * Splits a URL into its components with one regular expression. Only the
 * components present in the URL show up in the result; possible keys are
 * scheme, user, pass, host, port, path, query and fragment.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $url URL to split
 * @param bool $decode whether to rawurldecode() user, pass, path, host, query and fragment
 * @return array|bool components of the URL, FALSE if the URL does not match
 */
function split_url( $url, $decode=TRUE )
{
    $xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar = $xunressub . ':@%';

    // the hyphen is escaped: an unescaped "-" right after \d is rejected by
    // PCRE2 as an invalid range (host name) or forms an unintended range (scheme)
    $xscheme = '([a-zA-Z][a-zA-Z\d+\-.]*)';

    $xuserinfo = '(([' . $xunressub . '%]*)' .
        '(:([' . $xunressub . ':%]*))?)';

    $xipv4 = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    $xipv6 = '(\[([a-fA-F\d.:]+)\])';

    $xhost_name = '([a-zA-Z\d\-.%]+)';

    $xhost = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport = '(\d*)';
    $xauthority = '((' . $xuserinfo . '@)?' . $xhost .
        '?(:' . $xport . ')?)';

    $xslash_seg = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs = '(/(' . $xpath_rel . ')?)';
    $xapath = '(' . $xpath_authabs . '|' . $xpath_abs .
        '|' . $xpath_rel . ')';

    $xqueryfrag = '([' . $xpchar . '/?' . ']*)';

    $xurl = '^(' . $xscheme . ':)?' . $xapath . '?' .
        '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';


    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', $url, $m ) )
        return FALSE;

    // every part of the pattern is optional, so start from an empty result
    $parts = array();

    if ( !empty($m[2]) ) $parts['scheme'] = strtolower($m[2]);

    if ( !empty($m[7]) ) {
        if ( isset( $m[9] ) ) $parts['user'] = $m[9];
        else $parts['user'] = '';
    }
    if ( !empty($m[10]) ) $parts['pass'] = $m[11];

    // $h marks that the host is a name (not an ip), only those get decoded below
    if ( !empty($m[13]) ) $h=$parts['host'] = $m[13];
    else if ( !empty($m[14]) ) $parts['host'] = $m[14];
    else if ( !empty($m[16]) ) $parts['host'] = $m[16];
    else if ( !empty( $m[5] ) ) $parts['host'] = '';
    if ( !empty($m[17]) ) $parts['port'] = $m[18];

    if ( !empty($m[19]) ) $parts['path'] = $m[19];
    else if ( !empty($m[21]) ) $parts['path'] = $m[21];
    else if ( !empty($m[25]) ) $parts['path'] = $m[25];

    if ( !empty($m[27]) ) $parts['query'] = $m[28];
    if ( !empty($m[29]) ) $parts['fragment']= $m[30];

    if ( !$decode )
        return $parts;
    if ( !empty($parts['user']) )
        $parts['user'] = rawurldecode( $parts['user'] );
    if ( !empty($parts['pass']) )
        $parts['pass'] = rawurldecode( $parts['pass'] );
    if ( !empty($parts['path']) )
        $parts['path'] = rawurldecode( $parts['path'] );
    if ( isset($h) )
        $parts['host'] = rawurldecode( $parts['host'] );
    if ( !empty($parts['query']) )
        $parts['query'] = rawurldecode( $parts['query'] );
    if ( !empty($parts['fragment']) )
        $parts['fragment'] = rawurldecode( $parts['fragment'] );
    return $parts;
}
732
/**
 * Required function of URL to absolute
 *
 * Assembles a URL from components as produced by split_url().
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param array $parts components: scheme, user, pass, host, port, path, query, fragment
 * @param bool $encode whether to rawurlencode() the components first
 * @return string the assembled URL
 */
function join_url( $parts, $encode=TRUE )
{
    if ( $encode )
    {
        foreach ( array( 'user', 'pass' ) as $field )
        {
            if ( isset( $parts[ $field ] ) )
                $parts[ $field ] = rawurlencode( $parts[ $field ] );
        }

        // hosts that match the ip pattern are left as they are
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]])|([\da-f.:]+)$!ui', $parts['host'] ) )
            $parts['host'] = rawurlencode( $parts['host'] );

        // the slashes of the path have to survive the encoding
        if ( !empty( $parts['path'] ) )
            $parts['path'] = preg_replace( '!%2F!ui', '/',
                rawurlencode( $parts['path'] ) );

        foreach ( array( 'query', 'fragment' ) as $field )
        {
            if ( isset( $parts[ $field ] ) )
                $parts[ $field ] = rawurlencode( $parts[ $field ] );
        }
    }

    $url = empty( $parts['scheme'] ) ? '' : $parts['scheme'] . ':';

    if ( isset( $parts['host'] ) )
    {
        $authority = '//';
        if ( isset( $parts['user'] ) )
        {
            $authority .= $parts['user'];
            if ( isset( $parts['pass'] ) )
                $authority .= ':' . $parts['pass'];
            $authority .= '@';
        }

        // IPv6 addresses go in brackets, IPv4 addresses and names as they are
        $ipv6 = preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] );
        $authority .= $ipv6 ? '[' . $parts['host'] . ']' : $parts['host'];

        if ( isset( $parts['port'] ) )
            $authority .= ':' . $parts['port'];

        // a relative path needs a separator after the authority
        if ( !empty( $parts['path'] ) && $parts['path'][0] != '/' )
            $authority .= '/';

        $url .= $authority;
    }

    if ( !empty( $parts['path'] ) )
        $url .= $parts['path'];
    if ( isset( $parts['query'] ) )
        $url .= '?' . $parts['query'];
    if ( isset( $parts['fragment'] ) )
        $url .= '#' . $parts['fragment'];

    return $url;
}
790
4fe1102 @benbalter initial commit
authored
791
792 }
793
794 ?>
Something went wrong with that request. Please try again.