Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 825 lines (643 sloc) 22.047 kb
4fe1102 Ben Balter initial commit
authored
1 <?php
fb4ef4e Ben Balter comment goodness
authored
2 /**
3 * Site Inspector Class
4 *
5 * @author Benjamin J. Balter
6 * @version 0.1
7 * @package siteinspector
8 * @license GPL2
9 */
4fe1102 Ben Balter initial commit
authored
10
fb4ef4e Ben Balter comment goodness
authored
11 class SiteInspector {
4fe1102 Ben Balter initial commit
authored
12
cf8448c Ben Balter basic class structure
authored
/**
 * Most recently constructed inspector; assigned in the constructor.
 */
public static $instance;

/**
 * Expiration value handed to set_transient() when remote responses are cached.
 */
public $cachelife = 3600;

/**
 * Defaults to look for; can be overridden by the user.
 *
 * Format: category => array( [search] => [label] ).
 *
 * 'cloud', 'cdn' and 'gapps' searches are matched as case-insensitive
 * substrings (stripos) against DNS / reverse-DNS data. 'cms', 'analytics'
 * and 'scripts' searches are inserted into regular expressions delimited
 * by "/", so they must be regex-safe (hence the escaped slashes and dots).
 */
public $searches = array(

    'cloud' => array(
        'amazon'    => 'Amazon',
        'rackspace' => 'Rackspace',
    ),

    'cdn' => array(
        // label casing now matches the other Akamai entries
        'akamai'      => 'Akamai',
        'edgekey.net' => 'Akamai',
        'akam.net'    => 'Akamai',
        'akadns.net'  => 'Akamai',
    ),

    'cms' => array(
        'joomla'           => 'Joomla',
        'wordpress'        => 'WordPress',
        'wp-content'       => 'WordPress',
        'drupal'           => 'Drupal',
        'sites\/default\/' => 'Drupal',
        'sites\/all\/'     => 'Drupal',
        'xoops'            => 'Xoops',
        'mediawiki'        => 'MediaWiki',
        'php-nuke'         => 'PHP-Nuke',
        'typepad'          => 'Typepad',
        'Movable Type'     => 'Movable Type',
        'bbpress'          => 'BBPress',
        'blogger'          => 'Blogger',
        'sharepoint'       => 'Sharepoint',
        'zencart'          => 'Zencart',
        'phpbb'            => 'PhpBB',
        'tumblr'           => 'tumblr',
        'liferay'          => 'Liferay',
    ),

    'analytics' => array(
        'google-analytics'  => 'Google Analytics',
        'ga.js'             => 'Google Analytics',
        'ua-[0-9]{8}-[0-9]' => 'Google Analytics',
        '_gaq'              => 'Google Analytics',
        'quantcast'         => 'Quantcast',
        'disqus'            => 'Disqus',
        'GetSatisfaction'   => 'GetSatisfaction',
        'AdSense'           => 'AdSense',
        'AddThis'           => 'AddThis',
    ),

    'scripts' => array(
        '__proto__'     => 'Prototype',
        'jquery'        => 'jQuery',
        'mootools'      => 'Mootools',
        'dojo\.'        => 'Dojo',
        // the library ships as scriptaculous.js; the old key could never match
        'scriptaculous' => 'Scriptaculous',
    ),

    'gapps' => array(
        'ghs.google.com'     => 'Google Docs',
        'aspmx.l.google.com' => 'GMail',
        'googlemail.com'     => 'GMail',
    ),
);

/**
 * User agent to identify as when making remote requests.
 */
public $ua = 'Site Inspector';

/**
 * Whether to follow location headers. A falsy value disables following;
 * otherwise the number is compared against the count of recorded redirects
 * to decide when to stop.
 */
public $follow = 5;

/**
 * Result store used by __get() / __set(). Starts as an empty array so that
 * reading a property before anything was stored does not hand null to
 * array_key_exists().
 */
public $data = array();
4fe1102 Ben Balter initial commit
authored
88
fb4ef4e Ben Balter comment goodness
authored
/**
 * Constructor: records the freshly built object in the static $instance slot.
 *
 * @since 0.1
 */
function __construct() {
    self::$instance = $this;
}
96
fb4ef4e Ben Balter comment goodness
authored
/**
 * Magic setter: writes to undeclared properties land in the $data array.
 *
 * @since 0.1
 * @param string $name  key to store under
 * @param mixed  $value value to store
 */
function __set( $name, $value ) {
    $this->data[ $name ] = $value;
}
106
fb4ef4e Ben Balter comment goodness
authored
/**
 * Magic getter: reads of undeclared properties are served from the $data array.
 *
 * Raises an E_USER_NOTICE naming the calling file and line when the key
 * does not exist, then returns null.
 *
 * @since 0.1
 * @param string $name data key
 * @returns mixed the stored value, or null when the key is unknown
 */
function __get( $name ) {

    // $data may not be an array yet (it can be null before anything is
    // stored); array_key_exists() must only ever see an array.
    if ( is_array( $this->data ) && array_key_exists( $name, $this->data ) )
        return $this->data[ $name ];

    $trace = debug_backtrace();
    trigger_error(
        'Undefined property via __get(): ' . $name .
        ' in ' . $trace[0]['file'] .
        ' on line ' . $trace[0]['line'],
        E_USER_NOTICE);
    return null;
}
70b54af Ben Balter https
authored
126
/**
 * Checks whether the domain answers a request over https.
 *
 * Certificate verification is switched off ('sslverify' => false), so this
 * reports only that the request did not come back as a WP_Error.
 *
 * @param string $domain the domain; falls back to $this->domain when empty
 * @returns bool true when the https request did not error, otherwise false
 */
function check_https( $domain = '' ) {

    $domain = $this->get_domain( $domain );

    // Strip whichever scheme is present before forcing https; without the
    // extra step an "https://example.com" input became "https://https://example.com".
    $host   = preg_replace( '#^https?://#i', '', $this->remove_http( $domain ) );
    $domain = 'https://' . $host;

    $args = array( 'user-agent' => $this->ua, 'sslverify' => false );
    $get  = $this->maybe_remote_get( $domain, $args );

    return !is_wp_error( $get );

}
4fe1102 Ben Balter initial commit
authored
141
70b54af Ben Balter https
authored
/**
 * Looks for known application signatures in a page (or script) body.
 *
 * For an HTML body three places are searched: the contents of every
 * externally referenced script (fetched and checked recursively), the
 * attributes of <link> and <meta> tags, and every <script> tag together
 * with its inline contents.
 *
 * @param string $body   markup or javascript source to search
 * @param array  $apps   map of regex fragment => label (see $searches)
 * @param bool   $script true when $body is a javascript file; the whole body
 *                       is then searched and nothing is fetched
 * @returns array list of matching labels (de-duplicated and re-indexed for HTML bodies)
 */
function check_apps( $body, $apps, $script = false ) {

    $output = array();

    // this is a javascript file, just check the whole thing
    if ( $script ) {

        foreach ( $apps as $search => $app ) {
            if ( preg_match( "/$search/i", $body ) )
                $output[] = $app;
        }

        return $output;
    }

    // grab external scripts; the src value stops at its own closing quote so
    // attributes that follow src are not swallowed into the URL
    preg_match_all( '/<script[^>]* src=(["\'])([^"\'>]*)\1[^>]*>/i', $body, $matches );

    foreach ( $matches[2] as $url ) {

        // exclude addthis because it will trip every CMS search
        if ( stripos( $url, 'addthis.com' ) !== false )
            continue;

        // url_to_absolute() returns FALSE for URLs it cannot parse
        $absolute = $this->url_to_absolute( $this->domain, $url );
        if ( !$absolute )
            continue;

        $args = array( 'user-agent' => $this->ua );
        $data = wp_remote_retrieve_body( $this->maybe_remote_get( $absolute, $args ) );
        if ( $data )
            $output = array_merge( $output, $this->check_apps( $data, $apps, true ) );
    }

    // collect <script> tags once; the result does not depend on the search term
    preg_match_all( '#<script[\s\S]*?>[\s\S]*?</script>#si', $body, $script_tags );
    $inline_scripts = $script_tags[0];

    foreach ( $apps as $search => $app ) {

        // look inside link and meta attributes to find app names
        if ( preg_match( '/<(link|meta)[^>]+' . $search . '[^>]+>/i', $body ) )
            $output[] = $app;

        // look inside script tags (the pattern has to be concatenated:
        // single quotes do not interpolate $search)
        foreach ( $inline_scripts as $tag ) {
            if ( preg_match( '/' . $search . '/i', $tag ) ) {
                $output[] = $app;
                break;
            }
        }

    }

    // re-index so the result is a plain list without gaps in the keys
    return array_values( array_unique( $output ) );

}
200
25b4a42 Ben Balter cleaned up helper functions
authored
/**
 * Checks a domain to see if there's a CNAME or A record on the non-www domain
 *
 * Updates $this->domain to www. if there's no non-www support
 * @since 0.1
 * @param string $domain the domain
 * @return bool true if non-www works, otherwise false
 */
function check_nonwww( $domain = '' ) {

    $domain = $this->get_domain( $domain );

    //grab the DNS
    $dns = $this->get_dns_record( $domain );

    if ( is_array( $dns ) ) {

        foreach ( $dns as $entry ) {

            if ( !is_array( $entry ) )
                continue;

            // get_dns_record() hands back the flat record list stored from
            // dns_get_record(), so each entry is normally a record itself;
            // a nested list of records is still accepted.
            $records = isset( $entry['type'] ) ? array( $entry ) : $entry;

            //check for CNAME or A record on non-www
            foreach ( $records as $record ) {
                if ( is_array( $record ) && isset( $record['type'] ) && ( $record['type'] == 'A' || $record['type'] == 'CNAME' ) )
                    return true;
            }

        }

    }

    //if there's no non-www, subsequent actions should be taken on www. instead of the TLD.
    $this->domain = $this->maybe_add_www( $domain );

    return false;

}
25b4a42 Ben Balter cleaned up helper functions
authored
236
/**
 * Checks one value against every search string of a $searches category.
 *
 * Shaped as an array_walk_recursive() callback: (value, key, extra argument).
 * On the first hit the label is stored in $this->data under the category name.
 *
 * @param mixed  $haystack value to search in (non-scalar values are skipped)
 * @param mixed  $key      array key of the value; unused
 * @param string $needle   category name in $this->searches (e.g. 'cdn')
 * @returns string|bool label of the first matching search, otherwise false
 * @since 0.1
 */
function find_needles_in_haystack( $haystack, $key, $needle ) {

    // nothing to search in, or an unknown category
    if ( !is_scalar( $haystack ) || !isset( $this->searches[ $needle ] ) )
        return false;

    $haystack = (string) $haystack;

    foreach ( $this->searches[ $needle ] as $search => $label ) {

        if ( stripos( $haystack, (string) $search ) !== FALSE ) {
            $this->data[ $needle ] = $label;
            return $label;
        }
    }

    return false;

}
264
dbbee58 Ben Balter actually works now
authored
265
1314f75 Ben Balter cleanup
authored
/**
 * Checks for an AAAA record on a domain
 *
 * Accepts either a map of domain => list of records (the shape of
 * $this->dns) or a flat list of records (the shape get_dns_record() returns).
 *
 * @since 0.1
 * @param array $dns the DNS Records; looked up for $this->domain when empty
 * @returns bool true if ipv6, otherwise false
 */
function check_ipv6 ( $dns = '' ) {

    if ( $dns == '' )
        $dns = $this->get_dns_record();

    if ( !$dns || !is_array( $dns ) )
        return false;

    foreach ( $dns as $entry ) {

        // failed lookups are stored as false
        if ( !is_array( $entry ) )
            continue;

        // an entry carrying its own 'type' is a single record, otherwise
        // it is the record list of one domain
        $records = isset( $entry['type'] ) ? array( $entry ) : $entry;

        foreach ( $records as $record ) {
            if ( is_array( $record ) && isset( $record['type'] ) && $record['type'] == 'AAAA' )
                return true;
        }
    }

    return false;

}
292
fb4ef4e Ben Balter comment goodness
authored
/**
 * Resolves the domain an operation should act on.
 *
 * An explicitly passed domain wins; otherwise $this->domain is used, and
 * the script is terminated when that is empty as well.
 *
 * @since 0.1
 * @param string $domain the domain, or '' to use the stored one
 * @returns string the domain to work with
 */
function get_domain( $domain ) {

    if ( $domain == '' ) {

        // nothing passed in: fall back to the stored domain
        if ( $this->domain == '' )
            die('No Domain Supplied.');

        return $this->domain;
    }

    return $domain;

}
312
/**
 * Looks up the DNS records of a domain, keeping results in $this->data['dns'].
 *
 * Every record type except PTR is requested. A lookup is performed only
 * when no entry is stored for the domain yet; errors raised by the lookup
 * are suppressed.
 *
 * @param string $domain the domain; falls back to $this->domain when empty
 * @returns array dns data
 * @since 0.1
 */
function get_dns_record( $domain = '' ) {

    $host = $this->remove_http( $this->get_domain( $domain ) );

    // already looked up during this run
    if ( isset( $this->data['dns'][ $host ] ) )
        return $this->dns[ $host ];

    @ $this->data['dns'][ $host ] = dns_get_record( $host, DNS_ALL - DNS_PTR );

    return $this->dns[ $host ];

}
329
/**
 * Main function of the class; populates the data array
 *
 * Normalises the domain, runs the DNS / https / reverse-DNS checks, fetches
 * the page and runs the cdn, cloud, google apps, cms, analytics and script
 * searches against the results.
 *
 * @since 0.1
 * @param string $domain domain to inspect; when empty, a domain assigned
 *                       to the object beforehand is used
 * @returns array|bool data array, or false when no domain is available
 */
function inspect ( $domain = '' ) {

    // a domain assigned through __set() lives in $data; remember it so the
    // reset below does not throw it away
    $preset = ( is_array( $this->data ) && isset( $this->data['domain'] ) ) ? $this->data['domain'] : '';

    //cleanup public vars
    $this->data = array();

    //prefer the argument, fall back to the previously assigned domain
    if ( $domain != '' )
        $this->domain = $domain;
    else if ( $preset != '' )
        $this->domain = $preset;
    else
        return false;

    //cleanup domain
    $this->domain = trim( strtolower( $this->domain ) );

    //if nothing is left after cleanup, kick
    if ( $this->domain == '' )
        return false;

    $this->maybe_add_http( );
    $this->remove_www( );

    //check nonwww
    $this->nonwww = $this->check_nonwww( );
    $this->https = $this->check_https( );

    //get DNS
    $this->get_dns_record( $this->domain );

    //IP & Host
    $host = $this->remove_http( $this->domain );
    $this->ip = gethostbyname( $host );

    $live = false;

    $ips = gethostbynamel( $host );
    if ( $ips ) {
        foreach ( $ips as $ip ) {

            //some sites (e.g., privacy.gov) returns localhost as their IP, this prevents scanning self
            if ( $ip != '127.0.0.1' )
                $live = true;

            $this->data['hosts'][ $ip ] = gethostbyaddr( $ip );

        }
    }

    //grab the page
    $data = $live ? $this->remote_get( $this->domain ) : false;

    //if there was an error, kick
    if ( !$data ) {
        $this->status = 'unreachable';
        return $this->data;
    }

    //every 4xx / 5xx code is an error status, 400 included
    $code = wp_remote_retrieve_response_code( $data );
    $this->status = ( $code >= 400 ) ? $code : 'live';

    //remote_get() may return a response without a body
    $this->body = isset( $data['body'] ) ? $data['body'] : '';
    $this->md5 = md5( $this->body );
    $this->headers = isset( $data['headers'] ) ? $data['headers'] : array();

    if ( isset( $data['headers']['server'] ) ) {
        $this->server_software = $data['headers']['server'];
    }

    //merge DNS and hosts from reverse DNS lookup
    $dns = isset( $this->data['dns'] ) ? $this->data['dns'] : array();
    $hosts = isset( $this->data['hosts'] ) ? $this->data['hosts'] : array();
    $haystack = array_merge( $dns, $hosts );

    //IPv6
    $this->ipv6 = $this->check_ipv6( $dns );

    //check CDN, cloud and google apps against every DNS / host value
    foreach ( array( 'cdn', 'cloud', 'gapps' ) as $category )
        array_walk_recursive( $haystack, array( $this, 'find_needles_in_haystack' ), $category );

    $this->cms = $this->check_apps( $this->body, $this->searches['cms'] );
    $this->analytics = $this->check_apps( $this->body, $this->searches['analytics'] );
    $this->scripts = $this->check_apps( $this->body, $this->searches['scripts'] );

    asort( $this->data );

    return $this->data;
}
427
fb4ef4e Ben Balter comment goodness
authored
/**
 * Fetches a page without letting the HTTP layer follow redirects.
 *
 * Redirects are followed by maybe_follow_location_header() instead so that
 * each hop can be recorded. When the GET fails for a reason other than a
 * connect timeout, a HEAD request is tried so a location header can still
 * be followed.
 *
 * @param $domain string site to retrieve; falls back to $this->domain when empty
 * @returns array|bool response array, or false when the site could not be reached
 * @since 0.1
 */
function remote_get( $domain = '' ) {

    $domain = $this->get_domain( $domain );

    $this->get_dns_record( $this->remove_trailing_slash( $domain ) );

    $args = array( 'redirection' => 0, 'user-agent' => $this->ua );

    $data = $this->maybe_remote_get( $domain, $args );

    //if there was an error, try to grab the headers to potentially follow a location header
    if ( is_wp_error( $data ) ) {

        if ( $data->get_error_message() == 'connect() timed out!' )
            return false;

        // wp_remote_retrieve_headers() only reads an existing response and
        // was being handed the URL string; issue a real HEAD request instead
        $data = wp_remote_head( $domain, $args );
        if ( is_wp_error( $data ) )
            return false;
    }

    return $this->maybe_follow_location_header( $data );
}
cf8448c Ben Balter basic class structure
authored
461
6f4c3ff Ben Balter caching, cms, scripts, analytics
authored
/**
 * Cached wrapper around wp_remote_get().
 *
 * Responses (including WP_Error results) are kept in a transient for
 * $this->cachelife. The transient name is a hash of the URL and the
 * request arguments: raw URLs can exceed the length WordPress accepts for
 * transient names, and the same URL requested with different arguments
 * must not share a cache entry.
 *
 * @param string $url  URL to retrieve
 * @param array  $args arguments passed on to wp_remote_get()
 * @returns array|WP_Error the cached or freshly fetched response
 */
function maybe_remote_get( $url, $args ) {

    $key = 'si_' . md5( $url . '|' . serialize( $args ) );

    $data = get_transient( $key );

    if ( !$data ) {
        $data = wp_remote_get( $url, $args );
        set_transient( $key, $data, $this->cachelife );
    }

    return $data;

}
471
705a34e Ben Balter bug fixes
authored
/**
 * Follows a location header, recording each hop in $this->data['redirect'].
 *
 * The response is returned untouched when following is disabled or when it
 * carries no location header. Otherwise the hop is recorded and, while the
 * number of recorded hops is below $this->follow, the destination is fetched.
 *
 * @param array $data response array
 * @returns mixed the original response, or the result of fetching the destination
 */
function maybe_follow_location_header ( $data ) {

    //flag is off, or there is nothing to follow
    if ( !$this->follow || !isset ( $data['headers']['location'] ) )
        return $data;

    $destination = $data['headers']['location'];

    //store the redirect
    $this->data['redirect'][] = array( 'code' => wp_remote_retrieve_response_code( $data ), 'destination' => $destination );

    //redirect budget used up, hand back the last response
    if ( count( $this->data['redirect'] ) >= $this->follow )
        return $data;

    return $this->remote_get( $destination );
}
490
fb4ef4e Ben Balter comment goodness
authored
/**
 * Conditionally prepends http:// to a string
 *
 * Nothing is prepended when the string already starts with http:// or
 * https:// (in any letter case).
 *
 * @since 0.1
 * @param string $input domain to modify; when empty, $this->domain is used and updated
 * @returns string modified domain
 */
function maybe_add_http( $input = '' ) {

    $domain = $this->get_domain( $input );

    // a plain prefix comparison turned "https://x" into "http://https://x"
    if ( !preg_match( '#^https?://#i', $domain ) )
        $domain = 'http://' . $domain;

    //if no domain was passed, assume we should update the class
    if ( $input == '' )
        $this->domain = $domain;

    return $domain;

}
511
9edbbcc Ben Balter it works!
authored
/**
 * Strips a leading http:// or https:// from a domain
 *
 * Only a scheme at the very start of the string is removed.
 *
 * @param string $input domain to modify; when empty, $this->domain is used and updated
 * @returns string domain without the scheme
 */
function remove_http ( $input = '' ) {

    $domain = $this->get_domain( $input );

    //kill the scheme
    $domain = preg_replace( '#^https?://#i', '', $domain );

    //if no domain arg was passed, update the class
    if ( $input == '' )
        $this->domain = $domain;

    return $domain;
}
525
fb4ef4e Ben Balter comment goodness
authored
/**
 * Drops the "www." that directly follows http:// in a domain.
 *
 * The domain is first run through maybe_add_http() so the replacement has
 * a fixed prefix to work with.
 *
 * @since 0.1
 * @param string $input domain; when empty, $this->domain is used and updated
 * @returns string domain with www removed
 */
function remove_www( $input = '' ) {

    $with_scheme = $this->maybe_add_http( $this->get_domain( $input ) );
    $domain = str_ireplace( 'http://www.', 'http://', $with_scheme );

    //no explicit domain means the stored one is being edited
    if ( $input == '' )
        $this->domain = $domain;

    return $domain;

}
25b4a42 Ben Balter cleaned up helper functions
authored
549
/**
 * Inserts "www." after http:// unless the domain already contains it.
 *
 * A domain that already carries http://www. is returned without touching
 * $this->domain.
 *
 * @since 0.1
 * @param string $input the domain; when empty, $this->domain is used and updated
 * @returns string the domain with www.
 */
function maybe_add_www ( $input = '' ) {

    //force http so check will work
    $domain = $this->maybe_add_http( $this->get_domain( $input ) );

    if ( strpos( $domain, 'http://www.' ) === FALSE ) {

        $domain = str_ireplace( 'http://', 'http://www.', $domain );

        //no explicit domain means the stored one is being edited
        if ( $input == '' )
            $this->domain = $domain;
    }

    return $domain;
}
dbbee58 Ben Balter actually works now
authored
576
/**
 * Removes a single trailing slash, if there is one.
 *
 * @param string $domain domain or URL
 * @returns string the input without its final "/"
 */
function remove_trailing_slash( $domain ) {

    $ends_with_slash = ( substr( $domain, -1, 1 ) == '/' );

    return $ends_with_slash ? substr( $domain, 0, -1 ) : $domain;

}
6f4c3ff Ben Balter caching, cms, scripts, analytics
authored
/**
 * Converts a relative URL (/bar) to an absolute URL (http://www.foo.com/bar)
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $baseUrl Directory of linking page
 * @param string $relativeUrl URL to convert to absolute
 * @return string|bool Absolute URL, or FALSE when either URL cannot be parsed
 *                     or the base URL lacks a scheme or host
 */
function url_to_absolute( $baseUrl, $relativeUrl ) {
    // If relative URL has a scheme, clean path and return.
    $r = $this->split_url( $relativeUrl );
    if ( $r === FALSE )
        return FALSE;
    if ( !empty( $r['scheme'] ) )
    {
        if ( !empty( $r['path'] ) && $r['path'][0] == '/' )
            $r['path'] = $this->url_remove_dot_segments( $r['path'] );
        return $this->join_url( $r );
    }

    // Make sure the base URL is absolute.
    $b = $this->split_url( $baseUrl );
    if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
        return FALSE;
    $r['scheme'] = $b['scheme'];

    // If relative URL has an authority, clean path and return.
    if ( isset( $r['host'] ) )
    {
        if ( !empty( $r['path'] ) )
            $r['path'] = $this->url_remove_dot_segments( $r['path'] );
        return $this->join_url( $r );
    }
    unset( $r['port'] );
    unset( $r['user'] );
    unset( $r['pass'] );

    // Copy base authority.
    $r['host'] = $b['host'];
    if ( isset( $b['port'] ) ) $r['port'] = $b['port'];
    if ( isset( $b['user'] ) ) $r['user'] = $b['user'];
    if ( isset( $b['pass'] ) ) $r['pass'] = $b['pass'];

    // If relative URL has no path, use base path
    if ( empty( $r['path'] ) )
    {
        if ( !empty( $b['path'] ) )
            $r['path'] = $b['path'];
        if ( !isset( $r['query'] ) && isset( $b['query'] ) )
            $r['query'] = $b['query'];
        return $this->join_url( $r );
    }

    // If relative URL path doesn't start with /, merge with base path
    if ( $r['path'][0] != '/' )
    {
        // a base such as http://example.com has no path component at all
        $base = isset( $b['path'] ) ? mb_strrchr( $b['path'], '/', TRUE, 'UTF-8' ) : FALSE;
        if ( $base === FALSE ) $base = '';
        $r['path'] = $base . '/' . $r['path'];
    }
    $r['path'] = $this->url_remove_dot_segments( $r['path'] );
    return $this->join_url( $r );
}
650
/**
 * Required function of URL to absolute
 *
 * Resolves "." and ".." segments in a URL path. Empty segments are dropped;
 * a leading slash and a trailing slash are preserved.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $path URL path
 * @return string path with dot segments resolved
 */
function url_remove_dot_segments( $path ) {
    $path = (string) $path;
    if ( $path === '' )
        return '';

    // "/" is a single ASCII byte that never occurs inside a multi-byte
    // UTF-8 sequence, so byte-wise splitting is safe here
    $outSegs = array( );
    foreach ( explode( '/', $path ) as $seg )
    {
        if ( $seg == '' || $seg == '.')
            continue;
        if ( $seg == '..' )
            array_pop( $outSegs );
        else
            array_push( $outSegs, $seg );
    }
    $outPath = implode( '/', $outSegs );
    if ( $path[0] == '/' )
        $outPath = '/' . $outPath;
    // keep a trailing slash (the previous mb_strrpos() call passed the
    // encoding in the offset position)
    if ( $outPath != '/' && substr( $path, -1 ) === '/' )
        $outPath .= '/';
    return $outPath;
}
680
/**
 * Required function of URL to absolute
 *
 * Splits a URL into its components: scheme, user, pass, host, port, path,
 * query and fragment. Only the components present in the URL are set.
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param string $url    URL to split
 * @param bool   $decode whether to rawurldecode() the components
 * @return array|bool component array, or FALSE when the URL does not match
 */
function split_url( $url, $decode=TRUE )
{
    $xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar = $xunressub . ':@%';

    // hyphens are escaped: "+-." formed a range that also admitted ","
    $xscheme = '([a-zA-Z][a-zA-Z\d+\-.]*)';

    $xuserinfo = '(([' . $xunressub . '%]*)' .
        '(:([' . $xunressub . ':%]*))?)';

    $xipv4 = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    $xipv6 = '(\[([a-fA-F\d.:]+)\])';

    // "\d-." is rejected as an invalid range by PCRE2 (PHP 7.3+), which
    // made every call fail; the hyphen has to be escaped
    $xhost_name = '([a-zA-Z\d\-.%]+)';

    $xhost = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport = '(\d*)';
    $xauthority = '((' . $xuserinfo . '@)?' . $xhost .
        '?(:' . $xport . ')?)';

    $xslash_seg = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs = '(/(' . $xpath_rel . ')?)';
    $xapath = '(' . $xpath_authabs . '|' . $xpath_abs .
        '|' . $xpath_rel . ')';

    $xqueryfrag = '([' . $xpchar . '/?' . ']*)';

    $xurl = '^(' . $xscheme . ':)?' . $xapath . '?' .
        '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';


    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', $url, $m ) )
        return FALSE;

    // every component is optional, so start from an empty result
    $parts = array();

    if ( !empty($m[2]) ) $parts['scheme'] = strtolower($m[2]);

    if ( !empty($m[7]) ) {
        if ( isset( $m[9] ) ) $parts['user'] = $m[9];
        else $parts['user'] = '';
    }
    if ( !empty($m[10]) ) $parts['pass'] = $m[11];

    // only a host *name* gets url-decoded below; IP literals are left alone
    $named_host = FALSE;
    if ( !empty($m[13]) ) {
        $parts['host'] = $m[13];
        $named_host = TRUE;
    }
    else if ( !empty($m[14]) ) $parts['host'] = $m[14];
    else if ( !empty($m[16]) ) $parts['host'] = $m[16];
    else if ( !empty( $m[5] ) ) $parts['host'] = '';
    if ( !empty($m[17]) ) $parts['port'] = $m[18];

    if ( !empty($m[19]) ) $parts['path'] = $m[19];
    else if ( !empty($m[21]) ) $parts['path'] = $m[21];
    else if ( !empty($m[25]) ) $parts['path'] = $m[25];

    if ( !empty($m[27]) ) $parts['query'] = $m[28];
    if ( !empty($m[29]) ) $parts['fragment']= $m[30];

    if ( !$decode )
        return $parts;
    if ( !empty($parts['user']) )
        $parts['user'] = rawurldecode( $parts['user'] );
    if ( !empty($parts['pass']) )
        $parts['pass'] = rawurldecode( $parts['pass'] );
    if ( !empty($parts['path']) )
        $parts['path'] = rawurldecode( $parts['path'] );
    if ( $named_host )
        $parts['host'] = rawurldecode( $parts['host'] );
    if ( !empty($parts['query']) )
        $parts['query'] = rawurldecode( $parts['query'] );
    if ( !empty($parts['fragment']) )
        $parts['fragment'] = rawurldecode( $parts['fragment'] );
    return $parts;
}
763
/**
 * Required function of URL to absolute
 *
 * Builds a URL string from the component array produced by split_url().
 *
 * Inspired from code available at http://nadeausoftware.com/node/79,
 * Code distributed under OSI BSD (http://www.opensource.org/licenses/bsd-license.php)
 *
 * @param array $parts  URL components (scheme, user, pass, host, port, path, query, fragment)
 * @param bool  $encode whether to rawurlencode() the components first
 * @return string the assembled URL
 */
function join_url( $parts, $encode=TRUE )
{
    if ( $encode )
    {
        if ( isset( $parts['user'] ) )
            $parts['user'] = rawurlencode( $parts['user'] );
        if ( isset( $parts['pass'] ) )
            $parts['pass'] = rawurlencode( $parts['pass'] );
        // IP literals (bracketed or not) must not be percent-encoded. The
        // pattern is anchored as a whole; it previously carried a stray "]"
        // and anchored each alternative on one side only.
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'] ) )
            $parts['host'] = rawurlencode( $parts['host'] );
        if ( !empty( $parts['path'] ) )
            $parts['path'] = preg_replace( '!%2F!ui', '/',
                rawurlencode( $parts['path'] ) );
        // NOTE(review): this also encodes "=" and "&" inside the query - confirm that is intended
        if ( isset( $parts['query'] ) )
            $parts['query'] = rawurlencode( $parts['query'] );
        if ( isset( $parts['fragment'] ) )
            $parts['fragment'] = rawurlencode( $parts['fragment'] );
    }

    $url = '';
    if ( !empty( $parts['scheme'] ) )
        $url .= $parts['scheme'] . ':';
    if ( isset( $parts['host'] ) )
    {
        $url .= '//';
        if ( isset( $parts['user'] ) )
        {
            $url .= $parts['user'];
            if ( isset( $parts['pass'] ) )
                $url .= ':' . $parts['pass'];
            $url .= '@';
        }
        if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) )
            $url .= '[' . $parts['host'] . ']'; // IPv6
        else
            $url .= $parts['host']; // IPv4 or name
        if ( isset( $parts['port'] ) )
            $url .= ':' . $parts['port'];
        if ( !empty( $parts['path'] ) && $parts['path'][0] != '/' )
            $url .= '/';
    }
    if ( !empty( $parts['path'] ) )
        $url .= $parts['path'];
    if ( isset( $parts['query'] ) )
        $url .= '?' . $parts['query'];
    if ( isset( $parts['fragment'] ) )
        $url .= '#' . $parts['fragment'];
    return $url;
}
821
4fe1102 Ben Balter initial commit
authored
822
823 }
824
825 ?>
Something went wrong with that request. Please try again.