Permalink
Browse files

Latest Ultimate Web Scraper Toolkit.

  • Loading branch information...
cubiclesoft committed Apr 4, 2018
1 parent 7066e8d commit b82b53b217221ca136788672f8a079738f152d32
Showing with 55 additions and 13 deletions.
  1. +24 −7 client/support/http.php
  2. +29 −4 client/support/web_browser.php
  3. +1 −1 client/support/wkfs_functions.php
  4. +1 −1 server/support/wkfs_functions.php
View
@@ -360,6 +360,21 @@ public static function NormalizeHeaders($headers)
return $result;
}
// Overlays caller-supplied raw headers onto an existing header array (modified in place).
//
// $headers is the header array to update, passed by reference.
// $rawheaders maps header names to values.
//
// Each value is passed through HeaderValueCleanup().  Entries whose cleaned value compares equal to an empty string are skipped.
// For every remaining entry, any item in $headers keyed by the HeaderNameCleanup() form of the name is removed and the
// cleaned value is then stored under the name exactly as it was supplied in $rawheaders.
public static function MergeRawHeaders(&$headers, $rawheaders)
{
	foreach ($rawheaders as $rawname => $rawval)
	{
		$cleanval = self::HeaderValueCleanup($rawval);

		// Nothing usable is left after cleanup, so leave $headers untouched for this entry.
		if ($cleanval == "")  continue;

		// Drop the entry stored under the cleaned-up name so the raw entry takes its place.
		$cleanname = self::HeaderNameCleanup($rawname);
		if (isset($headers[$cleanname]))  unset($headers[$cleanname]);

		// The key is intentionally the caller's original spelling, not the cleaned-up name.
		$headers[$rawname] = $cleanval;
	}
}
public static function ExtractHeader($data)
{
$result = array();
@@ -1337,6 +1352,7 @@ public static function RetrieveWebpage($url, $options = array())
// Cleanup input headers.
if (!isset($options["headers"])) $options["headers"] = array();
$options["headers"] = self::NormalizeHeaders($options["headers"]);
if (isset($options["rawheaders"])) self::MergeRawHeaders($options["headers"], $options["rawheaders"]);
// Process the proxy URL (if specified).
$useproxy = (isset($options["proxyurl"]) && trim($options["proxyurl"]) != "");
@@ -1366,15 +1382,16 @@ public static function RetrieveWebpage($url, $options = array())
$proxydata .= "Host: " . $host . ($defaultport ? "" : ":" . $port) . "\r\n";
$proxydata .= "Proxy-Connection: keep-alive\r\n";
if ($proxyusername != "") $proxydata .= "Proxy-Authorization: BASIC " . base64_encode($proxyusername . ":" . $proxypassword) . "\r\n";
if (isset($options["proxyheaders"]))
if (!isset($options["proxyheaders"])) $options["proxyheaders"] = array();
$options["proxyheaders"] = self::NormalizeHeaders($options["proxyheaders"]);
if (isset($options["rawproxyheaders"])) self::MergeRawHeaders($options["proxyheaders"], $options["rawproxyheaders"]);
unset($options["proxyheaders"]["Accept-Encoding"]);
foreach ($options["proxyheaders"] as $name => $val)
{
$options["proxyheaders"] = self::NormalizeHeaders($options["proxyheaders"]);
unset($options["proxyheaders"]["Accept-Encoding"]);
foreach ($options["proxyheaders"] as $name => $val)
{
if ($name != "Content-Type" && $name != "Content-Length" && $name != "Proxy-Connection" && $name != "Host") $proxydata .= $name . ": " . $val . "\r\n";
}
if ($name != "Content-Type" && $name != "Content-Length" && $name != "Proxy-Connection" && $name != "Host") $proxydata .= $name . ": " . $val . "\r\n";
}
$proxydata .= "\r\n";
if (isset($options["debug_callback"]) && is_callable($options["debug_callback"])) call_user_func_array($options["debug_callback"], array("rawproxyheaders", $proxydata, &$options["debug_callback_opts"]));
}
@@ -182,6 +182,7 @@ public function ProcessState(&$state)
$result["url"] = $state["url"];
unset($state["options"]["files"]);
unset($state["options"]["body"]);
unset($state["tempoptions"]["headers"]["Content-Type"]);
$result["options"] = $state["options"];
$result["firstreqts"] = $state["startts"];
$result["numredirects"] = $state["numredirects"];
@@ -352,10 +353,23 @@ public function ProcessState(&$state)
return $state["result"];
}
public function Process($url, $profile = "auto", $tempoptions = array())
public function Process($url, $tempoptions = array())
{
$startts = microtime(true);
$redirectts = $startts;
// Handle older function call: Process($url, $profile, $tempoptions)
if (is_string($tempoptions))
{
$args = func_get_args();
if (count($args) < 3) $tempoptions = array();
else $tempoptions = $args[2];
$tempoptions["profile"] = $args[1];
}
$profile = (isset($tempoptions["profile"]) ? $tempoptions["profile"] : "auto");
if (isset($tempoptions["timeout"])) $timeout = $tempoptions["timeout"];
else if (isset($this->data["httpopts"]["timeout"])) $timeout = $this->data["httpopts"]["timeout"];
else $timeout = false;
@@ -427,7 +441,7 @@ public function ProcessAsync__Handler($mode, &$data, $key, &$info)
if ($info["init"]) $data = $info["keep"];
else
{
$info["result"] = $this->Process($info["url"], $info["profile"], $info["tempoptions"]);
$info["result"] = $this->Process($info["url"], $info["tempoptions"]);
if (!$info["result"]["success"])
{
$info["keep"] = false;
@@ -493,16 +507,27 @@ public function ProcessAsync__Handler($mode, &$data, $key, &$info)
}
}
public function ProcessAsync($helper, $key, $callback, $url, $profile = "auto", $tempoptions = array())
public function ProcessAsync($helper, $key, $callback, $url, $tempoptions = array())
{
$tempoptions["async"] = true;
// Handle older function call: ProcessAsync($helper, $key, $callback, $url, $profile, $tempoptions)
if (is_string($tempoptions))
{
$args = func_get_args();
if (count($args) < 6) $tempoptions = array();
else $tempoptions = $args[5];
$tempoptions["profile"] = $args[4];
}
$profile = (isset($tempoptions["profile"]) ? $tempoptions["profile"] : "auto");
$info = array(
"init" => false,
"keep" => true,
"callback" => $callback,
"url" => $url,
"profile" => $profile,
"tempoptions" => $tempoptions,
"result" => false
);
@@ -198,7 +198,7 @@ private function RunAPI($data)
)
);
$result = $web->Process($this->config["url"], "auto", $options);
$result = $web->Process($this->config["url"], $options);
if (!$result["success"]) return $result;
@@ -198,7 +198,7 @@ private function RunAPI($data)
)
);
$result = $web->Process($this->config["url"], "auto", $options);
$result = $web->Process($this->config["url"], $options);
if (!$result["success"]) return $result;

0 comments on commit b82b53b

Please sign in to comment.