Skip to content

Commit

Permalink
Fixed error crawling CSS files
Browse files: browse the repository at this point in the history
  • Loading branch information
avara1986 committed Jun 18, 2015
1 parent a27284c commit cfb72f3
Showing 1 changed file with 18 additions and 15 deletions.
33 changes: 18 additions & 15 deletions src/CrawlerBundle/Controller/WebsiteController.php
Original file line number Diff line number Diff line change
Expand Up @@ -121,22 +121,25 @@ private function checkAndSaveCSS(Crawler $crawler, Website $web){
$url_original = $node->attr('href');
$url = preg_replace("/(https?|ftp):\/\//","",$url_original);
$url = str_replace($web->getUrl(),"",$url);
if (!preg_match("/^\//", $url)){
$url = "/".$url;
$url = preg_replace("/\?(.*)/", "", $url);
if(!preg_match("/fonts\.googleapis\.com/", $url) && preg_match("/\.css/", $url)){
if (!preg_match("/^\//", $url)){
$url = "/".$url;
}
try {
$css_content_original = file_get_contents("http://".$web->getUrl()."".$url);
} catch (\Exception $e) {
$css_content_original = "";
}
$css = $em->getRepository('CrawlerBundle:Css')->findOneBy(array('website' =>$web ,'file' => $url));
if(count($css)==0) {
$css = $this->saveCSS($url, $css_content_original, $web, $em);
}
$result_css[]= array(
'id' => $css->getId(),
'url' => "http://".$web->getUrl()."".$url,
);
}
try {
$css_content_original = file_get_contents("http://".$web->getUrl()."".$url);
} catch (\Exception $e) {
$css_content_original = "";
}
$css = $em->getRepository('CrawlerBundle:Css')->findOneBy(array('website' =>$web ,'file' => $url));
if(count($css)==0) {
$css = $this->saveCSS($url, $css_content_original, $web, $em);
}
$result_css[]= array(
'id' => $css->getId(),
'url' => "http://".$web->getUrl()."".$url,
);
}
$em->flush();
return $result_css;
Expand Down

0 comments on commit cfb72f3

Please sign in to comment.