# Premier League Web Crawler Pt. 1

In [1]:
# Import libraries
import scrapy
from scrapy.crawler import CrawlerProcess

In [2]:
#Create the spider class
class premSpider(scrapy.Spider):
    """Spider that walks from the player index to each player's stats page.

    The crawl has three hops: the player listing page (``parse``), each
    player's page (``parse2``) and the linked "stats" page (``parse3``),
    which yields one dict per player.
    """

    name = "premSpider"

    # Export every yielded item to results.csv.
    # NOTE(review): items carry different stat keys per player; the CSV
    # exporter presumably takes its header from the first item unless
    # FEED_EXPORT_FIELDS is set -- confirm against the Scrapy feed-export docs.
    custom_settings = {'FEEDS': {'results.csv': {'format': 'csv'}}}

    def start_requests(self):
        """Yield the initial request(s) for the crawl."""
        # List of URLs we wish to scrape
        urls = ['https://www.premierleague.com/players']

        # Send each URL off to be parsed by ``parse``
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Follow every link found inside a table row to ``parse2``."""
        # Extract links to player pages
        plinks = response.xpath('//tr').css('a::attr(href)').extract()

        # Follow links to specific player pages
        for plink in plinks:
            yield response.follow(url=plink, callback=self.parse2)

    def parse2(self, response):
        """Follow anchors whose href is exactly ``stats`` to ``parse3``."""
        plinks2 = response.xpath('//a[@href="stats"]').css('a::attr(href)').extract()

        for link2 in plinks2:
            yield response.follow(url=link2, callback=self.parse3)

    def parse3(self, response):
        """Yield one dict with the player's name(s), appearances and stats.

        The dict has the keys ``'Names'`` (list of strings) and
        ``'Appearances'`` (string, empty when the node is absent), plus one
        key per stat label found on the page.
        """
        names = response.xpath('.//div[@class="name t-colour"]/text()').extract()

        # default='' keeps .strip() from raising AttributeError on None when
        # the page has no appearances node.
        appearances = response.xpath(
            '//span[@data-stat="appearances"]//text()'
        ).extract_first(default='').strip()

        # Stat values, in document order
        atkstat = [
            val.strip()
            for val in response.xpath('//div[@class="normalStat"]/span/span//text()').extract()
        ]

        # Stat labels, in document order. Skip every whitespace-only text
        # node (not just a bare '\n') so an empty label can never shift the
        # label/value pairing below.
        statname = [
            val.strip()
            for val in response.xpath('//div[@class="normalStat"]/span[@class="stat"]/text()').extract()
            if val.strip()
        ]

        # Pair labels with values positionally
        stat_dict = dict(zip(statname, atkstat))

        yield {'Names': names, 'Appearances': appearances, **stat_dict}
        

# Create the crawler process that will run the spider from this script/notebook
process = CrawlerProcess()

# Register the spider class with the process
process.crawl(premSpider)


# Start the crawl.
# NOTE(review): this call presumably blocks until crawling finishes, and
# re-running the cell in the same kernel may fail because the Twisted reactor
# cannot be restarted -- confirm against the Scrapy docs.
process.start()
        


2020-12-12 22:38:46 [scrapy.utils.log] INFO: Scrapy 2.4.1 started (bot: scrapybot)
2020-12-12 22:38:46 [scrapy.utils.log] INFO: Versions: lxml 4.4.1.0, libxml2 2.9.9, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.7.4 (default, Aug  9 2019, 18:34:13) [MSC v.1915 64 bit (AMD64)], pyOpenSSL 19.0.0 (OpenSSL 1.1.1d  10 Sep 2019), cryptography 2.7, Platform Windows-10-10.0.18362-SP0
2020-12-12 22:38:46 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2020-12-12 22:38:46 [scrapy.crawler] INFO: Overridden settings:
{}
2020-12-12 22:38:46 [scrapy.extensions.telnet] INFO: Telnet Password: 1ba33ced3e26e172
2020-12-12 22:38:46 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.feedexport.FeedExporter',
 'scrapy.extensions.logstats.LogStats']
2020-12-12 22:38:47 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermi

2020-12-12 22:38:51 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/13286/Tammy-Abraham/stats> (referer: https://www.premierleague.com/players/13286/Tammy-Abraham/overview)
2020-12-12 22:38:51 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/5248/Albian-Ajeti/stats> (referer: https://www.premierleague.com/players/5248/Albian-Ajeti/overview)
2020-12-12 22:38:51 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.premierleague.com/players/10905/Che-Adams/stats>
{'Names': ['Che Adams'], 'Appearances': '41', 'Goals': '7', 'Goals per match': '0.17', 'Headed goals': '0', 'Goals with right foot': '6', 'Goals with left foot': '1', 'Penalties scored': '0', 'Freekicks scored': '0', 'Shots': '56', 'Shots on target': '21', 'Shooting accuracy %': '38%', 'Hit woodwork': '2', 'Big chances missed': '12', 'Assists': '5', 'Passes': '477', 'Passes per match': '11.63', 'Big Chances Created': '9', 'Crosses': '15', 'Yellow cards

2020-12-12 22:38:52 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.premierleague.com/players/4183/Ahmed-El-Mohamady/stats>
{'Names': ['Ahmed El Mohamady'], 'Appearances': '186', 'Clean sheets': '28', 'Goals Conceded': '237', 'Tackles': '268', 'Tackle success %': '76%', 'Last man tackles': '0', 'Blocked shots': '19', 'Interceptions': '246', 'Clearances': '342', 'Headed Clearance': '162', 'Clearances off line': '5', 'Recoveries': '913', 'Duels won': '962', 'Duels lost': '869', 'Successful 50/50s': '128', 'Aerial battles won': '445', 'Aerial battles lost': '410', 'Own goals': '1', 'Errors leading to goal': '1', 'Assists': '13', 'Passes': '6,659', 'Passes per match': '35.8', 'Big Chances Created': '19', 'Crosses': '918', 'Cross accuracy %': '21%', 'Through balls': '7', 'Accurate long balls': '304', 'Yellow cards': '12', 'Red cards': '1', 'Fouls': '153', 'Offsides': '13', 'Goals': '6', 'Headed goals': '4', 'Goals with right foot': '2', 'Goals with left foot': '0', 'Hit woodwork'

2020-12-12 22:38:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/10424/Semi-Ajayi/stats> (referer: https://www.premierleague.com/players/10424/Semi-Ajayi/overview)
2020-12-12 22:38:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/20559/Alisson/overview> (referer: https://www.premierleague.com/players)
2020-12-12 22:38:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.premierleague.com/players/54312/Miguel-Almir%C3%B3n/stats>
{'Names': ['Miguel Almirón'], 'Appearances': '56', 'Goals': '5', 'Goals per match': '0.09', 'Headed goals': '0', 'Goals with right foot': '2', 'Goals with left foot': '3', 'Penalties scored': '0', 'Freekicks scored': '0', 'Shots': '83', 'Shots on target': '29', 'Shooting accuracy %': '35%', 'Hit woodwork': '2', 'Big chances missed': '11', 'Assists': '3', 'Passes': '1,289', 'Passes per match': '23.02', 'Big Chances Created': '4', 'Crosses': '73', 'Cross accuracy %': '22%', 'Th

2020-12-12 22:38:54 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/55603/Ali-Koiki/stats> (referer: https://www.premierleague.com/players/55603/Ali-Koiki/overview)
2020-12-12 22:38:54 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/20053/Ajibola-Alese/stats> (referer: https://www.premierleague.com/players/20053/Ajibola-Alese/overview)
2020-12-12 22:38:54 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.premierleague.com/players/5467/Alex-Telles/stats> (referer: https://www.premierleague.com/players/5467/Alex-Telles/overview)
2020-12-12 22:38:54 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.premierleague.com/players/55603/Ali-Koiki/stats>
{'Names': ['Ali Koiki'], 'Appearances': '0', 'Clean sheets': '0', 'Goals Conceded': '0', 'Tackles': '0', 'Tackle success %': '0%', 'Last man tackles': '0', 'Blocked shots': '0', 'Interceptions': '0', 'Clearances': '0', 'Headed Clearance': '0', 'Clearances 