In [1]:
!pip install scrapy



In [2]:
import scrapy
from scrapy.crawler import CrawlerProcess
import json
import csv

In [3]:
class Olx(scrapy.Spider):
    name = 'olx'
    
    url = 'https://www.olx.in/api/relevance/v2/search?category=84&facet_limit=100&lang=en&location=4059014&location_facet_limit=20&platform=web-desktop&size=40&user=16fecf81399x6039b937'
    
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    }
    
    def __init__(self):
        with open('olxcarsdata.csv', 'w') as csv_file:
            csv_file.write('title,location,price,body_type,brand,model,variant,year,fuel,transmission,kms,owners\n')
    
    def start_requests(self):
        for page in range(0, 10):
            yield scrapy.Request(url=self.url + '&page=' + str(page), headers=self.headers, callback=self.parse)
    
    def parse(self, res):
        data = res.text
        data = json.loads(data)
        
        for offer in data['data']:
            items = {
                'title': offer['title'],
                'location': offer['locations_resolved']['COUNTRY_name'] + ', ' +
                            offer['locations_resolved']['ADMIN_LEVEL_1_name'] + ', ' +
                            offer['locations_resolved']['ADMIN_LEVEL_3_name'] + ', ' +
                            offer['locations_resolved']['SUBLOCALITY_LEVEL_1_name'],
                'price': offer['price']['value']['display'][2:],
                'body_type': offer['car_body_type'],
                'brand' : offer['parameters'][0]['value_name'],
                'model' : offer['parameters'][1]['value_name'],
                'variant' : offer['parameters'][2]['value_name'],
                'year' : offer['parameters'][3]['value_name'],
                'fuel' : offer['parameters'][4]['value_name'],
                'transmission' : offer['parameters'][5]['value_name'],
                'kms' : offer['parameters'][6]['value_name'],
                'owners' : offer['parameters'][7]['value_name'][:1]
                
            }
            
            print(json.dumps(items, indent=2))
            
            with open('olxcarsdata.csv', 'a') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=items.keys())
                writer.writerow(items)
    
    



In [4]:
# run scraper
process = CrawlerProcess()
process.crawl(Olx)
process.start()

# debug
#Olx.parse(Olx, '')

2020-12-27 19:40:16 [scrapy.utils.log] INFO: Scrapy 2.4.1 started (bot: scrapybot)
2020-12-27 19:40:16 [scrapy.utils.log] INFO: Versions: lxml 4.1.1.0, libxml2 2.9.7, cssselect 1.1.0, parsel 1.6.0, w3lib 1.22.0, Twisted 20.3.0, Python 3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 10:22:32) [MSC v.1900 64 bit (AMD64)], pyOpenSSL 17.5.0 (OpenSSL 1.0.2u  20 Dec 2019), cryptography 2.1.4, Platform Windows-10-10.0.18362-SP0
2020-12-27 19:40:16 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.selectreactor.SelectReactor
2020-12-27 19:40:16 [scrapy.crawler] INFO: Overridden settings:
{}
2020-12-27 19:40:16 [scrapy.extensions.telnet] INFO: Telnet Password: 5a4b130a4092bac2
2020-12-27 19:40:17 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.logstats.LogStats']
2020-12-27 19:40:17 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuth

{
  "title": "Audi Q7 3.0 TDI quattro Premium, 2010, Diesel",
  "location": "India, Maharashtra, Pune, Central Street Camp",
  "price": "15,50,000",
  "body_type": "LUXURY_SUV",
  "brand": "Audi",
  "model": "Q7",
  "variant": "3.0 TDI quattro Premium",
  "year": "2010.0",
  "fuel": "Diesel",
  "transmission": "Automatic",
  "kms": "82000",
  "owners": "2"
}
{
  "title": "Skoda Superb Elegance 1.8 TSI Automatic, 2010, Petrol",
  "location": "India, Maharashtra, Pune, Vishrant Wadi",
  "price": "4,29,999",
  "body_type": "LUXURY_SEDAN",
  "brand": "Skoda",
  "model": "Superb",
  "variant": "2008-2013 Elegance 1.8 TSI AT",
  "year": "2010.0",
  "fuel": "Petrol",
  "transmission": "Automatic",
  "kms": "59000",
  "owners": "2"
}
{
  "title": "Hyundai Fluidic Verna 1.6 CRDi SX Automatic, 2012, Petrol",
  "location": "India, Maharashtra, Pune, Vishrant Wadi",
  "price": "4,99,999",
  "body_type": "SEDAN",
  "brand": "Hyundai",
  "model": "Fluidic Verna",
  "variant": "1.6 CRDi SX Automatic"

2020-12-27 19:40:18 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.olx.in/api/relevance/v2/search?category=84&facet_limit=100&lang=en&location=4059014&location_facet_limit=20&platform=web-desktop&size=40&user=16fecf81399x6039b937&page=1> (referer: None)
2020-12-27 19:40:18 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.olx.in/api/relevance/v2/search?category=84&facet_limit=100&lang=en&location=4059014&location_facet_limit=20&platform=web-desktop&size=40&user=16fecf81399x6039b937&page=1> (referer: None)
Traceback (most recent call last):
  File "C:\Users\ehtesham\Anaconda3\lib\site-packages\twisted\internet\defer.py", line 654, in _runCallbacks
    current.result = callback(current.result, *args, **kw)
  File "<ipython-input-3-f7d0bf8fb4b9>", line 30, in parse
    'body_type': offer['car_body_type'],
KeyError: 'car_body_type'
2020-12-27 19:40:18 [scrapy.core.engine] INFO: Closing spider (finished)
2020-12-27 19:40:18 [scrapy.statscollectors] INFO: Dump

{
  "title": "Hyundai i20 Active 1.2 SX, 2017, Petrol",
  "location": "India, Maharashtra, Pune, Rasta Peth",
  "price": "7,30,000",
  "body_type": "HATCHBACK",
  "brand": "Hyundai",
  "model": "i20 Active",
  "variant": "1.2 SX",
  "year": "2017",
  "fuel": "Petrol",
  "transmission": "Manual",
  "kms": "10244",
  "owners": "1"
}
{
  "title": "Maruti Suzuki Baleno Delta Diesel, 2017, Diesel",
  "location": "India, Maharashtra, Pune, Shivaji Nagar",
  "price": "7,25,000",
  "body_type": "HATCHBACK",
  "brand": "Maruti Suzuki",
  "model": "Baleno",
  "variant": "1.3 Delta",
  "year": "2017",
  "fuel": "Diesel",
  "transmission": "Manual",
  "kms": "66449",
  "owners": "1"
}
{
  "title": "Audi Q3 2.0 TDI quattro Premium, 2012, Diesel",
  "location": "India, Maharashtra, Pune, Shivaji Nagar",
  "price": "14,95,000",
  "body_type": "LUXURY_SUV",
  "brand": "Audi",
  "model": "Q3",
  "variant": "2.0 TDI quattro Premium",
  "year": "2012",
  "fuel": "Diesel",
  "transmission": "Automatic",
 