# Scraper helper class that manages HTTP requests

In [2]:
import requests
import random
import string
from stem import Signal
from stem.control import Controller

In [3]:
class ScraperRequests:
    """Issue HTTP GET requests through a local Tor SOCKS proxy.

    Every request is sent with a randomly chosen ``User-Agent`` header, a
    randomly generated ``From`` header and a timeout. After every
    ``_ip_rotate_every`` requests the Tor control port is asked for a new
    identity (``NEWNYM``) before the next request is sent.

    Requires a Tor server on 127.0.0.1 with its SOCKS port (default 9050)
    and its control port (default 9051) reachable.
    """

    # Pool of browser User-Agent strings; one is picked at random per request.
    _user_agents = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
    ]
    # Length of the random local part of the ``From`` address. The
    # misspelled name is kept so existing overrides keep working.
    _from_lenght = 8
    # Number of requests sent through one Tor identity before renewing it.
    _ip_rotate_every = 10

    def __init__(self, verbose=False, *, timeout=30, socks_port=9050,
                 control_port=9051, control_password="password"):
        """Configure the scraper; no network traffic happens here.

        Args:
            verbose: When true, print the response of
                ``http://httpbin.org/ip`` before every request.
            timeout: Value passed as ``timeout`` to every ``session.get``
                call (seconds, or a ``(connect, read)`` tuple).
            socks_port: Local port of the Tor SOCKS proxy.
            control_port: Local port of the Tor control interface.
            control_password: Password used to authenticate on the
                control port.
        """
        self._verbose = verbose
        self._timeout = timeout
        self._socks_port = socks_port
        self._control_port = control_port
        self._control_password = control_password
        self._ip_counter = 0

    def _get_random_user_agent(self):
        """Return one entry of ``_user_agents`` chosen at random."""
        return random.choice(self._user_agents)

    def _get_random_from(self):
        """Return a random ``<A-Z0-9>{_from_lenght}@gmail.com`` address."""
        alphabet = string.ascii_uppercase + string.digits
        local_part = ''.join(random.choice(alphabet)
                             for _ in range(self._from_lenght))
        return local_part + "@gmail.com"

    def _get_tor_session(self):
        """Return a new ``requests`` session routed through the Tor proxy.

        The caller owns the session and is responsible for closing it.
        """
        session = requests.Session()

        # NOTE(review): with the ``socks5`` scheme, host names are presumably
        # resolved locally rather than through Tor; ``socks5h`` would resolve
        # them on the proxy. Scheme left unchanged here -- confirm intent.
        proxy_url = 'socks5://127.0.0.1:{}'.format(self._socks_port)
        session.proxies = {'http': proxy_url,
                           'https': proxy_url}
        return session

    def _renew_connection(self):
        """Signal Tor for a new identity via its control port."""
        with Controller.from_port(port=self._control_port) as controller:
            controller.authenticate(password=self._control_password)
            controller.signal(Signal.NEWNYM)

    def make_get(self, url):
        """GET ``url`` through Tor with randomized identifying headers.

        Args:
            url: Address to fetch.

        Returns:
            The response object returned by ``session.get``.

        Raises:
            Whatever ``session.get`` raises, including a timeout error when
            the configured timeout elapses.
        """
        # ``>=`` so the identity changes after exactly ``_ip_rotate_every``
        # requests (a strict ``>`` would allow one extra request).
        if self._ip_counter >= self._ip_rotate_every:
            self._renew_connection()
            self._ip_counter = 0

        session = self._get_tor_session()
        try:
            if self._verbose:
                print(session.get("http://httpbin.org/ip",
                                  timeout=self._timeout).text)
            self._ip_counter += 1
            headers = {
                'User-Agent': self._get_random_user_agent(),
                'From': self._get_random_from()
            }
            return session.get(url, headers=headers, timeout=self._timeout)
        finally:
            # The response body is already read (no streaming), so the
            # per-request session can be released right away.
            session.close()