## A simple example of using  Selenium with Python to scrape the friends data from facebook

### Imports

In [1]:
from selenium import webdriver
import time
from urllib.parse import urlsplit, parse_qs

### Login function

This function will provide our login operation

In [2]:
def login_facebook(browser, login=None, password=None):
    """
    Performs the login event to facebook
    """
    
    # Reads login and pass from login.py
    if login is None or password is None:
        from login import INFO
        login = INFO["login"]
        password = INFO["pass"]
    
    # Goes to mobile version of facebook
    browser.get('https://m.facebook.com')
    try:
        # Finds three login fields
        login_css = browser.find_element_by_id('m_login_email')
        password_css = browser.find_element_by_css_selector('input.bl.bm.bo')
        button = browser.find_element_by_css_selector('input[name="login"]')
    except:
        print("!!! You logged in or cannot log in now !!!")
        return
    # Puts the data into the fields
    login_css.send_keys(login)
    password_css.send_keys(password)
    
    # Presses the sign in button
    button.click()
    time.sleep(3)
    
    # Additional checker feature
    if ("Log In With One Tap" in browser.page_source):
        checker_css = browser.find_elements_by_css_selector('input[value="OK"]')
        if len(checker_css):
            checker_css[0].click()
            time.sleep(3)

    print("Current_url %s" % browser.current_url)
    print("Logged in...")

### Friends Scrapper Function
This function will do the actual work of scraping friends

In [3]:
def friends_scrapper(browser, pg_id):
    """
    Using browser and pg_id reads user's friends list
    and returns it as a list of tuples (name, id)
    """
    
    url = "https://m.facebook.com/%s/friends" % pg_id
    browser.get(url)
    
    time.sleep(1.5)
    # Initialize the list of names and ids
    ids_list = list()
    
    # Initialise the scroller
    while 1:
        # Scroll down to bottom and get the content
        names = browser.find_elements_by_css_selector('table[role="presentation"] td > a')
        for user_index in range(1, len(names)-7):
            try:
                link = names[user_index].get_attribute("href")
                name = names[user_index].text
                # print(name)
                # print(link)
            except Exception as e:
                # self.add_error(e)
                name = ""
                link = ""
            if name:
                ids_list.append( (name, link_converter(link)) )
        # Go to the next page
        scroller = browser.find_elements_by_css_selector("#m_more_friends a")
        if(len(scroller) == 0): break;
        scroller[0].click()
        

    print("Scrolled to the bottom...")
    return ids_list

### Link Converter
There are two types of links to get ids from:

PHP and Modern

https://m.facebook.com/profile.php?id=100015004564464&fref=fr_tab

https://m.facebook.com/dobosevych?fref=fr_tab

In [8]:
def link_converter(link):
    url = urlsplit(link)
    query = url.query
    params = parse_qs(query)
    if "/profile.php" in url.path:
        return params["id"]
    return url.path[1:]

## Let's try it out --

### Creating a browser instance

In [5]:
browser = webdriver.Firefox()

In [6]:
login_facebook(browser)

Current_url https://m.facebook.com/home.php?_rdr
Logged in...


In [9]:
browser.get("https://m.facebook.com/me")
user_id = link_converter(browser.current_url)

In [10]:
print(friends_scrapper(browser, user_id))

Scrolled to the bottom...
[('Лука Тріска', 'triskaluka'), ('Volodymyr Zabulskyy', 'vzabulskyy'), ('Pavlo Kachmar', 'pavlokach'), ('Станіслав Вдович', ['100011401647282']), ('Danil Shankovskiy', ['100010645567246']), ('Semen Senkivskyy', ['100010120716101']), ('Orest Korol', ['100007354367318']), ('Сергій Білоконь', ['100004758851024']), ('Маркіян Водовіз', ['100015004564464']), ('Artem Kuzmych', 'artem.kyzmuch'), ('Vasyl Borsuk', 'vasyl.borsuk.16'), ('Alina Smirnova', 'alina.smir'), ('Дмитро Наконечний', 'dymytriy'), ('Oles Dobosevych', 'dobosevych'), ('George Antentyk', 'yura.antentyk'), ("Мар'яна Темник", ['100004954450521']), ('Maxym Komarenskyy', ['100012368705309']), ('Oles Kozak', 'oles.kozak.3'), ('Roman Vey', 'roman.vey'), ('Uliana Supruniuk', 'uliana.supruniuk'), ('Ira Zakharchenko', 'ira.zakharchenko.1'), ('Nazar Romaniv', 'nazar0romaniv'), ('Роман Козак', ['100004729084569']), ('Mary Hirna', 'marichka.hirna'), ('Ira Kostyshyn', 'ira.kostyshyn.3'), ('Kostya Liepieshov', 'Inko

In [11]:
len([('Лука Тріска', 'triskaluka'), ('Volodymyr Zabulskyy', 'vzabulskyy'), ('Pavlo Kachmar', 'pavlokach'), ('Станіслав Вдович', ['100011401647282']), ('Danil Shankovskiy', ['100010645567246']), ('Semen Senkivskyy', ['100010120716101']), ('Orest Korol', ['100007354367318']), ('Сергій Білоконь', ['100004758851024']), ('Маркіян Водовіз', ['100015004564464']), ('Artem Kuzmych', 'artem.kyzmuch'), ('Vasyl Borsuk', 'vasyl.borsuk.16'), ('Alina Smirnova', 'alina.smir'), ('Дмитро Наконечний', 'dymytriy'), ('Oles Dobosevych', 'dobosevych'), ('George Antentyk', 'yura.antentyk'), ("Мар'яна Темник", ['100004954450521']), ('Maxym Komarenskyy', ['100012368705309']), ('Oles Kozak', 'oles.kozak.3'), ('Roman Vey', 'roman.vey'), ('Uliana Supruniuk', 'uliana.supruniuk'), ('Ira Zakharchenko', 'ira.zakharchenko.1'), ('Nazar Romaniv', 'nazar0romaniv'), ('Роман Козак', ['100004729084569']), ('Mary Hirna', 'marichka.hirna'), ('Ira Kostyshyn', 'ira.kostyshyn.3'), ('Kostya Liepieshov', 'Inkognita.n1')]
   )

26