In [None]:
import os
import sys
import time
import urllib
import re

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
import pandas as pd
import yaml

# Load the settings file; later cells read CONFIG['auth']['fb_user'] and
# CONFIG['auth']['fb_pass'] from it to log in.
with open('config.yml') as f:
    CONFIG = yaml.safe_load(f)

# Facebook root URL (note the trailing slash); the login, messages and
# friends-page URLs in this file are built from it.
BASE_URL = 'https://www.facebook.com/'

def init_driver():
    """
    Create and configure the Chrome browser.

    The browser is started maximized and with the notification content
    setting forced to 2 (presumably "block", so permission pop-ups do not
    get in the way of the scraping -- confirm against Chrome's prefs).

    Returns the configured Chrome webdriver.
    """
    options = webdriver.ChromeOptions()
    options.add_experimental_option(
        "prefs",
        {"profile.default_content_setting_values.notifications": 2},
    )
    options.add_argument("--start-maximized")
    return webdriver.Chrome(chrome_options=options)
 
    
def log_to(driver, osn_user, osn_pass):
    """
    Submit the Facebook login form for a user.

    args: driver = browser, osn_user = username or email, osn_pass = password

    A screenshot of the page is saved to screenshots/<osn_user>.png right
    after the login button is clicked.

    Returns True once the form has been submitted, False when Selenium could
    not find or operate the form elements. True does NOT prove that Facebook
    accepted the credentials: nothing is checked after the click.
    """
    # WebDriverException is the base class of Selenium's errors, so it covers
    # both the original TimeoutException and the NoSuchElementException that
    # find_element_by_* raises when the login form is missing.
    from selenium.common.exceptions import WebDriverException

    driver.get(BASE_URL)
    try:
        username = driver.find_element_by_name("email")
        password = driver.find_element_by_name("pass")
        username.send_keys(osn_user)
        password.send_keys(osn_pass)
        driver.find_element_by_css_selector('#loginbutton input').click()
        driver.save_screenshot('screenshots/'+osn_user+'.png')
        return True
    except WebDriverException:
        print("Un probleme de connexion")
        return False

In [None]:
def get_section_next_to_friends_section(driver):
    """
    Look for one of the timeline sections used as the "end of friends list"
    marker (music, books, photos, videos -- tried in that order).

    args: driver = browser
    Returns the first matching element, or None when none of the sections is
    present in the page yet.
    """
    selectors = ('#pagelet_timeline_medley_music', '#pagelet_timeline_medley_books',
                 '#pagelet_timeline_medley_photos', '#pagelet_timeline_medley_videos')
    for selector in selectors:
        try:
            return driver.find_element_by_css_selector(selector)
        except Exception:
            # This section is not in the page: fall through to the next
            # candidate instead of giving up after the first miss.
            continue
    return None

        
def get_friends(driver):
    """
    Extract the HTML blocks that make up the friends list.

    Opens the friends page of the logged-in user, then keeps sending the END
    key to the page body until a section located after the friends list is
    found, so that the whole list gets loaded.

    args: driver = browser
    Returns the list of friend elements, or None if an error occurs while
    scrolling or collecting them.
    """
    driver.get(get_profile_url(driver))

    page_body = driver.find_element_by_css_selector('body')
    try:
        while True:
            # Each END key press scrolls down, which loads more friends.
            page_body.send_keys(Keys.END)
            if get_section_next_to_friends_section(driver) is not None:
                break
        return driver.find_elements_by_css_selector('[data-testid="friend_list_item"]')
    except Exception:
        print("Unhandled error")
        return None
        
    
def get_fb_id_from_url(url):
    """
    Extract a user's identifier from a Facebook url and tell which kind it is.

    Args: url = url of a FB user, e.g. 'https://www.facebook.com/john.doe'
          or 'https://www.facebook.com/profile.php?id=12345'
    Returns a tuple (identifier, kind) where kind is 'username' or 'id'.
    Raises ValueError (an Exception subclass) when no identifier is found.
    """
    res = re.search(r'facebook\.com/([a-zA-Z0-9.-]+)', url)
    # 'profile.php' is the script name of numeric-id profile urls, not a
    # username: in that case the identifier is in the 'id=' query parameter.
    if res and res.group(1) != 'profile.php':
        return res.group(1), 'username'
    res = re.search(r'id=([0-9]+)', url)
    if res:
        return res.group(1), 'id'

    raise ValueError("Couldn't extract FB id from the url: %s" % (url,))
        
        
def make_friends_data(friends):
    """
    Extract the data of each friend from its HTML block.

    args: friends = list of the friends' html blocks; None (what get_friends
          returns on failure) is treated as an empty list
    Returns a list of dicts, one per friend, with the keys:
              name, url, img_url, meta_text, id and, when the profile url
              holds a username, username
    Friends without a picture or without a resolvable id are skipped.
    """
    friends_data = []
    for friend in friends or []:
        friend_data = {}
        img_link = friend.find_element_by_css_selector('a')
        try:
            img = friend.find_element_by_css_selector('a > img')
        except Exception:
            # No picture inside the link: the account is deactivated.
            continue

        friend_data['name'] = img.get_attribute('aria-label')
        # Drop the query string so only the bare profile url is kept.
        friend_data['url'] = img_link.get_attribute('href').split('?')[0]
        friend_data['img_url'] = img.get_attribute('src')
        friend_data['meta_text'] = friend.text

        try:
            friend_data['id'], _ = get_fb_id_from_url(img_link.get_attribute('data-hovercard'))
        except Exception:
            print('Warning: couldnt get the FB id for:', friend_data['url'])
            continue
        try:
            user_id, id_type = get_fb_id_from_url(friend_data['url'])
            if id_type == 'username':
                friend_data['username'] = user_id
        except Exception:
            # Best effort: the username is optional, the entry is kept
            # without it.
            pass

        friends_data.append(friend_data)
    return friends_data

def save_thumb_img_friends(driver, friends_data):
    """
    Download the thumbnail picture of every friend.

    args: driver = browser instance (not used by this function),
          friends_data = list of dicts holding at least 'img_url' and 'id'
    Each picture is written to images/<id>.thumb.jpg; the images folder is
    created when missing.
    Returns None.
    """
    # urlretrieve lives in urllib.request on Python 3 and directly in urllib
    # on Python 2; support both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if not os.path.isdir('images'):
        os.makedirs('images')

    for friend_data in friends_data:
        img_src = friend_data['img_url']
        urlretrieve(img_src, 'images/'+friend_data['id']+".thumb.jpg")
    
def get_full_img_friends(driver, friends_data):
    """
    Download the full-size profile picture of every friend.

    For each friend the browser opens the profile url, follows the link of
    the '.profilePicThumb' element, then saves the image found there: the
    '.spotlight' element when present, otherwise the first 'img.img' element.

    args: driver = browser, friends_data = list of dicts holding at least
          'url' and 'id'
    Each picture is written to images/<id>.jpg; the images folder is created
    when missing.
    Returns None.
    """
    # urlretrieve lives in urllib.request on Python 3 and directly in urllib
    # on Python 2; support both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    if not os.path.isdir('images'):
        os.makedirs('images')

    for friend_data in friends_data:
        driver.get(friend_data['url'])
        profile_pic = driver.find_element_by_css_selector('.profilePicThumb')
        link = profile_pic.get_attribute('href')
        driver.get(link)

        try:
            shared_image = driver.find_element_by_css_selector('.spotlight')
        except Exception:
            print('Warning: No spotlight')
            shared_image = None

        if shared_image is not None:
            img_src = shared_image.get_attribute('src')
        else:
            # No spotlight viewer on the page: fall back to the first image.
            img_src = driver.find_element_by_css_selector('img.img').get_attribute('src')

        urlretrieve(img_src, 'images/'+friend_data['id']+".jpg")

def get_messages_to(user_id):
    """
    Open the messages thread with a user and scroll it to the bottom.

    args: user_id = identifier of the user, as a string (it is concatenated
          into the url)
    Uses the module-level `driver` created by the notebook rather than taking
    the browser as an argument, so the driver must exist before the call.
    Returns the element matching '._4u-c._1wfr._9hq' (presumably the
    messages container -- the class names are opaque, confirm in the page).
    """
    # BASE_URL already ends with '/': strip it so the url has no '//' in
    # its path.
    driver.get(BASE_URL.rstrip('/')+'/messages/t/'+user_id)
    div_messages = driver.find_element_by_css_selector('._4u-c._1wfr._9hq')
    driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', div_messages)
    return div_messages

def get_profile_url(driver):
    """
    Build the url of the friends page of the logged-in user.

    Despite its name, this returns the friends-page url, not the profile url:
    the profile link (element with title "Profile") is read from the current
    page and the friends url is derived from it.

    args: driver = browser
    Returns the friends page url as a string.
    """
    profile_url = driver.find_element_by_css_selector('[title="Profile"]').get_attribute('href')
    user_id, id_type = get_fb_id_from_url(profile_url)

    if id_type == 'username':
        user_friends_url = profile_url+'/friends'
    else:
        # Numeric-id profiles go through profile.php with an 'sk' selector.
        user_friends_url = '%sprofile.php?id=%s&sk=friends' % (BASE_URL, user_id)

    return user_friends_url
    

def about_user(driver):
    """
    Navigate the browser to the "about" page of the user.

    The user is identified from the link of the about tab
    ('[data-tab-key="about"]') found on the current page.

    args: driver = browser; it must currently be on BASE_URL
    Raises Exception when the browser is on any other url.
    Returns None.
    """
    if driver.current_url != BASE_URL:
        raise Exception("Ne peut pas lancer une tel action a partir de cette url: ", driver.current_url)

    about_url = driver.find_element_by_css_selector('[data-tab-key="about"]').get_attribute('href')
    user_id, id_type = get_fb_id_from_url(about_url)

    if id_type == 'username':
        # Rebuild the url from the username instead of appending to the tab
        # link, which may already end with '/about'.
        about_page_url = '%s%s/about' % (BASE_URL, user_id)
    else:
        about_page_url = '%sprofile.php?id=%s&sk=about' % (BASE_URL, user_id)

    driver.get(about_page_url)

    return None

In [None]:
# Notebook cell: start the browser and log in with the credentials stored
# under the 'auth' key of CONFIG. `driver` and `logged_in` are globals used
# by the following cells. There is no active `__main__` guard (see the
# commented-out line below), so this runs whenever the cell is executed.
#if __name__ == "__main__":
print('init driver')
driver = init_driver()
print('logging in')
logged_in = log_to(driver, CONFIG["auth"]['fb_user'] , CONFIG['auth']['fb_pass']) 

In [None]:
# Notebook cell: when log_to reported success, open the messages thread with
# a hard-coded user id and print the text of the returned element.
if(logged_in):
    messages = get_messages_to('100015877170169')
    print(messages.text)

In [None]:
# Notebook cell: collect the friend elements, turn them into dicts and load
# them into a DataFrame (`df`, displayed by the next cell).
print('getting friends items')
friends = get_friends(driver)
print('extracting data from friends items')
friends_data = make_friends_data(friends)
df = pd.DataFrame(friends_data)
# Optional steps, currently disabled: CSV export, full-size picture download
# and browser shutdown.
#df.to_csv('friends.csv', index=False, encoding='utf-8')

#get_full_img_friends(driver, friends_data)

#driver.quit() 

In [None]:
# Notebook cell: display the first rows of the friends DataFrame.
df.head()