In [11]:
import csv
import os
import re
import sys
import time
import urllib

# urlretrieve lives in urllib.request on Python 3 and in urllib on Python 2
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import yaml
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
%matplotlib inline


# Load the scraper settings from config.yml; the "auth" entry holds the
# Facebook credentials ('fb_user' / 'fb_pass') used further down.
with open('config.yml') as f:
    CONFIG = yaml.safe_load(f)

# Everything collected for this account is written under data/<fb_user>
user_folder = 'data/' + CONFIG["auth"]['fb_user']

# Root of every Facebook URL built in this file (note the trailing slash)
BASE_URL = 'https://www.facebook.com/'

def get_friends_section_url(driver):
    """
    Build the URL of the "friends" section for the profile linked from the
    current page.

    Args:
        driver: the browser instance; the current page must contain the
            ``[title="Profile"]`` link.

    Returns:
        The URL of the friends section, as a string.
    """
    profile_link = driver.find_element_by_css_selector('[title="Profile"]')
    profile_url = profile_link.get_attribute('href')
    fb_id, kind = get_fb_id_from_url(profile_url)

    # Username-based profiles expose the list under <profile url>/friends
    if kind == 'username':
        return profile_url+'/friends'

    # Otherwise fall back to the profile.php query form
    return '%sprofile.php?id=%s&sk=friends' % (BASE_URL, fb_id)


def get_fb_id_from_url(url):
    """
    Extract a Facebook user identifier from a URL and tell which kind it is.

    Args:
        url: a profile URL (``.../<username>`` or ``.../profile.php?id=<n>``)
            or any link carrying an ``id=<n>`` parameter.

    Returns:
        A ``(identifier, kind)`` tuple where ``kind`` is ``'username'`` or
        ``'id'``.

    Raises:
        ValueError: when no identifier can be found in ``url``. ValueError is
            a subclass of Exception, so ``except Exception`` handlers still
            catch it.
    """
    username_match = re.search(r'facebook\.com/([a-zA-Z0-9.-]+)', url)
    # "profile.php" is the script serving numeric-id profiles, not a username:
    # let those URLs fall through to the id lookup below.
    if username_match and username_match.group(1) != 'profile.php':
        return username_match.group(1), 'username'

    id_match = re.search(r'id=([0-9]+)', url)
    if id_match:
        return id_match.group(1), 'id'

    raise ValueError("Couldn't extract FB id from the url: %s" % url)


def get_section_next_to_friends_section(driver):
    """
    Look for one of the timeline sections that can follow the friends list.

    Args:
        driver: the browser instance.

    Returns:
        The first matching element, trying the music, books, photos and
        videos sections in that order, or None when none of them is present.
    """
    selectors = ['#pagelet_timeline_medley_music', '#pagelet_timeline_medley_books',
                 '#pagelet_timeline_medley_photos', '#pagelet_timeline_medley_videos']
    for selector in selectors:
        try:
            return driver.find_element_by_css_selector(selector)
        except Exception:
            # This section is not on the page: try the next candidate
            # instead of giving up after the first miss.
            continue
    return None

In [12]:
def init_driver():
    """
    Create and configure the Chrome browser used for scraping.

    Returns:
        The configured Chrome webdriver instance.
    """
    options = webdriver.ChromeOptions()
    # NOTE(review): value 2 presumably blocks the notification permission
    # prompt -- confirm against the Chrome preferences reference.
    options.add_experimental_option(
        "prefs", {"profile.default_content_setting_values.notifications" : 2})
    for flag in ("--start-maximized", "--headless"):
        options.add_argument(flag)
    return webdriver.Chrome(chrome_options=options)

def login_to_account(driver, osn_user, osn_pass):
    """
    Submit the Facebook login form for the given account.

    A screenshot taken right after the form is submitted is stored in
    ``<user_folder>/screnshots/<osn_user>.png``.

    Args:
        driver: the browser instance.
        osn_user: username or e-mail of the account.
        osn_pass: password of the account.

    Returns:
        True when the form could be filled in and submitted, False when the
        browser raised an error (page not loaded, form elements missing,
        click refused...). True does not prove the credentials were accepted:
        the resulting page is not inspected.
    """
    screenshots_dir = user_folder+'/screnshots/'
    if not os.path.exists(screenshots_dir):
        os.makedirs(screenshots_dir)

    try:
        driver.get(BASE_URL)
        driver.find_element_by_name("email").send_keys(osn_user)
        driver.find_element_by_name("pass").send_keys(osn_pass)
        driver.find_element_by_css_selector('#loginbutton input').click()
        driver.save_screenshot(screenshots_dir+osn_user+'.png')
        return True
    except WebDriverException as exc:
        # WebDriverException is the base class of TimeoutException and of the
        # element lookup errors, so every browser-side failure ends up here.
        print("Un probleme de connexion")
        print(exc)
        return False
    
    
def get_friends(driver, friends_section_url, max_scrolls=None):
    """
    Collect the HTML blocks of the friends list.

    The page is scrolled to the bottom (END key) again and again until one of
    the sections that follow the friends list shows up.

    Args:
        driver: the browser instance.
        friends_section_url: URL of the friends section to load.
        max_scrolls: optional upper bound on the number of scroll attempts.
            None (the default) keeps scrolling until a following section is
            found, however long that takes.

    Returns:
        The list of ``[data-testid="friend_list_item"]`` elements, or None
        when an error occurred while scrolling or collecting them.
    """
    if driver.current_url != friends_section_url:
        driver.get(friends_section_url)

    page_body = driver.find_element_by_css_selector('body')

    try:
        scrolls = 0
        while True:
            page_body.send_keys(Keys.END)
            scrolls += 1
            if get_section_next_to_friends_section(driver) is not None:
                break
            if max_scrolls is not None and scrolls >= max_scrolls:
                # Give up scrolling and return what has been loaded so far
                print("Warning: end of friends list not reached after %d scrolls" % scrolls)
                break
        return driver.find_elements_by_css_selector('[data-testid="friend_list_item"]')
    except Exception as exc:
        print("Unhandled error: %s" % exc)
        return None
    
        
def _clean_profile_url(href):
    """Drop the query string of a profile link, keeping the id of profile.php links."""
    if not href:
        return href
    base, _, query = href.partition('?')
    if base.endswith('profile.php'):
        # For numeric-id profiles the id parameter IS the profile address
        id_match = re.search(r'(?:^|&)id=([0-9]+)', query)
        if id_match:
            return '%s?id=%s' % (base, id_match.group(1))
    return base


def make_friends_data(friends):
    """
    Extract the data of each friend and save it to ``<user_folder>/friends.csv``.

    Args:
        friends: the list of HTML blocks of the friends (None is treated as
            an empty list).

    Returns:
        A list of dicts with the keys name, url, img_url, meta_text, id and,
        when the profile URL is username-based, username. Friends without a
        picture or without a retrievable id are left out.
    """
    friends_data = []

    for friend in friends or []:
        try:
            img_link = friend.find_element_by_css_selector('a')
            img = friend.find_element_by_css_selector('a > img')
        except Exception:
            # No picture in the block: the account is deactivated, skip it
            continue

        friend_data = {}
        friend_data['name'] = img.get_attribute('aria-label')
        friend_data['url'] = _clean_profile_url(img_link.get_attribute('href'))
        friend_data['img_url'] = img.get_attribute('src')
        friend_data['meta_text'] = friend.text

        try:
            friend_data['id'], _ = get_fb_id_from_url(img_link.get_attribute('data-hovercard'))
        except Exception:
            print('Warning: couldnt get the FB id for:', friend_data['url'])
            continue

        try:
            user_id, id_type = get_fb_id_from_url(friend_data['url'])
            # "profile.php" is the numeric-id profile script, not a username
            if id_type == 'username' and user_id != 'profile.php':
                friend_data['username'] = user_id
        except Exception:
            # The username is optional: keep the friend without it
            pass

        friends_data.append(friend_data)

    print("saving friends list into friends.csv")
    df = pd.DataFrame(friends_data)
    df.to_csv(user_folder+'/friends.csv', sep='\t', encoding='utf-8')

    return friends_data

def save_thumb_img_friends(friends_data):
    """
    Download the thumbnail picture of every friend.

    Each picture is stored as ``<user_folder>/friends_photos/<id>.thumb.jpg``.

    Args:
        friends_data: list of friend dicts providing the 'img_url' and 'id'
            keys (as returned by make_friends_data).
    """
    photos_dir = user_folder+'/friends_photos'
    if not os.path.exists(photos_dir):
        os.makedirs(photos_dir)

    for friend_data in friends_data:
        # urlretrieve is imported at file level so that it resolves on both
        # Python 2 (urllib) and Python 3 (urllib.request)
        urlretrieve(friend_data['img_url'], photos_dir+'/'+friend_data['id']+".thumb.jpg")


def save_full_img_friends(driver, friends_data):
    """
    Download the full-size profile picture of every friend.

    Each picture is stored as ``<user_folder>/friends_photos/<id>.jpg``.

    Args:
        driver: the browser instance.
        friends_data: list of friend dicts providing the 'url' and 'id' keys
            (as returned by make_friends_data).
    """
    photos_dir = user_folder+'/friends_photos'
    if not os.path.exists(photos_dir):
        os.makedirs(photos_dir)

    for friend_data in friends_data:
        driver.get(friend_data['url'])
        profile_pic = driver.find_element_by_css_selector('.profilePicThumb')
        driver.get(profile_pic.get_attribute('href'))

        try:
            shared_image = driver.find_element_by_css_selector('.spotlight')
        except Exception:
            print('Warning: No spotlight')
            shared_image = None

        if shared_image is not None:
            img_src = shared_image.get_attribute('src')
        else:
            # No photo viewer: fall back to the plain image of the page
            img_src = driver.find_element_by_css_selector('img.img').get_attribute('src')

        # Save into the directory created above (friends_photos, with an "s")
        urlretrieve(img_src, photos_dir+'/'+friend_data['id']+".jpg")


def about_section_info(driver):
    """
    Open the "about" tab linked from the current page and print the text of
    its contact / basic information block.

    Args:
        driver: the browser instance.
    """
    about_tab = driver.find_element_by_css_selector('[data-tab-key="about"]')
    driver.get(about_tab.get_attribute('href'))
    overview = driver.find_element_by_css_selector('[data-overviewsection="contact_basic"]')
    print(overview.text)

def get_messages_to(user_id):
    """
    Open the conversation with a user and return its messages container.

    Experimental helper. It uses the module-level ``driver`` rather than
    taking the browser as an argument.

    Args:
        user_id: the identifier of the FB user.

    Returns:
        The element holding the messages, scrolled to its bottom.
    """
    # BASE_URL already ends with "/": do not add another one
    driver.get('%smessages/t/%s' % (BASE_URL, user_id))
    div_messages = driver.find_element_by_css_selector('._4u-c._1wfr._9hq')
    driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', div_messages)
    return div_messages

def get_account_photos():
    """
    Download the thumbnails listed in the "photos" tab linked from the
    current page.

    Each picture is stored as ``<user_folder>/photos/<element id>.jpg``.
    The module-level ``driver`` is used rather than an argument.
    """
    photos_dir = user_folder+'/photos'
    if not os.path.exists(photos_dir):
        os.makedirs(photos_dir)

    photos_tab = driver.find_element_by_css_selector('[data-tab-key="photos"]')
    driver.get(photos_tab.get_attribute('href'))
    photos = driver.find_elements_by_css_selector('a.uiMediaThumb._6i9.uiMediaThumbMedium')
    for photo in photos:
        style = photo.find_element_by_css_selector('i.uiMediaThumbImg').get_attribute('style')
        # The picture address is carried by the CSS url("...") of the thumbnail
        match = re.search(r'url\("(https://.+)"\);$', style or '')
        if match is None:
            print('Warning: no image url found in the thumbnail style')
            continue
        urlretrieve(match.group(1), photos_dir+'/'+ photo.get_attribute('id') +".jpg")

def construct_social_graph(driver, friends_data):
    """
    Build the social graph of the account and print a summary of it.

    The graph is a star: one node for the account (named after the
    configured 'fb_user') linked to one node per friend id.

    Args:
        driver: the browser instance (not used yet).
        friends_data: list of friend dicts providing the 'id' key.

    Returns:
        The networkx Graph that was built.
    """
    # TODO: add friend-to-friend edges from each friend's
    # <url>/friends_mutual page (get_friends + make_friends_data).
    account = CONFIG["auth"]['fb_user']

    G = nx.Graph()
    # The node of the account being processed
    G.add_node(account)
    G.add_edges_from((account, friend['id']) for friend in friends_data)

    # nx.info() is gone from recent networkx releases; printing the graph
    # gives the node/edge summary there.
    if hasattr(nx, 'info'):
        print(nx.info(G))
    else:
        print(G)
    return G

In [None]:
# Run the scraping pipeline: log in, collect the friends, build the graph,
# then fetch the "about" information and the pictures.
print('init driver')
driver = init_driver()

print('logging to FB account')
logged_in = login_to_account(driver, CONFIG["auth"]['fb_user'] , CONFIG['auth']['fb_pass']) 

if logged_in:
    friends_section_url = get_friends_section_url(driver)
    print('Getting friends items')
    friends = get_friends(driver, friends_section_url)
    if friends is None:
        # get_friends already reported its failure; carry on with no friends
        friends = []
    print('extracting data from friends items')
    friends_data = make_friends_data(friends)
    print('constructing the social graph of the account')
    construct_social_graph(driver, friends_data)
    print('Getting About user info')
    about_section_info(driver)
    print('Getting photos available ophotos section')
    get_account_photos()
    save_thumb_img_friends(friends_data)

init driver
logging to FB account


In [119]:
#messages = get_messages_to(friends_data[0]['id'])
#print(messages.text)
#df.to_csv('friends.csv', index=False, encoding='utf-8')
#save_thumb_img_friends(friends_data)
#save_full_img_friends(driver, friends_data)

#driver.quit() 
