Skip to content

Commit

Permalink
Convert to a proper Python module
Browse files Browse the repository at this point in the history
  • Loading branch information
SkYNewZ committed Nov 30, 2019
1 parent e2b3571 commit 4db2054
Show file tree
Hide file tree
Showing 11 changed files with 67 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Expand Up @@ -114,7 +114,7 @@ dmypy.json
.idea

## Generated data
src/Data
/data

## Python venv
venv
10 changes: 6 additions & 4 deletions README.md
Expand Up @@ -122,20 +122,20 @@ $ python3 -m venv venv
$ source venv/bin/activate

# Install Python requirements
$ pip install -r requirements.txt
$ pip install -e .
```

The code is multi-platform and is tested on both Windows and Linux.
The tool uses the latest version of [Chrome Web Driver](http://chromedriver.chromium.org/downloads). I have placed the webdriver along with the code, but if that version doesn't work, replace the Chrome web driver with the latest one matching your platform and your Google Chrome version.

### How to Run

- Fill your Facebook credentials into [`src/credentials.yaml`](src/credentials.yaml)
- Edit the [`src/input.txt`](src/input.txt) file and add many profiles links as you want in the following format with each link on a new line:
- Fill your Facebook credentials into [`credentials.yaml`](credentials.yaml)
- Edit the [`input.txt`](input.txt) file and add as many profile links as you want in the following format, with each link on a new line:

Make sure the link only contains the username or ID number at the end and nothing else. Make sure it's in the format mentioned above.

> Note: There are two modes to download Friends Profile Pics and the user's Photos: Large Size and Small Size. You can change the following variables. By default they are set to Small Sized Pics because its really quick while Large Size Mode takes time depending on the number of pictures to download
> Note: There are two modes to download Friends Profile Pics and the user's Photos: Large Size and Small Size. You can change the following variables in [`scraper/scraper.py`](scraper/scraper.py#L30). By default they are set to Small Sized Pics because it's really quick, while Large Size Mode takes time depending on the number of pictures to download.
```python
# whether to download the full image or its thumbnail (small size)
Expand All @@ -145,6 +145,8 @@ friends_small_size = True
photos_small_size = True
```

Run the `ultimate-facebook-scraper` command! 🚀

---

## Citation
Expand Down
1 change: 1 addition & 0 deletions scraper/__init__.py
@@ -0,0 +1 @@
# Package version; setup.py imports this value to set the distribution version.
__version__ = "0.0.1"
3 changes: 3 additions & 0 deletions scraper/__main__.py
@@ -0,0 +1,3 @@
"""Entry module executed when the package is run as ``python -m scraper``."""
from .scraper import scrapper

# NOTE: this call is not guarded by ``if __name__ == "__main__"``, so the
# scraper starts whenever this module is imported, not only under ``-m``.
scrapper()
30 changes: 22 additions & 8 deletions src/scraper.py → scraper/scraper.py
Expand Up @@ -39,6 +39,9 @@
facebook_https_prefix = "https://"


CHROMEDRIVER_BINARIES_FOLDER = "bin"


# -------------------------------------------------------------
# -------------------------------------------------------------

Expand Down Expand Up @@ -610,7 +613,7 @@ def create_folder(folder):


def scrap_profile(ids):
folder = os.path.join(os.getcwd(), "Data")
folder = os.path.join(os.getcwd(), "data")
create_folder(folder)
os.chdir(folder)

Expand Down Expand Up @@ -790,10 +793,17 @@ def login(email, password):
try:
platform_ = platform.system().lower()
chromedriver_versions = {
"linux": "./chromedriver_linux64",
"darwin": "./chromedriver_mac64",
"windows": "./chromedriver_win32.exe",
"linux": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_linux64",
),
"darwin": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_mac64",
),
"windows": os.path.join(
os.getcwd(), CHROMEDRIVER_BINARIES_FOLDER, "chromedriver_win32.exe",
),
}

driver = webdriver.Chrome(
executable_path=chromedriver_versions[platform_], options=options
)
Expand All @@ -814,8 +824,12 @@ def login(email, password):
driver.find_element_by_name("email").send_keys(email)
driver.find_element_by_name("pass").send_keys(password)

# clicking on login button
driver.find_element_by_id("loginbutton").click()
try:
# clicking on login button
driver.find_element_by_id("loginbutton").click()
except NoSuchElementException:
# Facebook new design
driver.find_element_by_name("login").click()

# if your account uses multi factor authentication
mfa_code_input = safe_find_element_by_id(driver, "approvals_code")
Expand Down Expand Up @@ -844,7 +858,7 @@ def login(email, password):
# -----------------------------------------------------------------------------


def main():
def scrapper(**kwargs):
with open("credentials.yaml", "r") as ymlfile:
cfg = yaml.safe_load(stream=ymlfile)

Expand Down Expand Up @@ -873,4 +887,4 @@ def main():

if __name__ == "__main__":
# get things rolling
main()
scrapper()
34 changes: 34 additions & 0 deletions setup.py
@@ -0,0 +1,34 @@
"""Packaging script for the ``ultimate-facebook-scraper`` distribution.

Installs the ``scraper`` package and exposes the
``ultimate-facebook-scraper`` console command.
"""
import setuptools

from scraper import __version__


# README.md contains non-ASCII characters (e.g. an emoji), so decode it
# explicitly as UTF-8 rather than relying on the platform default encoding,
# which would raise UnicodeDecodeError on e.g. Windows cp1252 locales.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="ultimate-facebook-scraper",
    version=__version__,
    author="Haris Muneer",
    author_email="haris.muneer@conradlabs.com",
    license="MIT",
    keywords="Facebook Scraper",
    description="A bot which scrapes almost everything about a Facebook user's profile",
    long_description_content_type="text/markdown",
    long_description=long_description,
    url="https://github.com/harismuneer/Ultimate-Facebook-Scraper",
    packages=setuptools.find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.7",
    extras_require={"dev": ["black", "twine", "wheel"]},
    install_requires=["selenium==3.141.0", "pyyaml"],
    entry_points={
        # Target the function in scraper/scraper.py directly. The previous
        # target, ``scraper.__main__:scraper``, named an attribute that
        # ``__main__`` never defines (it only imports ``scrapper``), and
        # importing ``__main__`` runs the scraper as a side effect.
        "console_scripts": [
            "ultimate-facebook-scraper=scraper.scraper:scrapper",
        ],
    },
)
Binary file removed src/chromedriver_linux64
Binary file not shown.
Binary file removed src/chromedriver_mac64
Binary file not shown.
Binary file removed src/chromedriver_win32.exe
Binary file not shown.
2 changes: 0 additions & 2 deletions src/credentials.yaml

This file was deleted.

2 changes: 0 additions & 2 deletions src/input.txt

This file was deleted.

0 comments on commit 4db2054

Please sign in to comment.