In [None]:
import os
import requests as req
from bs4 import BeautifulSoup as soup
import json

### GENERAL CONSTANTS

---

In [None]:
# Windows path templates. Raw strings keep every backslash literal, so
# sequences such as "\R", "\B" or "\t" can never be read as escape sequences.
# Placeholder order: novel title first, then (where present) the file name.
WORKING_FILE_DIRECTORY = r"F:\Read Anime\Light-Novel\Books\{}\{}"
TESTING_DIRECTORY = r"F:\Read Anime\Light-Novel\Books\{}\tests"
TESTING_FILE_DIRECTORY = r"F:\Read Anime\Light-Novel\Books\{}\tests\{}"
FILE_FORMAT = ".html"  # extension appended to each saved chapter name
TEXT_FORMAT = "utf-8"  # text encoding name
HTML_PARSE_FORMAT = 'lxml'  # parser name handed to BeautifulSoup
SOURCE = "https://www.divinedaolibrary.com/"
TARGET_MANGA = "Martial Peak"  # title substituted into the path templates
# Progress record read from data.json in the current working directory.
# The context manager closes the file handle as soon as parsing finishes, and
# the explicit encoding keeps non-ASCII chapter names intact on every platform.
with open("data.json", encoding=TEXT_FORMAT) as _local_data_file:
    LOCAL_DATA = json.load(_local_data_file)

### JSON DATA KEYS

---

In [None]:
# Keys used to read and update the progress record held in LOCAL_DATA.
LATEST_CHAPTER = 'local_latest_chapter'  # name of the latest chapter recorded locally
LATEST_CHAPTER_URL = 'local_latest_chapter_url'  # url of that chapter

### CONSOLE MESSAGES

---

In [None]:
# Console message templates. Where a template contains "{}", the caller fills
# it in with str.format (a chapter name in the code visible in this notebook).
NO_UPDATES_MESSAGE = "[NO UPDATES] There were no recent updates beyond {}."
LAST_CHAPTER_MESSAGE = "[LAST CHAPTER] {} is the last chapter."
NEW_CHAPTER_MESSAGE = "[NEW CHAPTER] Getting {}..."
DOWNLOAD_ERROR_MESSAGE = "[DOWNLOAD ERROR] There was an error while trying to get {}"
# The three messages below take no arguments and are printed as-is.
HTTP_REQUEST_MESSAGE = "[FETCHING DATA] Fetching data from the internet using http requests"
CONNECTION_ERROR_MESSAGE = "[CONNECTION ERROR] There was a problem connecting to DDL"
COMPLETE_MESSAGE = "[SUCCESS] All updates have successfully been downloaded"

### HTML INJECTS

---

In [None]:
# Page skeleton for a saved chapter. It has three positional placeholders,
# filled in this order: title markup, chapter body markup, and the <style>
# block (which this template places after </body>).
BOILER = '''
<html>
    <body>
        <center>
            <div class = "main-content">
                {}
                <br><hr><br>
                {}
            </div>
        </center>
    </body>
    {}
</html>
'''
# Heading markup for the chapter title; the single placeholder receives the
# title text. The attribute is spelled `class` so the element actually
# carries the "main-title" class.
TITLE = '''
<h1 class = "main-title">{}</h1>
'''
# Markup for one paragraph of chapter text; the single placeholder receives
# the paragraph's text.
PARAGRAPH = '''
<p>
    {}
</p>
'''
# Stylesheet embedded in every saved chapter. It is passed to BOILER.format()
# as an argument and is never itself used as a format template, so the CSS
# braces below do not need to be doubled.
STYLE = '''
<style>
        body {
            color: #484848;
            line-height: 1.625;
            background-color : #EEEEEE !important;
        }
        h1 {
            font-size: 35px;
            letter-spacing: 0;
            line-height: 140%;
            font-weight: 600;
            margin-top: 10px;
            margin-right: auto;
            margin-bottom: 10px;
            margin-left: auto;
            font-family: 'Lato',sans-serif;
            text-align: center;
            color: #000000;
        }
        .main-content {
            background-color : #FFFFFF;
            width : 850px;
            padding-top: 25px;
            padding-right: 35px;
            padding-bottom: 25px;
            padding-left: 35px;
            box-shadow: 0 0 15px rgba(0,0,0,.05);
        }
        .main-content p {
            font-size: 20px !important;
            font-family: 'Lato',sans-serif;
            text-align: left;
        }
        p {
            margin-bottom: 1.5em;
            line-height: 28px;
        }
</style>
'''

### Function: *get_from_soup*(url : *`str`*) `=>` (next_url : *`str`*, html : *`str`*, name : *`str`*) : *`tuple`*
A function to get the `html`, `name` and `next_url` from a `url`.<br><br>
**SPECS**
- `url`: A valid url pointing to a chapter of a light novel hosted on [Divine Dao Library](https://www.divinedaolibrary.com/)
- `html`: The html as text from the http request
- `name`: The name of the chapter. eg. **Martial Peak Chapter 1239**
- `next_url`: The url of the next chapter, or `None` if there is no next chapter.

---

In [None]:
def _next_chapter_url(content):
    """Return the href of the third link in the first paragraph's span, or None."""
    first_paragraph = content.p
    navigation = first_paragraph.span if first_paragraph is not None else None
    links = navigation.find_all('a') if navigation is not None else []
    # NOTE(review): assumes the third navigation link is the "next chapter"
    # link, as the original positional lookup did - confirm against the site.
    if len(links) < 3:
        return None
    return links[2].get('href')


def get_from_soup(url):
    """Download one chapter page and rebuild it as a standalone HTML document.

    Args:
        url: Address of the chapter page to fetch.

    Returns:
        A ``(next_url, html_text, name)`` tuple:
          * ``next_url`` - href string of the next-chapter link, or ``None``
            when the page has no such link.
          * ``html_text`` - the chapter rendered through BOILER/TITLE/PARAGRAPH.
          * ``name`` - the page title up to its first comma, with the
            " - " / " – " separator collapsed to a single space.
        ``(None, None, None)`` is returned when the request fails or the page
        lacks the expected title or content element.
    """
    print(HTTP_REQUEST_MESSAGE)

    try:
        # A timeout stops a stalled connection from hanging the notebook, and
        # raise_for_status() keeps HTTP error pages from being parsed as chapters.
        response = req.get(url, timeout=30)
        response.raise_for_status()
    except req.RequestException:
        print(CONNECTION_ERROR_MESSAGE)
        return (None, None, None)

    page = soup(response.text, HTML_PARSE_FORMAT)
    heading = page.find("h1", {"class": "entry-title"})
    content = page.find("div", {"class": "entry-content"})

    if heading is None or content is None:
        # The page loaded but does not look like a chapter page.
        print(DOWNLOAD_ERROR_MESSAGE.format(url))
        return (None, None, None)

    title = heading.text
    title_text = TITLE.format(title)
    main_text = "".join(
        PARAGRAPH.format(paragraph.text) for paragraph in content.find_all('p')
    )
    html_text = BOILER.format(title_text, main_text, STYLE)

    # Titles may separate book and chapter with a hyphen or an en dash; both
    # are collapsed so the name reads like "Martial Peak Chapter 1239".
    name = title.split(',')[0].replace(' - ', ' ').replace(' – ', ' ')

    return (_next_chapter_url(content), html_text, name)

### Function: *get_updates*() : *`None`*
A function to get all the updated light novel chapters starting from the most recent local chapter.

---

In [None]:
def get_updates():
    """Download every chapter that follows the locally recorded latest chapter.

    Starting from the url stored in LOCAL_DATA, follows each page's
    next-chapter link, writes every fetched chapter as an HTML file under the
    testing directory, and updates LOCAL_DATA (in memory only) once the last
    available chapter has been reached.

    Returns:
        None. Progress and errors are reported on the console.
    """
    last_chapter = LOCAL_DATA[LATEST_CHAPTER]
    last_chapter_url = LOCAL_DATA[LATEST_CHAPTER_URL]
    next_chapter_url = get_from_soup(last_chapter_url)[0]

    if next_chapter_url is None:
        print(NO_UPDATES_MESSAGE.format(last_chapter))
        return

    while next_chapter_url is not None:
        chapter_url = next_chapter_url
        next_chapter_url, chapter_html, chapter_name = get_from_soup(
            chapter_url)

        if chapter_html is None or chapter_name is None:
            # The fetch failed. Stop here so the progress record is not
            # overwritten with None and no success message is printed.
            print(DOWNLOAD_ERROR_MESSAGE.format(chapter_url))
            return

        print(NEW_CHAPTER_MESSAGE.format(chapter_name))

        if next_chapter_url is None:
            # NOTE(review): this updates the in-memory record only; nothing
            # in this function writes it back to data.json.
            LOCAL_DATA[LATEST_CHAPTER] = chapter_name
            LOCAL_DATA[LATEST_CHAPTER_URL] = chapter_url
            print(LAST_CHAPTER_MESSAGE.format(chapter_name))

        # Chapters currently go to the testing directory; build the path from
        # WORKING_FILE_DIRECTORY instead to write to the real library.
        chapter_path = TESTING_FILE_DIRECTORY.format(
            TARGET_MANGA, chapter_name + FILE_FORMAT)

        try:
            # Explicit encoding: the platform default may be unable to
            # represent characters that appear in chapter text.
            with open(chapter_path, "w", encoding=TEXT_FORMAT) as chapter_html_file:
                chapter_html_file.write(chapter_html)
        except OSError:
            print(DOWNLOAD_ERROR_MESSAGE.format(chapter_name))

    print(COMPLETE_MESSAGE)

## TESTS

In [None]:
# Run the updater defined above; it reports its progress on the console.
get_updates()

## EXPERIMENTS

In [None]:
def _x_to_ascii_punctuation(text):
    """Replace the en dash and curly quotes in ``text`` with ASCII characters."""
    replacements = (
        ('\u2013', '-'),   # en dash
        ('\u2019', "'"),   # right single quotation mark
        ('\u201c', '"'),   # left double quotation mark
        ('\u201d', '"'),   # right double quotation mark
    )
    for fancy, plain in replacements:
        text = text.replace(fancy, plain)
    return text


def x_get_from_soup():
    """Rebuild the locally saved latest chapter as a standalone HTML document.

    Experimental counterpart of ``get_from_soup``: instead of issuing an http
    request it reads the file for the chapter named in LOCAL_DATA from the
    working directory.

    Returns:
        The rebuilt HTML text, or ``None`` when the file cannot be read or
        decoded.
    """
    print(HTTP_REQUEST_MESSAGE)
    last_chapter = LOCAL_DATA[LATEST_CHAPTER]
    source_path = WORKING_FILE_DIRECTORY.format(
        TARGET_MANGA, last_chapter + FILE_FORMAT)

    try:
        # This read stands in for the http request made by the main program.
        # Decoding the file as text (rather than calling str() on the raw
        # bytes) gives real characters instead of a "b'...'" repr full of
        # escaped byte sequences.
        with open(source_path, 'r', encoding=TEXT_FORMAT) as html_file:
            html = html_file.read()
    except (OSError, UnicodeDecodeError):
        print(CONNECTION_ERROR_MESSAGE)
        return None

    _soup = soup(html, HTML_PARSE_FORMAT)

    title = _soup.find("h1", {"class": "entry-title"}).text
    main = _soup.find("div", {"class": "entry-content"}).findAll('p')

    # The same punctuation normalisation is applied to the title and the body.
    title_text = _x_to_ascii_punctuation(TITLE.format(title))
    main_text = "".join(
        _x_to_ascii_punctuation(PARAGRAPH.format(stuff.text)) for stuff in main
    )

    return BOILER.format(title_text, main_text, STYLE)

In [None]:
def x_get_updates():
    """Rebuild the latest local chapter and save it in the testing directory.

    Experimental counterpart of ``get_updates`` that handles only the chapter
    named in LOCAL_DATA, using ``x_get_from_soup`` as its data source.

    Returns:
        None. Progress and errors are reported on the console.
    """
    last_chapter = LOCAL_DATA[LATEST_CHAPTER]

    chapter_html = x_get_from_soup()

    print(NEW_CHAPTER_MESSAGE.format(last_chapter))

    if chapter_html is None:
        # The source could not be read, so there is nothing to write.
        print(DOWNLOAD_ERROR_MESSAGE.format(last_chapter))
        return

    # The template has two placeholders: the novel title and the file name.
    chapter_path = TESTING_FILE_DIRECTORY.format(
        TARGET_MANGA, last_chapter + FILE_FORMAT)

    try:
        with open(chapter_path, "w", encoding=TEXT_FORMAT) as chapter_html_file:
            chapter_html_file.write(chapter_html)
    except OSError:
        print(DOWNLOAD_ERROR_MESSAGE.format(last_chapter))
        return

    print(COMPLETE_MESSAGE)

In [None]:
# Run the experimental single-chapter variant defined above.
x_get_updates()

In [None]:
# One-off cleanup of the file names in the testing directory: repair the
# garbled dash sequence, then collapse the " – " separator to a single space.
files = os.listdir(TESTING_DIRECTORY.format(TARGET_MANGA))

for f in files:
    real_name = f.replace('-\x80\x93', '–').replace(' – ', ' ')

    if real_name == f:
        # Name is already clean: nothing to rename or report.
        continue

    source_path = TESTING_FILE_DIRECTORY.format(TARGET_MANGA, f)
    target_path = TESTING_FILE_DIRECTORY.format(TARGET_MANGA, real_name)

    if os.path.exists(target_path):
        # os.rename raises on Windows when the destination exists; skip the
        # file instead of aborting the remaining renames.
        print(f"Skipping {f}: {real_name} already exists")
        continue

    print(f"From {f} to {real_name}")
    os.rename(source_path, target_path)