In [4]:
from datetime import datetime
from random import random
from bs4 import BeautifulSoup

import requests
import base64
import argparse

# Board URLs scanned by main(); each one gets its own Crawler instance.
BASE_URLS = [
    "http://jandan.net/pic",
    "http://jandan.net/treehole",
    "http://jandan.net/qa",
]

# Decorations for the non-verbose per-page hit line; Crawler.find_post_in_page
# picks one at random each time a page contains at least one match.
emojilist = ["üòª", "üê∏", "üëΩ", "‚öïÔ∏è", "‚ù§Ô∏è", "üëë"]

# Description text passed to argparse.ArgumentParser in process_arguments().
# Contains ANSI colour escape sequences (\033[...m) and embedded newlines.
HELP_TEXT = """
me_in_jiandan v1.1 \033[0;34mÊ≥®Ôºö\033[0m
Êó†ËÅäÂõæÊÄªÈ°µÊï∞Á∫¶‰∏∫180ÔºåÊ†ëÊ¥ûÁ∫¶‰∏∫80ÔºåÈóÆÁ≠îÁ∫¶‰∏∫10
Ëã•ÂëΩ‰ª§Ë°åÊîØÊåÅÔºåÂèØ‰ª•‚Äúctrl+ÁÇπÂáª‚ÄùÊâìÂºÄurl"""


class Configure:
    """Run-time settings shared by every crawler.

    Attributes are stored exactly as passed in:
        userName:  text searched for in each comment's author element.
        maxPages:  how many pages (counting back from the newest) to scan.
        isVerbose: whether diagnostic output is printed.
    """

    def __init__(self, userName: str, maxPages: int, isVerbose: bool) -> None:
        self.userName, self.maxPages, self.isVerbose = userName, maxPages, isVerbose

    def __repr__(self) -> str:
        """Return a multi-line summary, one setting per line."""
        # The leading empty entry and the trailing 8-space entry keep the
        # surrounding whitespace of the rendered block unchanged.
        rows = (
            "",
            "[ÂΩìÂâçÈÖçÁΩÆ]",
            f"userName:   {self.userName}",
            f"maxPages:   {self.maxPages}",
            f"isVerbose:  {self.isVerbose}",
            "        ",
        )
        return "\n".join(rows)


class Crawler:
    """Scan the pages of one board URL for comments by the configured user.

    A crawler fetches ``base_url`` once to read the current page number, then
    walks backwards through at most ``configure.maxPages`` pages, collecting
    one formatted text line per matching comment in ``self.results``.

    Attributes:
        configure: settings object providing userName, maxPages, isVerbose.
        base_url: board URL that page URLs are derived from.
        headers: HTTP headers sent with every request.
        results: accumulated result lines (appended to by every craw() call).
        curpage: number of the page currently being processed.
        max_pages: newest page number read from the board's front page
            (0 until craw() has run, or when it could not be determined).
    """

    # Seconds to wait for any single HTTP request before giving up, so that
    # an unresponsive server cannot hang the crawl indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, base_url, configure: "Configure") -> None:
        self.configure = configure
        self.base_url = base_url
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
        }
        self.results = []
        self.curpage = 0
        self.max_pages = 0

    def get_max_pages(self, raw: "BeautifulSoup") -> int:
        """Read the current page number from a parsed front page.

        Takes the first element with class ``current-comment-page`` and parses
        the integer between the last ``[`` and the following ``]`` of its
        string form.

        Returns:
            The page number, or 0 when the element is missing or its content
            is not an integer (an error line is printed in that case).
        """
        try:
            marker = str(raw.find_all(class_="current-comment-page")[0])
            return int(marker.split("[")[-1].split("]")[0])
        except (AttributeError, IndexError, ValueError) as e:
            print(e if self.configure.isVerbose else "Âá∫Èîô‰∫Ü")
            # Explicit "no pages" instead of an implicit None, so that craw()
            # can feed the value straight into range().
            return 0

    def find_post_in_page(self, url: str, page: "BeautifulSoup") -> list:
        """Collect result lines for the configured user's comments on a page.

        Args:
            url: URL the page was fetched from (kept for interface
                compatibility; not needed to build the result lines).
            page: parsed HTML of the page.

        Returns:
            One string per matching comment: its link, the two vote counters
            and the text of its ``.tucao-btn`` element, tab separated.
            Comments missing any of those elements are skipped.
        """
        result = []
        for comment in page.select(".commentlist>li"):
            # Substring match against the string form of the author element(s),
            # so a partial user name matches too.
            if self.configure.userName not in str(comment.select(".author>strong")):
                continue
            try:
                link = "http://jandan.net/t/" + comment.select(".righttext>a")[0].text
                oo = comment.select(".tucao-like-container")[0].select("span")[0].text
                xx = comment.select(".tucao-unlike-container")[0].select("span")[0].text
                tucao = comment.select(".tucao-btn")[0].text
            except IndexError as e:
                if self.configure.isVerbose:
                    print(e)
                continue
            result.append(f"{link}\too {oo}\t xx {xx}\t{tucao}")

        if self.configure.isVerbose:
            print(f"Page {self.curpage} Found {len(result)} item(s).")
        elif result:
            emoji = emojilist[int(random() * 100) % len(emojilist)]
            print(f"Page {self.curpage}: {len(result)} " + emoji)
        return result

    def _page_url(self, page_number: int) -> str:
        """Build the URL of one page: base_url + "/" + base64("<YYYYMMDD>-<n>")."""
        token = f"{datetime.now():%Y%m%d}-{page_number}"
        return self.base_url + "/" + base64.urlsafe_b64encode(token.encode()).decode()

    def craw(self) -> list:
        """Crawl up to ``configure.maxPages`` pages, newest first.

        Request failures are reported and the affected page is skipped; they
        never abort the crawl. If the front page itself cannot be fetched, or
        its page number cannot be read, no pages are crawled.

        Returns:
            ``self.results`` with the lines found by this call appended.
        """
        verbose = self.configure.isVerbose
        try:
            front = requests.get(
                self.base_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as e:
            print(f"Something went wrong! {e}" if verbose else "Something went wrong!")
            return self.results
        self.max_pages = self.get_max_pages(BeautifulSoup(front.text, "html.parser"))

        crawpagecount = self.configure.maxPages
        if verbose:
            print("‚ö°crawpagecount:" + str(crawpagecount))

        # Page numbers start at 1, so never walk below it.
        stop_before = max(self.max_pages - crawpagecount, 0)
        for i in range(self.max_pages, stop_before, -1):
            url = self._page_url(i)
            self.curpage = i
            try:
                resp = requests.get(
                    url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
                )
            except requests.RequestException as e:
                print(
                    f"Something went wrong! {e}" if verbose else "Something went wrong!"
                )
                # Without a response there is nothing to parse for this page.
                continue
            if not resp.ok:
                print("Oops! Something went wrong!")
                continue
            self.results += self.find_post_in_page(
                url, BeautifulSoup(resp.text, "html.parser")
            )
        return self.results


def process_arguments(argv=None):
    """Parse command-line options into a Configure object.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse reads ``sys.argv[1:]``. Passing an explicit
            list lets the function be used where ``sys.argv`` belongs to
            the host process, e.g. inside a notebook kernel.

    Returns:
        Configure built from --username, --max-pages and --verbose.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid or the
            required --username is missing.
    """
    parser = argparse.ArgumentParser(
        description=HELP_TEXT,
        # HELP_TEXT is pre-formatted over several lines; the default
        # formatter would re-wrap it into a single paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        # User name to look for. Required. The full name is recommended
        # because matching is by substring.
        "--username",
        "-u",
        metavar="Username",
        required=True,
        help="ÁõÆÊ†áÁî®Êà∑Âêç",
        dest="userName",
    )
    parser.add_argument(
        # Maximum number of pages to crawl. Larger values are slower because
        # pages are fetched one after another.
        "--max-pages",
        "-m",
        metavar="N",
        # Default of 30: with fewer pages, one's own posts may already have
        # been pushed out of range by other users' posts.
        default=30,
        type=int,
        help="ÊúÄÂ§ßÁà¨ÂèñÈ°µÈù¢",
        dest="maxPages",
    )
    parser.add_argument(
        # Print detailed diagnostics ("chatty mode").
        "--verbose",
        "-v",
        action="store_true",
        help="Â∫üËØùÊ®°Âºè",
        dest="isVerbose",
    )
    args = parser.parse_args(argv)
    return Configure(args.userName, args.maxPages, args.isVerbose)



def main(config):
    """Crawl every URL in BASE_URLS with *config* and print what was found.

    For each board a coloured header line is printed, followed either by the
    number of results and one line per result, or by a "no result" notice.
    """
    print("üê¢Áà¨Ë°å‰∏≠‚Ä¶")
    for board_url in BASE_URLS:
        print(f"\033[0;33m{board_url}\033[0m")
        hits = Crawler(board_url, config).craw()
        if not hits:
            print("\033[0;31mno result found\033[0m")
        else:
            print(f"\033[0;32m{len(hits)} result(s) found\033[0m")
            for line in hits:
                print(line)
        # Blank separator line between boards.
        print()
    print("üê¢Áà¨ÂÆåÂï¶~")



usage: ipykernel_launcher.py [-h] --username Username [--max-pages N]
                             [--verbose]
ipykernel_launcher.py: error: the following arguments are required: --username/-u


SystemExit: 2

In [14]:
# Run the crawl with a hand-built configuration (30 pages, non-verbose)
# rather than one parsed from command-line arguments.
main(Configure("ÊàëÁöÑ", 30, False))


üê¢Áà¨Ë°å‰∏≠‚Ä¶
[0;33mhttp://jandan.net/pic[0m
Page 197: 1 üê∏
Page 196: 1 üëΩ


KeyboardInterrupt: 