From c70caacb253a6da0873035618f50026ca56c6326 Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Fri, 16 Apr 2021 23:47:27 +0530 Subject: [PATCH 1/7] #913 Codeforces_Problem_Scraper Added --- .../Codeforces_problem_scrapper.py | 53 +++++++++++++++++++ Coderforces_Problem_Scrapper/README.md | 36 +++++++++++++ Coderforces_Problem_Scrapper/requirements.txt | 4 ++ 3 files changed, 93 insertions(+) create mode 100644 Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py create mode 100644 Coderforces_Problem_Scrapper/README.md create mode 100644 Coderforces_Problem_Scrapper/requirements.txt diff --git a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py new file mode 100644 index 0000000000..9effe6bc4d --- /dev/null +++ b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py @@ -0,0 +1,53 @@ +import os +from selenium import webdriver # Automated webdriver +from PIL import Image +from fpdf import FPDF # For converting images to pdf + + +def getproblem(): + """ + getproblem() : It takes input from the user of codeforces problemID and difficulty + level and then by using selenium and chrome webdriver, capturing screenshot of the + Codeforces problem using ttypography tag because all the problems of codeforces are + stored inside this div tag and saving it in a image.png file. + Then saving the image.png as pdf file by using fdf library. + """ + + # Taking input from the user to search for the problem + Pblm_id = input("Enter the Problem ID: ") + difficulty = input("Enter the difficulty level: ") + filename = input('Enter the file name to store Question: ') + '.pdf' + + # Going to the specific URL + url = "https://codeforces.com/problemset/problem/" + Pblm_id + "/" + difficulty + path = 'image.png' + options = webdriver.ChromeOptions() + + # Headless = True for taking a scrolling snapshot + options.headless = True + driver = webdriver.Chrome(r"chromedriver_win32\chromedriver.exe", options=options) + driver.get(url) + # Deciding height by tag + required_height = driver.execute_script( + 'return document.body.parentNode.scrollHeight') + driver.set_window_size(1366, required_height) + + # Taking SS of everything within the ttypography class + driver.find_element_by_class_name('ttypography').screenshot(path) + + # Opening image with pillow so based to capture its height and width + cover = Image.open(path) + WIDTH, HEIGHT = cover.size + MARGIN = 10 + # based on image's height and width we are adjusting the pdf margin and borders + pdf = FPDF(unit='pt', format=[WIDTH + 2 * MARGIN, HEIGHT + 2 * MARGIN]) + pdf.add_page() # Adding new page to the pdf + pdf.image(path, MARGIN, MARGIN) + pdf.output(filename, "F") # saving the pdf with the specified filename + + print(f'\nGreat Success!!! Check your directory for {filename} file!') + + +if __name__ == "__main__": + getproblem() + os.remove('image.png') \ No newline at end of file diff --git a/Coderforces_Problem_Scrapper/README.md b/Coderforces_Problem_Scrapper/README.md new file mode 100644 index 0000000000..83927850bb --- /dev/null +++ b/Coderforces_Problem_Scrapper/README.md @@ -0,0 +1,36 @@ +# Save any Problem Statement you like from Codeforces as a PDF. + +This python script will let you download Problem Statements from Codeforces and save them as a pdf file. The script uses Selenium Webdriver and fpdf library. Selenium is used with Chrome Webdriver, so having Chrome browser is a requirement. + +## Setting up: + +- Create a virtual environment and activate it. + +- Install the requirements + +```sh + $ pip install -r requirements.txt +``` + +## Running the script: + +```sh + $ python Codeforces_problem_scrapper.py +``` + +## Terminal Screenshot: + +![Imgur](https://i.imgur.com/Qr0AwMG.png) + +The program will ask you to enter: +1. Valid Problem ID. +2. Valid Difficulty Level. +3. filename(without '.pdf'). The pdf will be created in the same folder. + +## PDF Output: +![Imgur](https://i.imgur.com/GpSxCRZ.png) +![Imgur](https://i.imgur.com/c5mCNWM.png) + +## Author +[Akhil Bhalerao](https://github.com/iamakkkhil) + diff --git a/Coderforces_Problem_Scrapper/requirements.txt b/Coderforces_Problem_Scrapper/requirements.txt new file mode 100644 index 0000000000..fab18c7dec --- /dev/null +++ b/Coderforces_Problem_Scrapper/requirements.txt @@ -0,0 +1,4 @@ +pillow +fpdf +selenium + From 8e193c511f181066896ff00047f9364d9fce35d5 Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Sat, 17 Apr 2021 17:19:03 +0530 Subject: [PATCH 2/7] User IP of Driver Path Added --- .../Codeforces_problem_scrapper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py index 9effe6bc4d..94f2499f09 100644 --- a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py +++ b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py @@ -2,7 +2,7 @@ from selenium import webdriver # Automated webdriver from PIL import Image from fpdf import FPDF # For converting images to pdf - +DRIVER_PATH = '' def getproblem(): """ @@ -25,7 +25,7 @@ def getproblem(): # Headless = True for taking a scrolling snapshot options.headless = True - driver = webdriver.Chrome(r"chromedriver_win32\chromedriver.exe", options=options) + driver = webdriver.Chrome(DRIVER_PATH, options=options) driver.get(url) # Deciding height by tag required_height = driver.execute_script( @@ -49,5 +49,6 @@ def getproblem(): if __name__ == "__main__": + DRIVER_PATH = input("Enter DRIVER PATH location: ") getproblem() - os.remove('image.png') \ No newline at end of file + os.remove('image.png') From 502e3ca37c6e5242675aaca46c7a57246553487d Mon Sep 17 00:00:00 2001 From: Amit kumar mishra Date: Sun, 18 Apr 2021 12:51:00 +0530 Subject: [PATCH 3/7] Sentiment Detector --- Script to check Sentiment/Readme.md | 40 +++++++++++++ Script to check Sentiment/script.py | 92 +++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 Script to check Sentiment/Readme.md create mode 100644 Script to check Sentiment/script.py diff --git a/Script to check Sentiment/Readme.md b/Script to check Sentiment/Readme.md new file mode 100644 index 0000000000..5482672ca5 --- /dev/null +++ b/Script to check Sentiment/Readme.md @@ -0,0 +1,40 @@ +# Sentiment Detector + +[![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com) + +## Sentiment Detector Functionalities : 🚀 + +- In the input field write the sentence whose sentiment is to be checked +- On clicking on ```check sentiment``` button it displays the percentage of neutral, positive and negative sentiments +- It also displays the overall sentiment + +## Sentiment Detector Instructions: 👨🏻‍💻 + +### Step 1: + + Open Termnial 💻 + +### Step 2: + + Locate to the directory where python file is located 📂 + +### Step 3: + + Run the command: python script.py/python3 script.py 🧐 + +### Step 4: + + Sit back and Relax. Let the Script do the Job. ☕ + +## Requirements + + - vaderSentiment + - tkinter + +## DEMO +![Screenshot (211)](https://user-images.githubusercontent.com/60662775/115137592-9ac8c000-a044-11eb-83fe-84eaeb549283.png) + +## Author + + Amit Kumar Mishra + diff --git a/Script to check Sentiment/script.py b/Script to check Sentiment/script.py new file mode 100644 index 0000000000..23579ddf20 --- /dev/null +++ b/Script to check Sentiment/script.py @@ -0,0 +1,92 @@ +from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer +from tkinter import * + +def detect_sentiment(): + + sentence = textArea.get("1.0", "end") + sid_obj = SentimentIntensityAnalyzer() + + sentiment_dict = sid_obj.polarity_scores(sentence) + + string = str(sentiment_dict['neg']*100) + "% Negative" + negativeField.insert(10, string) + + string = str(sentiment_dict['neu']*100) + "% Neutral" + neutralField.insert(10, string) + + string = str(sentiment_dict['pos']*100) +"% Positive" + positiveField.insert(10, string) + + if sentiment_dict['compound'] >= 0.05 : + string = "Positive" + + elif sentiment_dict['compound'] <= - 0.05 : + string = "Negative" + else : + string = "Neutral" + + overallField.insert(10, string) + +def clearAll() : + + negativeField.delete(0, END) + neutralField.delete(0, END) + positiveField.delete(0, END) + overallField.delete(0, END) + textArea.delete(1.0, END) + + +gui = Tk() +gui.config(background = "light blue") +gui.title("Sentiment Detector") + +gui.geometry("500x500") +enterText = Label(gui, text = "Enter Your Sentence",bg = "light blue") + +textArea = Text(gui, height = 10, width = 53, font = "lucida 13") + +check = Button(gui, text = "Check Sentiment", fg = "Black",bg = "light yellow", command = detect_sentiment) +negative = Label(gui, text = "sentence was rated as: ",bg = "light blue") + +neutral = Label(gui, text = "sentence was rated as: ",bg = "light blue") + +positive = Label(gui, text = "sentence was rated as: ",bg = "light blue") + +overall = Label(gui, text = "Sentence Overall Rated As: ",bg = "light blue") + +negativeField = Entry(gui) + +neutralField = Entry(gui) +positiveField = Entry(gui) +overallField = Entry(gui) +clear = Button(gui, text = "Clear", fg = "Black",bg = "light yellow", command = clearAll) +Exit = Button(gui, text = "Exit", fg = "Black",bg = "light yellow", command = exit) + +enterText.grid(row = 0, column = 2) + +textArea.grid(row = 1, column = 2, padx = 10, sticky = W) + +check.grid(row = 2, column = 2) + +neutral.grid(row = 3, column = 2) + +neutralField.grid(row = 4, column = 2) + +positive.grid(row = 5, column = 2) + +positiveField.grid(row = 6, column = 2) + +negative.grid(row = 7, column = 2) + +negativeField.grid(row = 8, column = 2) + +overall.grid(row = 9, column = 2) + +overallField.grid(row = 10, column = 2) + +clear.grid(row = 11, column = 2) + +Exit.grid(row = 12, column = 2) + +gui.mainloop() + From bc2de17463d917e4c268a41cae688024db32be88 Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Wed, 21 Apr 2021 23:46:27 +0530 Subject: [PATCH 4/7] Readme Updated of CodeForces Scraper --- Coderforces_Problem_Scrapper/README.md | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/Coderforces_Problem_Scrapper/README.md b/Coderforces_Problem_Scrapper/README.md index 83927850bb..1a7a1153e1 100644 --- a/Coderforces_Problem_Scrapper/README.md +++ b/Coderforces_Problem_Scrapper/README.md @@ -1,6 +1,6 @@ -# Save any Problem Statement you like from Codeforces as a PDF. +# Save any number of Problem Statement you like from Codeforces as a PDF. -This python script will let you download Problem Statements from Codeforces and save them as a pdf file. The script uses Selenium Webdriver and fpdf library. Selenium is used with Chrome Webdriver, so having Chrome browser is a requirement. +This python script will let you download any number of Problem Statements from Codeforces and save them as a pdf file. The script uses Selenium Webdriver and fpdf library. Selenium is used with Chrome Webdriver, so having Chrome browser is a requirement. ## Setting up: @@ -15,22 +15,21 @@ This python script will let you download Problem Statements from Codeforces and ## Running the script: ```sh - $ python Codeforces_problem_scrapper.py + $ python Codeforces_Problem_Scrapper.py ``` ## Terminal Screenshot: -![Imgur](https://i.imgur.com/Qr0AwMG.png) +![Imgur](https://i.imgur.com/gqHMxMz.png) The program will ask you to enter: -1. Valid Problem ID. -2. Valid Difficulty Level. -3. filename(without '.pdf'). The pdf will be created in the same folder. +1. DRIVER PATH +2. VALID Difficulty Range of PROBLEMS. +3. Number of Questions to Scrape. ## PDF Output: -![Imgur](https://i.imgur.com/GpSxCRZ.png) -![Imgur](https://i.imgur.com/c5mCNWM.png) +![Imgur](https://i.imgur.com/1iMC7PE.png) +![GIF](https://media.giphy.com/media/lQ95K1IzUGB2tiqlmZ/giphy.gif) ## Author -[Akhil Bhalerao](https://github.com/iamakkkhil) - +[ Akhil Bhalerao ](https://github.com/iamakkkhil) From 9a9ef9af5c59fde88c890863f68096712dba5cd2 Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Wed, 21 Apr 2021 23:47:50 +0530 Subject: [PATCH 5/7] Codeforces Problem Scraper Added --- .../Codeforces_problem_scrapper.py | 155 +++++++++++++++--- 1 file changed, 128 insertions(+), 27 deletions(-) diff --git a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py index 94f2499f09..8731be39e3 100644 --- a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py +++ b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py @@ -2,9 +2,103 @@ from selenium import webdriver # Automated webdriver from PIL import Image from fpdf import FPDF # For converting images to pdf + DRIVER_PATH = '' -def getproblem(): + +def select_difficulty(): + """ + This function will let user to choose the difficulty level + :return: difficulty_level[] + """ + difficulty_level = [] + print("\nEnter the Range between 800 to 3500: ") + difficulty_level.append(int(input("Min: "))) + difficulty_level.append(int(input("Max: "))) + + return difficulty_level + + +def extracting_problem_links(diff_level): + """ + This function saves first saves the link of the pages to scrape from + and then the link of every question, saves it in list + :param diff_level: difficulty_level entered by the user + :return pblms_links: consists of all the available questions to scrape + """ + no_of_questions = int(input("\nHow many Questions you want to scrape: ")) + + pblms_link_scraped = 0 + pblms_links = [] + page = 1 + options = webdriver.ChromeOptions() + options.headless = True + driver = webdriver.Chrome(DRIVER_PATH, options=options) + print("\nRequesting URL ...") + driver.get(f"https://codeforces.com/problemset/?tags={diff_level[0]}-{diff_level[1]}") + + # ===================Getting no. of Pages to Scrape============================= + + # It will give the total no. of pages present with that question from + # which we are going to scrape + page_links = [] + + print("\nFinding available pages to scrape....") + + available_pages = driver.find_elements_by_css_selector("div.pagination a") + for page_no in available_pages: + page_links.append(page_no.get_attribute("href")) + + print(f"Available Pages to scrape are: {len(page_links[:-1])}") + + # =================================================================================== + + # ***************************** SCRAPING PAGE 1 ************************************* + print(f"\nScraping Page {page}") + + elements = driver.find_elements_by_css_selector("td.id.dark.left a" and "td.id.left a") + for element in elements: + # Saving the link in pblms_links + pblms_links.append(element.get_attribute("href")) + pblms_link_scraped += 1 + + # If we scraped required no. of questions then return + if pblms_link_scraped == no_of_questions: + print(f"URLs of Question Scraped till now: {pblms_link_scraped}") + print(f"\nURLs Scrapped Successfully {pblms_link_scraped} out of {no_of_questions}") + return pblms_links + page += 1 + print(f"URLs of Question Scraped till now: {pblms_link_scraped}") + # ************************************************************************************* + + # ----------------------------- SCRAPING SUBSEQUENT PAGES ----------------------------- + for link in page_links[1:-1]: + print(f"\nScraping Page {page}") + + # Going to next Page + driver.get(link) + elements = driver.find_elements_by_css_selector("td.id.dark.left a" and "td.id.left a") + for element in elements: + # Saving the link in pblms_links + pblms_links.append(element.get_attribute("href")) + pblms_link_scraped += 1 + + # If we scraped required no. of questions then return + if pblms_link_scraped == no_of_questions: + print(f"URLs of Question Scraped till now: {pblms_link_scraped}") + print(f"\nURLs Scrapped Successfully {pblms_link_scraped} out of {no_of_questions}") + return pblms_links + + print(f"URLs of Question Scraped till now: {pblms_link_scraped}") + page += 1 + # ---------------------------------------------------------------------------------------------- + + # scraped all the available questions but still the count is less + print(f"\n{pblms_link_scraped} out of {no_of_questions} URLs able to scrapped !!!") + return pblms_links + + +def getproblem(URLs): """ getproblem() : It takes input from the user of codeforces problemID and difficulty level and then by using selenium and chrome webdriver, capturing screenshot of the @@ -13,42 +107,49 @@ def getproblem(): Then saving the image.png as pdf file by using fdf library. """ - # Taking input from the user to search for the problem - Pblm_id = input("Enter the Problem ID: ") - difficulty = input("Enter the difficulty level: ") - filename = input('Enter the file name to store Question: ') + '.pdf' - - # Going to the specific URL - url = "https://codeforces.com/problemset/problem/" + Pblm_id + "/" + difficulty path = 'image.png' - options = webdriver.ChromeOptions() + # Creating a Target Output Folder + target_folder = './problems_pdf' + if not os.path.exists(target_folder): + os.makedirs(target_folder) + + options = webdriver.ChromeOptions() # Headless = True for taking a scrolling snapshot options.headless = True driver = webdriver.Chrome(DRIVER_PATH, options=options) - driver.get(url) - # Deciding height by tag - required_height = driver.execute_script( - 'return document.body.parentNode.scrollHeight') - driver.set_window_size(1366, required_height) + file_counter = 1 + + for url in URLs: + driver.get(url) + # Deciding height by tag + required_height = driver.execute_script( + 'return document.body.parentNode.scrollHeight') + driver.set_window_size(1366, required_height) + + title = driver.find_element_by_class_name("title").text + filename = title[3:] + '.pdf' - # Taking SS of everything within the ttypography class - driver.find_element_by_class_name('ttypography').screenshot(path) + # Taking SS of everything within the ttypography class + driver.find_element_by_class_name('ttypography').screenshot(path) - # Opening image with pillow so based to capture its height and width - cover = Image.open(path) - WIDTH, HEIGHT = cover.size - MARGIN = 10 - # based on image's height and width we are adjusting the pdf margin and borders - pdf = FPDF(unit='pt', format=[WIDTH + 2 * MARGIN, HEIGHT + 2 * MARGIN]) - pdf.add_page() # Adding new page to the pdf - pdf.image(path, MARGIN, MARGIN) - pdf.output(filename, "F") # saving the pdf with the specified filename + # Opening image with pillow so based to capture its height and width + cover = Image.open(path) + WIDTH, HEIGHT = cover.size + MARGIN = 10 + # based on image's height and width we are adjusting the pdf margin and borders + pdf = FPDF(unit='pt', format=[WIDTH + 2 * MARGIN, HEIGHT + 2 * MARGIN]) + pdf.add_page() # Adding new page to the pdf + pdf.image(path, MARGIN, MARGIN) - print(f'\nGreat Success!!! Check your directory for {filename} file!') + pdf.output(os.path.join(target_folder, filename), "F") # saving the pdf with the specified filename + print(f'File saved in your directory ./problems_pdf/{filename} ({file_counter}/{len(URLs)}) !') + file_counter += 1 if __name__ == "__main__": DRIVER_PATH = input("Enter DRIVER PATH location: ") - getproblem() + diff = select_difficulty() # Accepting difficulty level from user + problems_link = extracting_problem_links(diff) # scraping the required the no. of links + getproblem(problems_link) # saving the Questions in PDF file. os.remove('image.png') From d09eab2cd62206cd8f64f6b1ae5f0a127589eeda Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Wed, 21 Apr 2021 23:48:10 +0530 Subject: [PATCH 6/7] Update requirements.txt --- Coderforces_Problem_Scrapper/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Coderforces_Problem_Scrapper/requirements.txt b/Coderforces_Problem_Scrapper/requirements.txt index fab18c7dec..d776f5490d 100644 --- a/Coderforces_Problem_Scrapper/requirements.txt +++ b/Coderforces_Problem_Scrapper/requirements.txt @@ -1,4 +1,3 @@ pillow fpdf selenium - From a086be255c4113bdece4716c46c07cb704353d15 Mon Sep 17 00:00:00 2001 From: Akhil Bhalerao <55273506+iamakkkhil@users.noreply.github.com> Date: Thu, 22 Apr 2021 12:44:53 +0530 Subject: [PATCH 7/7] Resolved minor bugs --- Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py index 8731be39e3..2f2bb074a4 100644 --- a/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py +++ b/Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py @@ -3,8 +3,6 @@ from PIL import Image from fpdf import FPDF # For converting images to pdf -DRIVER_PATH = '' - def select_difficulty(): """ @@ -12,7 +10,7 @@ def select_difficulty(): :return: difficulty_level[] """ difficulty_level = [] - print("\nEnter the Range between 800 to 3500: ") + print("\nEnter the Range of difficulty between 800 to 3500: ") difficulty_level.append(int(input("Min: "))) difficulty_level.append(int(input("Max: "))) @@ -110,7 +108,7 @@ def getproblem(URLs): path = 'image.png' # Creating a Target Output Folder - target_folder = './problems_pdf' + target_folder = './Coderforces_Problem_Scrapper/problems_pdf' if not os.path.exists(target_folder): os.makedirs(target_folder)