Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions Coderforces_Problem_Scrapper/Codeforces_problem_scrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import os
from selenium import webdriver # Automated webdriver
from PIL import Image
from fpdf import FPDF # For converting images to pdf


def select_difficulty():
    """
    Ask the user for the lower and upper bound of the difficulty range.

    Both answers are converted with int(), so a non-numeric answer raises
    ValueError.

    :return: two-element list ``[min, max]`` in the order it was entered
    """
    print("\nEnter the Range of difficulty between 800 to 3500: ")
    lower_bound = int(input("Min: "))
    upper_bound = int(input("Max: "))
    return [lower_bound, upper_bound]


def _collect_problem_links(driver, pblms_links, no_of_questions):
    """
    Append the problem URLs found on the currently loaded page to pblms_links.

    :param driver: webdriver whose current page is a problem-set listing
    :param pblms_links: list that is extended in place with the found hrefs
    :param no_of_questions: number of links after which collecting stops
    :return: True as soon as pblms_links holds exactly no_of_questions links,
             False when the page is exhausted first
    """
    # A single selector is enough: "td.id.left" also matches cells that carry
    # the extra "dark" class. (The previous `"..." and "..."` expression was a
    # Python boolean `and`, which simply evaluated to this second string.)
    for element in driver.find_elements_by_css_selector("td.id.left a"):
        pblms_links.append(element.get_attribute("href"))
        if len(pblms_links) == no_of_questions:
            return True
    return False


def extracting_problem_links(diff_level):
    """
    This function first saves the links of the pages to scrape from
    and then the link of every question, and collects them in a list.

    The user is asked how many questions to scrape; collecting stops as soon
    as that many links were gathered or when the discovered pages run out.
    The browser is always shut down before returning.

    :param diff_level: difficulty_level entered by the user, as [min, max]
    :return pblms_links: list of problem URLs (may hold fewer entries than
                         requested when not enough problems are available)
    """
    no_of_questions = int(input("\nHow many Questions you want to scrape: "))

    pblms_links = []
    options = webdriver.ChromeOptions()
    options.headless = True
    driver = webdriver.Chrome(DRIVER_PATH, options=options)
    try:
        print("\nRequesting URL ...")
        driver.get(f"https://codeforces.com/problemset/?tags={diff_level[0]}-{diff_level[1]}")

        # ===================Getting no. of Pages to Scrape=====================
        print("\nFinding available pages to scrape....")
        # The hrefs are read now, as plain strings, because the elements
        # become stale once the driver navigates to another page.
        page_links = [
            anchor.get_attribute("href")
            for anchor in driver.find_elements_by_css_selector("div.pagination a")
        ]
        print(f"Available Pages to scrape are: {len(page_links[:-1])}")

        # ===================Scraping page by page===============================
        # Page 1 is already loaded, hence the None placeholder (no navigation).
        # The first pagination link is skipped for the same reason; the last
        # one is skipped as well -- presumably it is the "next" arrow rather
        # than a page of its own (confirm against the live markup).
        for page, link in enumerate([None] + page_links[1:-1], start=1):
            print(f"\nScraping Page {page}")
            if link is not None:
                driver.get(link)

            quota_reached = _collect_problem_links(driver, pblms_links, no_of_questions)
            print(f"URLs of Question Scraped till now: {len(pblms_links)}")

            # If we scraped required no. of questions then return
            if quota_reached:
                print(f"\nURLs Scrapped Successfully {len(pblms_links)} out of {no_of_questions}")
                return pblms_links

        # scraped all the available questions but still the count is less
        print(f"\n{len(pblms_links)} out of {no_of_questions} URLs able to scrapped !!!")
        return pblms_links
    finally:
        # Always terminate the headless browser, even on errors; otherwise
        # the Chrome/chromedriver processes are left running.
        driver.quit()


def _problem_pdf_name(title):
    """
    Turn a problem title such as "A. Some Name" into a safe PDF filename.

    :param title: text of the page's title element
    :return: filename ending in '.pdf' without path separators or other
             characters that are invalid in filenames on common platforms
    """
    # Strip the leading problem index up to the first ". ". A fixed
    # three-character slice only works when the index is a single character.
    _index, separator, name = title.partition(". ")
    if not separator:
        name = title
    # Replace characters that would break os.path.join or be rejected by the
    # filesystem.
    unsafe = '\\/:*?"<>|'
    name = name.translate(str.maketrans(unsafe, "_" * len(unsafe))).strip()
    return (name or "problem") + '.pdf'


def getproblem(URLs):
    """
    getproblem() : It takes the list of Codeforces problem URLs and, by using
    selenium and chrome webdriver, captures a screenshot of each problem via
    the element with class 'ttypography', saving it in the image.png file.
    The image.png is then saved as a pdf file by using the fpdf library.

    image.png is overwritten for every problem and is left on disk after the
    last one; removing it is up to the caller.

    :param URLs: list of problem page URLs to convert
    :return: None; one PDF per URL is written into the target folder
    """

    path = 'image.png'

    # Creating a Target Output Folder
    target_folder = './Coderforces_Problem_Scrapper/problems_pdf'
    os.makedirs(target_folder, exist_ok=True)

    options = webdriver.ChromeOptions()
    # Headless = True for taking a scrolling snapshot
    options.headless = True
    driver = webdriver.Chrome(DRIVER_PATH, options=options)
    margin = 10

    try:
        for file_counter, url in enumerate(URLs, start=1):
            driver.get(url)
            # Grow the window to the full document height so the element
            # screenshot below is not cut off at the viewport.
            required_height = driver.execute_script(
                'return document.body.parentNode.scrollHeight')
            driver.set_window_size(1366, required_height)

            title = driver.find_element_by_class_name("title").text
            filename = _problem_pdf_name(title)

            # Taking SS of everything within the ttypography class
            driver.find_element_by_class_name('ttypography').screenshot(path)

            # Only the dimensions are needed; close the file right away so the
            # next screenshot (and the caller's cleanup) can overwrite/remove it.
            with Image.open(path) as cover:
                width, height = cover.size

            # based on image's height and width we are adjusting the pdf
            # margin and borders
            pdf = FPDF(unit='pt', format=[width + 2 * margin, height + 2 * margin])
            pdf.add_page()  # Adding new page to the pdf
            pdf.image(path, margin, margin)

            output_path = os.path.join(target_folder, filename)
            pdf.output(output_path, "F")  # saving the pdf with the specified filename
            # Report the real location; the old message named a different folder.
            print(f'File saved in your directory {output_path} ({file_counter}/{len(URLs)}) !')
    finally:
        # Always terminate the headless browser, even when a page fails.
        driver.quit()


if __name__ == "__main__":
    # DRIVER_PATH is read as a module-level global by the scraping functions.
    DRIVER_PATH = input("Enter DRIVER PATH location: ")
    diff = select_difficulty()  # Accepting difficulty level from user
    problems_link = extracting_problem_links(diff)  # scraping the required no. of links
    getproblem(problems_link)  # saving the Questions in PDF file.
    # image.png only exists when at least one problem was processed, so an
    # unconditional remove would crash with FileNotFoundError on an empty run.
    if os.path.exists('image.png'):
        os.remove('image.png')
35 changes: 35 additions & 0 deletions Coderforces_Problem_Scrapper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Save any number of Problem Statements you like from Codeforces as a PDF.

This python script will let you download any number of Problem Statements from Codeforces and save them as a pdf file. The script uses Selenium Webdriver and fpdf library. Selenium is used with Chrome Webdriver, so having Chrome browser is a requirement.

## Setting up:

- Create a virtual environment and activate it.

- Install the requirements

```sh
$ pip install -r requirements.txt
```

## Running the script:

```sh
$ python Codeforces_problem_scrapper.py
```

## Terminal Screenshot:

![Imgur](https://i.imgur.com/gqHMxMz.png)

The program will ask you to enter:
1. DRIVER PATH
2. VALID Difficulty Range of PROBLEMS.
3. Number of Questions to Scrape.

## PDF Output:
![Imgur](https://i.imgur.com/1iMC7PE.png)
![GIF](https://media.giphy.com/media/lQ95K1IzUGB2tiqlmZ/giphy.gif)

## Author
[ Akhil Bhalerao ](https://github.com/iamakkkhil)
3 changes: 3 additions & 0 deletions Coderforces_Problem_Scrapper/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pillow
fpdf
selenium
40 changes: 40 additions & 0 deletions Script to check Sentiment/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# <b>Sentiment Detector</b>

[![forthebadge](https://forthebadge.com/images/badges/made-with-python.svg)](https://forthebadge.com)

## Sentiment Detector Functionalities : 🚀

- In the input field write the sentence whose sentiment is to be checked
- Clicking the ```check sentiment``` button displays the percentage of neutral, positive and negative sentiments
- It also displays the overall sentiment

## Sentiment Detector Instructions: 👨🏻‍💻

### Step 1:

Open Terminal 💻

### Step 2:

Navigate to the directory where the Python file is located 📂

### Step 3:

Run the command: `python script.py` (or `python3 script.py`) 🧐

### Step 4:

Sit back and Relax. Let the Script do the Job. ☕

## Requirements

- vaderSentiment
- tkinter

## DEMO
![Screenshot (211)](https://user-images.githubusercontent.com/60662775/115137592-9ac8c000-a044-11eb-83fe-84eaeb549283.png)

## Author

Amit Kumar Mishra

92 changes: 92 additions & 0 deletions Script to check Sentiment/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from tkinter import *

def detect_sentiment():
    """
    Analyse the text in textArea and show the result in the output fields.

    The negative, neutral and positive scores are shown as percentages and
    the compound score is mapped to an overall verdict: "Positive" at
    >= 0.05, "Negative" at <= -0.05, otherwise "Neutral".
    """
    sentence = textArea.get("1.0", "end")
    sentiment_dict = SentimentIntensityAnalyzer().polarity_scores(sentence)

    compound = sentiment_dict['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"

    # One decimal place avoids float noise such as "56.99999999999999%".
    results = (
        (negativeField, f"{sentiment_dict['neg'] * 100:.1f}% Negative"),
        (neutralField, f"{sentiment_dict['neu'] * 100:.1f}% Neutral"),
        (positiveField, f"{sentiment_dict['pos'] * 100:.1f}% Positive"),
        (overallField, verdict),
    )
    for field, text in results:
        # Clear the previous result first; otherwise pressing the button
        # again appends the new text to the old one.
        field.delete(0, END)
        field.insert(0, text)

def clearAll():
    """Empty the four result entries and the sentence text area."""
    for result_entry in (negativeField, neutralField, positiveField, overallField):
        result_entry.delete(0, END)
    textArea.delete(1.0, END)


# ------------------------------ Main window ------------------------------
gui = Tk()
gui.config(background="light blue")
gui.title("Sentiment Detector")
gui.geometry("500x500")

# ------------------------------ Widgets ----------------------------------
enterText = Label(gui, text="Enter Your Sentence", bg="light blue")

# Multi-line input whose content detect_sentiment() reads.
textArea = Text(gui, height=10, width=53, font="lucida 13")

check = Button(gui, text="Check Sentiment", fg="Black", bg="light yellow",
               command=detect_sentiment)

# Captions shown above the respective result entries.
negative = Label(gui, text="sentence was rated as: ", bg="light blue")
neutral = Label(gui, text="sentence was rated as: ", bg="light blue")
positive = Label(gui, text="sentence was rated as: ", bg="light blue")
overall = Label(gui, text="Sentence Overall Rated As: ", bg="light blue")

# Result entries filled by detect_sentiment() and emptied by clearAll().
negativeField = Entry(gui)
neutralField = Entry(gui)
positiveField = Entry(gui)
overallField = Entry(gui)

clear = Button(gui, text="Clear", fg="Black", bg="light yellow", command=clearAll)
# Close the window via Tk itself; the `exit` name is a helper installed by
# the `site` module for interactive use and is not guaranteed to exist.
Exit = Button(gui, text="Exit", fg="Black", bg="light yellow", command=gui.destroy)

# ------------------------------ Layout -----------------------------------
# Everything sits in column 2, one widget per row, top to bottom.
enterText.grid(row=0, column=2)
textArea.grid(row=1, column=2, padx=10, sticky=W)
check.grid(row=2, column=2)
neutral.grid(row=3, column=2)
neutralField.grid(row=4, column=2)
positive.grid(row=5, column=2)
positiveField.grid(row=6, column=2)
negative.grid(row=7, column=2)
negativeField.grid(row=8, column=2)
overall.grid(row=9, column=2)
overallField.grid(row=10, column=2)
clear.grid(row=11, column=2)
Exit.grid(row=12, column=2)

gui.mainloop()