Skip to content

Commit

Permalink
Massive performance improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
alombi authored and alombi committed Jan 22, 2021
1 parent e5a05c5 commit 84efac5
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
Binary file modified .DS_Store
Binary file not shown.
12 changes: 8 additions & 4 deletions api/author.py
Expand Up @@ -2,11 +2,14 @@
from http.server import BaseHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

def scrapeText(username):
    """Fetch a RoutineHub user profile page and return it as parsed HTML.

    Parameters:
        username: RoutineHub username whose public profile page is fetched.

    Returns:
        A bs4.BeautifulSoup parse tree of the profile page, for use with
        extractText().

    Raises:
        requests.HTTPError: if the profile page responds with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = f'https://routinehub.co/user/{username}'
    # Explicit timeout: requests.get() has none by default, so an
    # unresponsive server would otherwise hang this handler forever.
    req = requests.get(url, timeout=10)
    req.raise_for_status()
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    return soup

def extractText(soup, selector):
    """Return the whitespace-stripped text of the first element in *soup*
    matching the CSS *selector*.

    Raises IndexError when the selector matches nothing; callers catch
    this to treat the field as absent.
    """
    first_match = soup.select(selector)[0]
    return first_match.text.strip()
Expand All @@ -23,8 +26,9 @@ def do_GET(self):
parsed_path = urlparse(self.path)
path = '?' + parsed_path.query
RoutineHubAuthor = parse_qs(path[1:])["username"][0]
soup = scrapeText(RoutineHubAuthor)
try:
bio = scrapeText(RoutineHubAuthor, '#content > div > div > div.column.details > div.is-hidden-mobile > p')
bio = extractText(soup, '#content > div > div > div.column.details > div.is-hidden-mobile > p')
except:
bio = None
totalAuthoredHTML = scrapeElems(RoutineHubAuthor, '#content > div > div > div.column.details > div.authored > div')
Expand All @@ -33,9 +37,9 @@ def do_GET(self):
total_hearts = 0
for _ in totalAuthoredHTML:
totalAuthored = totalAuthored +1
total_hearts = total_hearts + int(scrapeText(RoutineHubAuthor, f'#content > div > div > div.column.details > div.authored > div > div:nth-child({totalAuthored}) > a > div > div > div > div > nav > div.level-right > span:nth-child(2) > small'))
total_hearts = total_hearts + int(extractText(soup, f'#content > div > div > div.column.details > div.authored > div > div:nth-child({totalAuthored}) > a > div > div > div > div > nav > div.level-right > span:nth-child(2) > small'))

totalDownloads = scrapeText(RoutineHubAuthor, '#content > div > div > div.column.sidebar.is-2 > div.stats > p:nth-child(2)')
totalDownloads = extractText(soup, '#content > div > div > div.column.sidebar.is-2 > div.stats > p:nth-child(2)')
totalDownloads = totalDownloads.split('Downloads: ')[1]
downloads_average =round(int(totalDownloads) / int(totalAuthored), 2)
hearts_average = round(int(total_hearts) / int(totalAuthored), 2)
Expand Down
12 changes: 8 additions & 4 deletions api/changelog.py
Expand Up @@ -3,11 +3,14 @@
from urllib.parse import urlparse, parse_qs


def scrape(id):
    """Fetch a RoutineHub shortcut changelog page and return it as parsed HTML.

    Parameters:
        id: numeric RoutineHub shortcut id whose changelog page is fetched.

    Returns:
        A bs4.BeautifulSoup parse tree of the changelog page, for use with
        extract().

    Raises:
        requests.HTTPError: if the page responds with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = f'https://routinehub.co/shortcut/{id}/changelog'
    # Explicit timeout: requests.get() has none by default, so an
    # unresponsive server would otherwise hang this handler forever.
    req = requests.get(url, timeout=10)
    req.raise_for_status()
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    return soup

def extract(soup, selector):
    """Return every element in *soup* matching the CSS *selector*.

    The result is the (possibly empty) list produced by soup.select().
    """
    return soup.select(selector)

Expand All @@ -17,12 +20,13 @@ def do_GET(self):
parsed_path = urlparse(self.path)
path = '?' + parsed_path.query
RoutineHubID = parse_qs(path[1:])["id"][0]
text = scrape(RoutineHubID, '#content > div > div.versions')
soup = scrape(RoutineHubID)
text = extract(soup, '#content > div > div.versions')

data = {
"id":RoutineHubID
}
elems = scrape(RoutineHubID, '#content > div > div.heading > h4')
elems = extract(soup, '#content > div > div.heading > h4')
res = elems[0].text.strip()
data["name"] = res
# Get number of versions
Expand All @@ -37,7 +41,7 @@ def do_GET(self):
"versions":[]
}
while i != 0:
text = scrape(RoutineHubID, f'#content > div > div.versions > article:nth-child({str(child)})')
text = extract(soup, f'#content > div > div.versions > article:nth-child({str(child)})')
version = str(text).split('\n')[4].split('</strong>')[0].replace('<strong>', '')
versionData = {
"version":version
Expand Down
16 changes: 8 additions & 8 deletions api/stats.py
Expand Up @@ -2,20 +2,19 @@
from http.server import BaseHTTPRequestHandler
from urllib.parse import urlparse, parse_qs

def scrape(id):
    """Fetch a RoutineHub shortcut page and return it as parsed HTML.

    Parameters:
        id: numeric RoutineHub shortcut id whose page is fetched.

    Returns:
        A bs4.BeautifulSoup parse tree of the shortcut page, for use with
        extract() and scrapeDownloads().

    Raises:
        requests.HTTPError: if the page responds with an error status.
        requests.Timeout: if the server does not respond within the timeout.
    """
    url = f'https://routinehub.co/shortcut/{id}/'
    # Explicit timeout: requests.get() has none by default, so an
    # unresponsive server would otherwise hang this handler forever.
    req = requests.get(url, timeout=10)
    req.raise_for_status()
    soup = bs4.BeautifulSoup(req.text, 'html.parser')
    return soup

def extract(soup, selector):
    """Return the whitespace-stripped text of the first element in *soup*
    matching the CSS *selector*.

    Raises IndexError when the selector matches nothing.
    """
    matches = soup.select(selector)
    return matches[0].text.strip()

def scrapeDownloads(soup):
    """Extract the download count from a parsed RoutineHub shortcut page.

    Parameters:
        soup: parse tree of a shortcut page (as returned by scrape()), so
            the page is fetched once and reused instead of re-downloaded.

    Returns:
        The download count as a string, e.g. '1234'.

    Raises:
        IndexError: if the expected '.information' markup is missing.
    """
    # The stats live in the fourth <p> nested under the first
    # '.information > p' element, formatted as 'Downloads: <count>'.
    res = soup.select('.information > p ')[0].select('p')[3].text
    res = res.split('Downloads: ')[1]
    return res
Expand All @@ -26,8 +25,9 @@ def do_GET(self):
parsed_path = urlparse(self.path)
path = '?' + parsed_path.query
RoutineHubID = parse_qs(path[1:])["id"][0]
hearts = scrape(RoutineHubID, '#content > div > div > div.column.sidebar.is-2 > div.heart.has-text-centered')
downloads = scrapeDownloads(RoutineHubID)
soup = scrape(RoutineHubID)
hearts = extract(soup, '#content > div > div > div.column.sidebar.is-2 > div.heart.has-text-centered')
downloads = scrapeDownloads(soup)
data = {
"id":RoutineHubID,
"hearts":hearts,
Expand Down

1 comment on commit 84efac5

@vercel
Copy link

@vercel vercel bot commented on 84efac5 Jan 22, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.