Skip to content

Commit

Permalink
Corrigi muitos bugs e atualizei código
Browse files Browse the repository at this point in the history
  • Loading branch information
fccoelho committed Oct 11, 2023
1 parent c9c093a commit 5c56915
Show file tree
Hide file tree
Showing 12 changed files with 1,810 additions and 20,445 deletions.
18,625 changes: 557 additions & 18,068 deletions Book/Capitulo_1.ipynb

Large diffs are not rendered by default.

686 changes: 297 additions & 389 deletions Book/Capitulo_2.ipynb

Large diffs are not rendered by default.

471 changes: 187 additions & 284 deletions Book/Capitulo_3.ipynb

Large diffs are not rendered by default.

1,354 changes: 613 additions & 741 deletions Book/Capitulo_4.ipynb

Large diffs are not rendered by default.

1,052 changes: 126 additions & 926 deletions Book/Capitulo_5.ipynb

Large diffs are not rendered by default.

32 changes: 9 additions & 23 deletions Book/Capitulo_6.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,25 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": null,
"id": "88ed1d45-d9f3-4008-ae6f-d0b681d7b28f",
"metadata": {
"tags": [
"hide-output"
]
],
"is_executing": true,
"ExecuteTime": {
"start_time": "2023-10-11T14:40:36.447117094Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fetching results for covid19\n",
"Found 500 items\n",
" 26%|██████████▍ | 128/500 [01:32<03:51, 1.61it/s]Downloading of 36571519 failed. HTTP Error 400: Bad Request\n",
" 99%|████████████████████████████████████████▌| 494/500 [05:53<00:04, 1.46it/s]Downloading of 36567860 failed. HTTP Error 400: Bad Request\n",
"100%|█████████████████████████████████████████| 500/500 [05:58<00:00, 1.40it/s]\n",
"Fetching results for covid-19\n",
"Found 500 items\n",
"100%|█████████████████████████████████████████| 500/500 [00:31<00:00, 15.84it/s]\n",
"Fetching results for sars-cov-2\n",
"Found 500 items\n",
" 45%|██████████████████▍ | 225/500 [00:55<03:13, 1.42it/s]Downloading of 36564060 failed. HTTP Error 400: Bad Request\n",
" 65%|██████████████████████████▋ | 326/500 [02:08<02:25, 1.20it/s]Downloading of 36560681 failed. HTTP Error 400: Bad Request\n",
"100%|█████████████████████████████████████████| 500/500 [04:26<00:00, 1.88it/s]\n",
"Fetching results for 2019-ncov\n",
"Found 500 items\n",
"100%|████████████████████████████████████████| 500/500 [00:01<00:00, 372.41it/s]\n",
"Fetching results for novel coronavirus\n",
"Found 500 items\n",
"100%|█████████████████████████████████████████| 500/500 [00:10<00:00, 49.21it/s]\n",
"Updating citations...\n",
"100%|███████████████████████████████████████| 2054/2054 [18:36<00:00, 1.84it/s]\n"
"Fetching results for covid19\r\n",
"Found 500 items\r\n",
" 27%|███████████▏ | 136/500 [01:48<04:48, 1.26it/s]"
]
}
],
Expand Down
File renamed without changes.
24 changes: 12 additions & 12 deletions Book/fetch2sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
from datetime import date
from ratelimiter import RateLimiter
# from ratelimiter import RateLimiter
import warnings
warnings.filterwarnings('ignore')

Expand Down Expand Up @@ -74,16 +74,16 @@ def __init__(self, email, search_term, count=9999, collection='articles'):
self.citation_table = 'citations' if collection == 'articles' else f"citations_{collection}"
with engine.connect() as connection:
connection.execute(
f"create table IF NOT EXISTS {self.articles_table}(pmid integer unique, title varchar(1024), abstract text, journal varchar(512), pubdate date);")
text(f"create table IF NOT EXISTS {self.articles_table}(pmid integer unique, title varchar(1024), abstract text, journal varchar(512), pubdate date);"))
connection.execute(
f"create table IF NOT EXISTS {self.citation_table}(pmid integer unique, cited_by text);")
text(f"create table IF NOT EXISTS {self.citation_table}(pmid integer unique, cited_by text);"))
connection.execute(
f"create unique index IF NOT EXISTS pmid_idx on {self.articles_table} (pmid)")
text(f"create unique index IF NOT EXISTS pmid_idx on {self.articles_table} (pmid)"))
connection.execute(
f"create unique index IF NOT EXISTS pmid_idx on {self.citation_table} (pmid)")
text(f"create unique index IF NOT EXISTS pmid_idx on {self.citation_table} (pmid)"))


@RateLimiter(max_calls=4, period=1)
# @RateLimiter(max_calls=4, period=1)
def _fetch(self, pmid):
try:
handle = Entrez.efetch(db="pubmed", id=pmid, retmode='xml')
Expand All @@ -99,7 +99,7 @@ def _fetch(self, pmid):
def _get_old_ids(self):
    """Return the PMIDs already stored in this collection's articles table.

    Runs ``select pmid from <articles_table>`` on the module-level
    ``engine`` and collects the first column of every returned row.

    Returns:
        list: one ``pmid`` value per row of ``self.articles_table``.
    """
    with engine.connect() as connection:
        # The SQL string is wrapped in text(): SQLAlchemy 2.x refuses to
        # execute plain ``str`` statements.
        # NOTE(review): the table name is interpolated with an f-string;
        # this presumes ``self.articles_table`` never holds untrusted
        # input -- confirm against the constructor.
        res = connection.execute(
            text(f"select pmid from {self.articles_table};"))
        oldids = res.fetchall()

    return [i[0] for i in oldids]
Expand All @@ -121,9 +121,9 @@ def _get_citations(self, pmid):

def create_FT_index(self):
    """Create the ``article_fts`` full-text table and load it from the articles table.

    Two statements are executed in order:

    1. ``CREATE VIRTUAL TABLE IF NOT EXISTS article_fts USING fts5(...)``
       declaring ``title`` and ``abstract`` columns, with
       ``self.articles_table`` as the external content table and ``pmid``
       as the content rowid.
    2. ``INSERT INTO article_fts ... SELECT pmid, title, abstract`` from
       ``self.articles_table``.

    NOTE(review): the INSERT is unconditional, so calling this method more
    than once presumably inserts the same rows again -- confirm whether
    callers guard against that.
    """
    query = text(f"""CREATE VIRTUAL TABLE IF NOT EXISTS article_fts USING fts5(title, abstract, content={self.articles_table},tokenize='porter ascii', content_rowid='pmid');
""")
    query2 = text(f"INSERT INTO article_fts (rowid, title, abstract) SELECT pmid, title, abstract FROM {self.articles_table};")
    # engine.begin() commits when the block exits without an exception and
    # rolls back otherwise. With a plain engine.connect() under
    # SQLAlchemy 2.x nothing is committed automatically, so the INSERT
    # would be discarded when the connection closes.
    with engine.begin() as connection:
        connection.execute(query)
        connection.execute(query2)

Expand All @@ -143,8 +143,8 @@ def update_citations_concurrently(self):
else:
with engine.connect() as connection:
cits = '|'.join(cits)
connection.execute(f"insert into {self.citation_table} VALUES(%s, %s) \
on conflict (pmid) do update set cited_by=%s;", (pmid, cits, cits))
connection.execute(text(f"insert into {self.citation_table} VALUES(%s, %s) \
on conflict (pmid) do update set cited_by=%s;", (pmid, cits, cits)))
except Exception as e:
print(f'{pmid} generated an exception: {e}')

Expand Down
2 changes: 1 addition & 1 deletion Book/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ A seleção de assuntos para inclusão neste livro é baseada na demanda dos lei
Este livro está baseado em uma combinação de cursos sobre o assunto ministrados por [mim](https://github.com/fccoelho) ao longo dos anos. Procuro manter o conteúdo acessível a pessoas com menos experiência em programação em Python. Caso você, leitor, se depare com trechos tecnicamente muito avançados, fique à vontade para abrir uma `issue` explicando como acha que podemos tornar o conteúdo mais acessível.

Caso queira apoiar este projeto, faça um PIX de qualquer valor, usando o código QR abaixo:
<img src="./PIX_QR_only.jpg" width=300px>

![doação](./PIX_QR_only.jpg)


Binary file modified Book/minha_tabela.csv.gz
Binary file not shown.
3 changes: 2 additions & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Build the Jupyter Book under Book/ and publish the rendered HTML to GitHub Pages.

# -W treats warnings as errors, -n enables nitpicky reference checking, and
# --keep-going lets the build run to the end so every warning is reported.
jb build -W -n --keep-going Book/

# Mirror the rendered site into the top-level _build/html directory.
# cp does not create the destination, so make sure it exists first
# (it is absent on a fresh checkout).
mkdir -p _build/html
cp -R Book/_build/html/* _build/html/

# Publish the freshly built site: -n adds a .nojekyll file, -p pushes the
# branch after committing, -f forces the push.
ghp-import -n -p -f Book/_build/html


6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ biopython = "^1.81"
ratelimiter = "^1.2.0.post0"
click = "^8.1.7"
scholarly = "^1.7.11"
matplotlib = "^3.8.0"
whoosh = "^2.7.4"
ipywidgets = "^8.1.1"
networkx = "^3.1"
plotly = "^5.17.0"
pot = "^0.9.1"


[tool.poetry.group.dev.dependencies]
Expand Down

0 comments on commit 5c56915

Please sign in to comment.