# Crossref pipeline

We have a simple task, we want to retireve a list of papers that is close to the universe of papers for a particular search string (using the paper title). We are going to use Crossref data and use it to query and get the publication DOIs and download the corresponding PDFs when is possible (we're doing low-key a crime here by using SciHub as a source of PDFs, but more on that later). 

In [44]:
import os
import duckdb
from scidownl import scihub_download

### How many papers in adaptation we have in the Crossref database by year?

In [51]:
duckdb.query(
    """
    SELECT title, 
    type, 
    url, 
    list_extract(list_extract(published['date-parts'], 1),1) as year 
    FROM read_parquet('/mnt/drive/crossref_april_parquet/*.parquet', union_by_name=true)
    WHERE regexp_matches(title, 'climate adaptation|adaptation to climate change')  
    AND type = 'journal-article' order by 4;
    """
)

┌──────────────────────┬───────────────────────────────────────────────────────┬─────────┬─────────┬─────────┬─────────┐
│     column_name      │                      column_type                      │  null   │   key   │ default │  extra  │
│       varchar        │                        varchar                        │ varchar │ varchar │ varchar │ varchar │
├──────────────────────┼───────────────────────────────────────────────────────┼─────────┼─────────┼─────────┼─────────┤
│ given_1              │ VARCHAR                                               │ YES     │ NULL    │ NULL    │ NULL    │
│ given_2              │ VARCHAR                                               │ YES     │ NULL    │ NULL    │ NULL    │
│ given_3              │ VARCHAR                                               │ YES     │ NULL    │ NULL    │ NULL    │
│ given_4              │ VARCHAR                                               │ YES     │ NULL    │ NULL    │ NULL    │
│ family_1             │ VARCHAR

In [52]:
# Try to get all the stuff we need!
adapt_papers = duckdb.query(
    """
    SELECT title, 
    container_title,
    doi, 
    url,
    family_1,
    list_extract(list_extract(published['date-parts'], 1),1) as year 
    FROM read_parquet('/mnt/drive/crossref_april_parquet/*.parquet', union_by_name=true)
    WHERE regexp_matches(title, 'climate adaptation|adaptation to climate change')  
    AND type = 'journal-article' order by 4;
    """
).to_df()

adapt_papers.head()

Unnamed: 0,title,container_title,doi,url,family_1,year
0,Human adaptation to climate change: An introdu...,American Journal of Human Biology,10.1002/ajhb.23530,http://dx.doi.org/10.1002/ajhb.23530,Pisor,2020
1,Beyond skeletal studies: A computational analy...,American Journal of Biological Anthropology,10.1002/ajpa.24932,http://dx.doi.org/10.1002/ajpa.24932,Bastir,2024
2,The spatial representation of business models ...,Business Strategy &amp; Development,10.1002/bsd2.92,http://dx.doi.org/10.1002/bsd2.92,DiBella,2019
3,Business adaptation to climate change: America...,Business Strategy and the Environment,10.1002/bse.2316,http://dx.doi.org/10.1002/bse.2316,Rivera,2019
4,A relational view of climate adaptation in the...,Business Strategy and the Environment,10.1002/bse.2375,http://dx.doi.org/10.1002/bse.2375,Canevari‐Luzardo,2019


In [54]:
def download_paper(doi_url, title, lastname, path_pdfs):
    file_name = f"{'_'.join([lastname, title.split(' ')[0]])}.pdf"

    out = os.path.join(path_pdfs, file_name)
    proxies = {"http": "socks5://127.0.0.1:7890"}
    scihub_download(doi_url, paper_type="doi", out=out, proxies=proxies)

In [55]:
for idx, row in adapt_papers.iterrows():
    download_paper(row["doi"], row["title"], row["family_1"], "./pdfs/")

[1m[INFO][0m | [32m2024/10/01 07:42:30[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:30[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ajhb.23530], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:33[0m | [1m-> Response: status_code=200, content_length=7842[0m
[1m[INFO][0m | [32m2024/10/01 07:42:33[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-11-25/da/10.1002@ajhb.23530.pdf', 'title': 'Human adaptation to climate change  An introduction to the special issue. American Journal of Human Biology, 33(4)'}[0m




[1m[INFO][0m | [32m2024/10/01 07:42:34[0m | [1m↓ Successfully download the url to: ./pdfs/Pisor_Human.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:34[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:34[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ajpa.24932], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:35[0m | [1m-> Response: status_code=200, content_length=5827[0m
[1m[INFO][0m | [32m2024/10/01 07:42:35[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:35[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/ajpa.24932], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:35[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:35[0m | [1m<- Request: scihub_url=https://sci-hub.se, sour



[1m[INFO][0m | [32m2024/10/01 07:42:41[0m | [1m↓ Successfully download the url to: ./pdfs/DiBella_The.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:41[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:41[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/bse.2316], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:42[0m | [1m-> Response: status_code=200, content_length=7833[0m
[1m[INFO][0m | [32m2024/10/01 07:42:42[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2019-10-23/05/rivera2019.pdf', 'title': 'Business adaptation to climate change  American ski resorts and warmer temperatures. Business Strategy and the Environment'}[0m




[1m[INFO][0m | [32m2024/10/01 07:42:43[0m | [1m↓ Successfully download the url to: ./pdfs/Rivera_Business.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:43[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:43[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/bse.2375], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:44[0m | [1m-> Response: status_code=200, content_length=8060[0m
[1m[INFO][0m | [32m2024/10/01 07:42:44[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2019-10-23/1a/10.1002@bse.2375.pdf', 'title': 'A relational view of climate adaptation in the private sector  How do value chain interactions shape business perceptions of climate risk and adaptive behaviours  Business Strategy and the Environmen'}[0m




[1m[INFO][0m | [32m2024/10/01 07:42:45[0m | [1m↓ Successfully download the url to: ./pdfs/Canevari‐Luzardo_A.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:45[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:45[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/cli2.51], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:46[0m | [1m-> Response: status_code=200, content_length=5824[0m
[1m[INFO][0m | [32m2024/10/01 07:42:46[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:46[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/cli2.51], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:46[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:46[0m | [1m<- Request: scihub_url=https://sci-hub.se, sou



[1m[INFO][0m | [32m2024/10/01 07:42:53[0m | [1m↓ Successfully download the url to: ./pdfs/Innes_The.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:53[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:53[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/eap.2765], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:53[0m | [1m-> Response: status_code=200, content_length=7489[0m
[1m[INFO][0m | [32m2024/10/01 07:42:53[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2022-11-05/19/zhu2022.pdf', 'title': ''}[0m




[1m[INFO][0m | [32m2024/10/01 07:42:54[0m | [1m↓ Successfully download the url to: ./pdfs/Zhu_Biodiversity.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:54[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:54[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ece3.10886], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:55[0m | [1m-> Response: status_code=200, content_length=5827[0m
[1m[INFO][0m | [32m2024/10/01 07:42:55[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:55[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/ece3.10886], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:42:55[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:42:55[0m | [1m<- Request: scihub_url=https://sci-hub.se,



[1m[INFO][0m | [32m2024/10/01 07:42:59[0m | [1m↓ Successfully download the url to: ./pdfs/Rodríguez_Genomic.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:42:59[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:42:59[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ece3.4550], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:00[0m | [1m-> Response: status_code=200, content_length=8042[0m
[1m[INFO][0m | [32m2024/10/01 07:43:00[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/tree/b3/3b/b33b3f0f50e652cd534fb4dc881eda59.pdf', 'title': 'Size does matter  Parallel evolution of adaptive thermal tolerance and body size facilitates adaptation to climate change in domestic cattle. Ecology and Evolution, 8(21), 10608–10620'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:02[0m | [1m↓ Successfully download the url to: ./pdfs/Elayadeth‐Meethal_Size.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:02[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:02[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ece3.8474], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:02[0m | [1m-> Response: status_code=200, content_length=7497[0m
[1m[INFO][0m | [32m2024/10/01 07:43:02[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2022-11-03/e6/fisher2021.pdf', 'title': ''}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:03[0m | [1m↓ Successfully download the url to: ./pdfs/Fisher_Genetic.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:03[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:03[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ecs2.2140], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:04[0m | [1m-> Response: status_code=200, content_length=7962[0m
[1m[INFO][0m | [32m2024/10/01 07:43:04[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/6905/b8d90059bea3ad367b34d387efda3de5/halofsky2018.pdf', 'title': 'The nature of the beast  examining climate adaptation options in forests with stand-replacing fire regimes. Ecosphere, 9(3), e02140'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:06[0m | [1m↓ Successfully download the url to: ./pdfs/Halofsky_The.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:06[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:06[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ecy.1808], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:07[0m | [1m-> Response: status_code=200, content_length=7794[0m
[1m[INFO][0m | [32m2024/10/01 07:43:07[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/f8dc63f521b655e3a9a3f116ef0245ca/fleishman2017.pdf', 'title': 'Societal transformation and climate adaptation. Ecology, 98(6), 1732–1733'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:08[0m | [1m↓ Successfully download the url to: ./pdfs/Fleishman_Societal.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:08[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:08[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/eet.1707], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:09[0m | [1m-> Response: status_code=200, content_length=7962[0m
[1m[INFO][0m | [32m2024/10/01 07:43:09[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/31020923417ab4e7a23fc76c458fe839/wamsler2016.pdf', 'title': 'From Risk Governance to City-Citizen Collaboration  Capitalizing on individual adaptation to climate change. Environmental Policy and Governance, 26(3), 184–204'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:10[0m | [1m↓ Successfully download the url to: ./pdfs/Wamsler_From.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:10[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:10[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/eet.1865], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:11[0m | [1m-> Response: status_code=200, content_length=7911[0m
[1m[INFO][0m | [32m2024/10/01 07:43:11[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2019-10-22/99/patterson2019.pdf', 'title': 'Beyond inputs and outputs  Process‐oriented explanation of institutional change in climate adaptation governance. Environmental Policy and Governance'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:13[0m | [1m↓ Successfully download the url to: ./pdfs/Patterson_Beyond.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:13[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:13[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/eet.1982], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:14[0m | [1m-> Response: status_code=200, content_length=7493[0m
[1m[INFO][0m | [32m2024/10/01 07:43:14[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2022-11-04/16/glaas2022.pdf', 'title': ''}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:14[0m | [1m↓ Successfully download the url to: ./pdfs/Glaas_Disentangling.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:14[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:14[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/eet.530], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:15[0m | [1m-> Response: status_code=200, content_length=7943[0m
[1m[INFO][0m | [32m2024/10/01 07:43:15[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.ru/122/b35986e638295875af3139361bca8eb8/10.1002@eet.530.pdf', 'title': 'Theclimate learning ladder.A pragmatic procedure to support climate adaptation. Environmental Policy and Governance, 20(1), 1–11'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1m↓ Successfully download the url to: ./pdfs/Tàbara_The.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/fee.2716], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1m-> Response: status_code=200, content_length=5825[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/fee.2716], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:43:17[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=Do



[1m[INFO][0m | [32m2024/10/01 07:43:21[0m | [1m↓ Successfully download the url to: ./pdfs/Sanderson_Database.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:21[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:21[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ird.2498], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:22[0m | [1m-> Response: status_code=200, content_length=7884[0m
[1m[INFO][0m | [32m2024/10/01 07:43:22[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-07-07/39/10.1002@ird.2498.pdf', 'title': 'Water and agricultural policies in Iranian macro‐level documents from the perspective of adaptation to climate change. Irrigation and Drainage'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:23[0m | [1m↓ Successfully download the url to: ./pdfs/Heydari_Water.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:23[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:23[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/jaa2.20], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:24[0m | [1m-> Response: status_code=200, content_length=7491[0m
[1m[INFO][0m | [32m2024/10/01 07:43:24[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2022-11-05/bbd2/lee2022.pdf', 'title': ''}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:24[0m | [1m↓ Successfully download the url to: ./pdfs/Lee_Agricultural.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:24[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:24[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/jeq2.20383], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:25[0m | [1m-> Response: status_code=200, content_length=7505[0m
[1m[INFO][0m | [32m2024/10/01 07:43:25[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2022-11-05/7d/gutknecht2022.pdf', 'title': ''}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:26[0m | [1m↓ Successfully download the url to: ./pdfs/Gutknecht_Cover.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:26[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:26[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/jid.3567], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:26[0m | [1m-> Response: status_code=200, content_length=7897[0m
[1m[INFO][0m | [32m2024/10/01 07:43:26[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2021-08-16/b1/kundo2021.pdf', 'title': 'Mainstreaming climate adaptation into social protection  The issues yet to be addressed. Journal of International Development, 33(6), 953–974'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:28[0m | [1m↓ Successfully download the url to: ./pdfs/Kundo_Mainstreaming.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:28[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:28[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/joc.6996], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:29[0m | [1m-> Response: status_code=200, content_length=7905[0m
[1m[INFO][0m | [32m2024/10/01 07:43:29[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2021-05-13//f5/reggiani2021.pdf', 'title': 'Assessing uncertainty for decision‐making in climate adaptation and risk mitigation. International Journal of Climatology, 41(5),'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:31[0m | [1m↓ Successfully download the url to: ./pdfs/Reggiani_Assessing.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:31[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:31[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/joc.707], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:33[0m | [1m-> Response: status_code=200, content_length=8177[0m
[1m[INFO][0m | [32m2024/10/01 07:43:33[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.ru/159/8691ef86a130beb80efbb5738a48e853/harrison2002.pdf', 'title': 'Vulnerability and adaptation to climate change for Bangladesh, Edited by S.Huq, Z.Karim, M.Asaduzzaman and F.Mahtab. Kluwer Academic Publishers, Dordrecht, 1999. No. of pages  XV+147. Price NLG160, U'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:34[0m | [1m↓ Successfully download the url to: ./pdfs/Harrison_Vulnerability.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:34[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:34[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/pan3.10075], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:35[0m | [1m-> Response: status_code=200, content_length=7862[0m
[1m[INFO][0m | [32m2024/10/01 07:43:35[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-07-11/aa/soubry2020.pdf', 'title': 'Farming along desire lines  Collective action and food systems adaptation to climate change. People and Nature, 2(2), 420–436'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:37[0m | [1m↓ Successfully download the url to: ./pdfs/Soubry_Farming.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:37[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:37[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/ppp3.10126], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:37[0m | [1m-> Response: status_code=200, content_length=7992[0m
[1m[INFO][0m | [32m2024/10/01 07:43:37[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-07-16/ed/mills2020.pdf', 'title': 'Ecosystem‐based adaptation to climate change  Lessons learned from a pioneering project spanning Mauritania, Nepal, the Seychelles, and China. PLANTS, PEOPLE, PLANET'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:39[0m | [1m↓ Successfully download the url to: ./pdfs/Mills_Ecosystem‐based.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:39[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:39[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/sd.2801], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:39[0m | [1m-> Response: status_code=200, content_length=5824[0m
[1m[INFO][0m | [32m2024/10/01 07:43:40[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:43:40[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/sd.2801], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:40[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:43:40[0m | [1m<- Request: scihub_url=https://sci-hub.se, 



[1m[INFO][0m | [32m2024/10/01 07:43:46[0m | [1m↓ Successfully download the url to: ./pdfs/Jones_20,000.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:46[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:46[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.127], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:47[0m | [1m-> Response: status_code=200, content_length=7846[0m
[1m[INFO][0m | [32m2024/10/01 07:43:47[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/562/20621c39294cc5023074730948972067/mawdsley2011.pdf', 'title': 'Design of conservation strategies for climate adaptation. Wiley Interdisciplinary Reviews  Climate Change, 2(4), 498–515'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:48[0m | [1m↓ Successfully download the url to: ./pdfs/Mawdsley_Design.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:48[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:48[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.204], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:49[0m | [1m-> Response: status_code=200, content_length=7880[0m
[1m[INFO][0m | [32m2024/10/01 07:43:49[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/f7c56a150ecb285ae80735469c7952b8/naess2012.pdf', 'title': 'The role of local knowledge in adaptation to climate change. Wiley Interdisciplinary Reviews  Climate Change, 4(2), 99–106'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:50[0m | [1m↓ Successfully download the url to: ./pdfs/Naess_The.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:50[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:50[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.214], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:51[0m | [1m-> Response: status_code=200, content_length=8061[0m
[1m[INFO][0m | [32m2024/10/01 07:43:51[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/ec97f1ba0e1864368e931ef0b3f27a08/linnenluecke2013.pdf', 'title': 'Firm and industry adaptation to climate change  a review of climate adaptation studies in the business and management field. Wiley Interdisciplinary Reviews  Climate Change, 4(5), 397–416'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:52[0m | [1m↓ Successfully download the url to: ./pdfs/Linnenluecke_Firm.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:52[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:52[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.276], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:53[0m | [1m-> Response: status_code=200, content_length=7964[0m
[1m[INFO][0m | [32m2024/10/01 07:43:53[0m | [1m* Extracted information: {'url': 'https://moscow.sci-hub.ru/2472/eae9dc6f0522aea266539928bd5d17c5/moser2014.pdf', 'title': 'Communicating adaptation to climate change  the art and science of public engagement when climate change comes home. Wiley Interdisciplinary Reviews  Climate Change, 5(3), 337–358'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:56[0m | [1m↓ Successfully download the url to: ./pdfs/Moser_Communicating.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:56[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:56[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.30], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:43:56[0m | [1m-> Response: status_code=200, content_length=7895[0m
[1m[INFO][0m | [32m2024/10/01 07:43:56[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/562/b8b8277a599131bf581524926c2deec5/10.1002@wcc.30.pdf', 'title': 'A values-based approach to vulnerability and adaptation to climate change. Wiley Interdisciplinary Reviews  Climate Change, 1(2), 232–242'}[0m




[1m[INFO][0m | [32m2024/10/01 07:43:59[0m | [1m↓ Successfully download the url to: ./pdfs/O'Brien_A.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:43:59[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:43:59[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.402], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:00[0m | [1m-> Response: status_code=200, content_length=8135[0m
[1m[INFO][0m | [32m2024/10/01 07:44:00[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/5559/7bf6f38cf0d7d6ee65d661128f59246f/averchenkova2016.pdf', 'title': 'Multinational and large national corporations and climate adaptation  are we asking the right questions  A review of current knowledge and a new research perspective. Wiley Interdisciplinary Reviews'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:01[0m | [1m↓ Successfully download the url to: ./pdfs/Averchenkova_Multinational.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:01[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:01[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.409], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:02[0m | [1m-> Response: status_code=200, content_length=7964[0m
[1m[INFO][0m | [32m2024/10/01 07:44:02[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/63527de549f58bdd7cef7b58b4cb48cf/magnan2016.pdf', 'title': 'Addressing the risk of maladaptation to climate change. Wiley Interdisciplinary Reviews  Climate Change, 7(5), 646–665'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:04[0m | [1m↓ Successfully download the url to: ./pdfs/Magnan_Addressing.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:04[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:04[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.475], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:05[0m | [1m-> Response: status_code=200, content_length=7974[0m
[1m[INFO][0m | [32m2024/10/01 07:44:05[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/6430/80fcf9611829e65c96e8f7b9e59cdfb1/klenk2017.pdf', 'title': 'Local knowledge in climate adaptation research  moving knowledge frameworks from extraction to co-production. Wiley Interdisciplinary Reviews  Climate Change, 8(5), e475'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:06[0m | [1m↓ Successfully download the url to: ./pdfs/Klenk_Local.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:06[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:06[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.505], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:07[0m | [1m-> Response: status_code=200, content_length=7905[0m
[1m[INFO][0m | [32m2024/10/01 07:44:07[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/6759/e06188eb992f1b3e964bcbfa0fe68d3a/crick2018.pdf', 'title': 'Enabling private sector adaptation to climate change in sub-Saharan Africa. Wiley Interdisciplinary Reviews  Climate Change, 9(2), e505'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:08[0m | [1m↓ Successfully download the url to: ./pdfs/Crick_Enabling.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:08[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:08[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.51], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:09[0m | [1m-> Response: status_code=200, content_length=7982[0m
[1m[INFO][0m | [32m2024/10/01 07:44:09[0m | [1m* Extracted information: {'url': 'https://dacemirror.sci-hub.ru/journal-article/a429439cc800f3de675fd6fa8ca12907/mcleman2010.pdf', 'title': 'Migration in the context of vulnerability and adaptation to climate change  insights from analogues. Wiley Interdisciplinary Reviews  Climate Change, 1(3), 450–461'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:11[0m | [1m↓ Successfully download the url to: ./pdfs/McLeman_Migration.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:11[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:11[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.553], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:12[0m | [1m-> Response: status_code=200, content_length=7900[0m
[1m[INFO][0m | [32m2024/10/01 07:44:12[0m | [1m* Extracted information: {'url': 'https://zero.sci-hub.ru/7134/7c23f04dbb499547bcbdc367f83e3267/hamilton2018.pdf', 'title': 'Behavioral adaptation to climate change in wildfire-prone forests. Wiley Interdisciplinary Reviews  Climate Change, e553'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:13[0m | [1m↓ Successfully download the url to: ./pdfs/Hamilton_Behavioral.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:13[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:13[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.616], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:14[0m | [1m-> Response: status_code=200, content_length=8004[0m
[1m[INFO][0m | [32m2024/10/01 07:44:14[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2019-10-23/7b/solecki2019.pdf', 'title': 'Extreme events and climate adaptation‐mitigation linkages  Understanding low‐carbon transitions in the era of global urbanization. Wiley Interdisciplinary Reviews  Climate Change'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:16[0m | [1m↓ Successfully download the url to: ./pdfs/Solecki_Extreme.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:16[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:16[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.638], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:16[0m | [1m-> Response: status_code=200, content_length=7839[0m
[1m[INFO][0m | [32m2024/10/01 07:44:16[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-04-17/12/kwakkel2020.pdf', 'title': 'Is real options analysis fit for purpose in supporting climate adaptation planning and decision‐making  WIREs Climate Change, 11(3)'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:17[0m | [1m↓ Successfully download the url to: ./pdfs/Kwakkel_Is.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:17[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:17[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.676], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:18[0m | [1m-> Response: status_code=200, content_length=7829[0m
[1m[INFO][0m | [32m2024/10/01 07:44:18[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-08-18/72/lucas2020.pdf', 'title': 'Privatizing climate adaptation  How insurance weakens solidaristic and collective disaster recovery. WIREs Climate Change'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:19[0m | [1m↓ Successfully download the url to: ./pdfs/Lucas_Privatizing.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:20[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:20[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.695], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:20[0m | [1m-> Response: status_code=200, content_length=7791[0m
[1m[INFO][0m | [32m2024/10/01 07:44:20[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2020-12-17/83/tubi2020.pdf', 'title': 'Beyond binary outcomes in climate adaptation  The illustrative case of desalination. WIREs Climate Change'}[0m




[1m[INFO][0m | [32m2024/10/01 07:44:22[0m | [1m↓ Successfully download the url to: ./pdfs/Tubi_Beyond.pdf[0m
[1m[INFO][0m | [32m2024/10/01 07:44:22[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:44:22[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=10.1002/wcc.856], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:23[0m | [1m-> Response: status_code=200, content_length=5824[0m
[1m[INFO][0m | [32m2024/10/01 07:44:23[0m | [1mChoose scihub url [1]: http://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:44:23[0m | [1m<- Request: scihub_url=http://sci-hub.se, source=DoiSource[type=doi, id=10.1002/wcc.856], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:44:23[0m | [1mChoose scihub url [2]: https://sci-hub.se[0m
[1m[INFO][0m | [32m2024/10/01 07:44:23[0m | [1m<- Request: scihub_url=https://sci-hub.se, source=Doi

In [9]:
duckdb.query(
    """
    SELECT 
    title, 
    type, 
    url, 
    abstract, 
    list_extract(list_extract(published['date-parts'], 1),1) as year 
    FROM read_parquet('/mnt/drive/crossref_april_parquet/*.parquet', union_by_name=true) 
    WHERE regexp_matches(title, 'climate adaptation|adaptation to climate change') AND 
    type = 'journal-article' and abstract IS NOT NULL
    ORDER by 5;
    """
)

┌──────────────────────┬─────────────────┬──────────────────────┬──────────────────────────────────────────────┬───────┐
│        title         │      type       │         url          │                   abstract                   │ year  │
│       varchar        │     varchar     │       varchar        │                   varchar                    │ int64 │
├──────────────────────┼─────────────────┼──────────────────────┼──────────────────────────────────────────────┼───────┤
│ Reducing hazard vu…  │ journal-article │ http://dx.doi.org/…  │ <jats:title>Abstract</jats:title><jats:p>O…  │  2006 │
│ Community‐based di…  │ journal-article │ http://dx.doi.org/…  │ <jats:title>Abstract</jats:title><jats:p>C…  │  2006 │
│ Local initiatives …  │ journal-article │ http://dx.doi.org/…  │ <jats:title>Abstract</jats:title><jats:p>C…  │  2006 │
│ Modeling agricultu…  │ journal-article │ http://dx.doi.org/…  │ <jats:p>An approach that integrates biophy…  │  2008 │
│ A multi-institutio…  │ journal

In [12]:
duckdb.query(
    """
    with clean as (
        SELECT list_extract(affiliation_1, 1) as first_author_affiliation,
        list_extract(list_extract(published['date-parts'], 1),1) as year
        FROM read_parquet('/mnt/drive/crossref_april_parquet_normalized/*.parquet', union_by_name=true)
        WHERE affiliation_1 IS NOT NULL
        ) SELECT first_author_affiliation, count(*) AS count 
        FROM clean 
        WHERE first_author_affiliation IS NOT NULL and year == 2020
        GROUP BY first_author_affiliation 
        ORDER by count desc;
    """
)

┌─────────────────────────────────────────────────────────────────────────────────────────┬───────┐
│                                first_author_affiliation                                 │ count │
│                                         varchar                                         │ int64 │
├─────────────────────────────────────────────────────────────────────────────────────────┼───────┤
│ University of Washington                                                                │ 37393 │
│ Department of Chemistry                                                                 │  3678 │
│ California Institute of Technology                                                      │  1110 │
│ University of Massachusets Medical School                                               │  1075 │
│ Los Alamos National Lab. (LANL), Los Alamos, NM (United States)                         │  1003 │
│ Stanford University                                                                     │   828 │


In [25]:
duckdb.query(
    """
    SELECT container_title, title, type, url, list_extract(list_extract(published['date-parts'], 1),1) as year
    FROM read_parquet('/mnt/drive/crossref_april_parquet_normalized/*.parquet', union_by_name=true)
    WHERE contains(container_title, 'development')
    and list_extract(list_extract(published['date-parts'], 1),1) == 2020
    and type = 'journal-article'
    """
)

┌──────────────────────┬──────────────────────┬─────────────────┬──────────────────────────────────────────────┬───────┐
│   container_title    │        title         │      type       │                     url                      │ year  │
│       varchar        │       varchar        │     varchar     │                   varchar                    │ int64 │
├──────────────────────┼──────────────────────┼─────────────────┼──────────────────────────────────────────────┼───────┤
│ Advances in Neurod…  │ Building Social Sk…  │ journal-article │ http://dx.doi.org/10.1007/s41252-020-00149-8 │  2020 │
│ Journal of Neurode…  │ A telehealth appro…  │ journal-article │ http://dx.doi.org/10.1186/s11689-019-9302-0  │  2020 │
│ Journal of Neurode…  │ Health comorbiditi…  │ journal-article │ http://dx.doi.org/10.1186/s11689-019-9306-9  │  2020 │
│ Advances in Neurod…  │ Mindfulness Traini…  │ journal-article │ http://dx.doi.org/10.1007/s41252-020-00148-9 │  2020 │
│ South of Russia: e…  │ Ecologi

In [34]:
from scidownl import scihub_download

paper = "https://doi.org/10.1145/3375633"
paper_type = "doi"
out = "./paper/one_paper.pdf"
proxies = {"http": "socks5://127.0.0.1:7890"}
scihub_download(paper, paper_type=paper_type, out=out, proxies=proxies)

[1m[INFO][0m | [32m2024/10/01 07:08:00[0m | [1mFound 6 valid SciHub domains in total: ['https://sci-hub.ru', 'http://sci-hub.se', 'https://sci-hub.se', 'http://sci-hub.ru', 'http://sci-hub.st', 'https://sci-hub.st'][0m
[1m[INFO][0m | [32m2024/10/01 07:08:00[0m | [1mSaved 6 SciHub domains to local db.[0m
[1m[INFO][0m | [32m2024/10/01 07:08:00[0m | [1mChoose scihub url [0]: https://sci-hub.ru[0m
[1m[INFO][0m | [32m2024/10/01 07:08:00[0m | [1m<- Request: scihub_url=https://sci-hub.ru, source=DoiSource[type=doi, id=doi.org/10.1145/3375633], proxies={'http': 'socks5://127.0.0.1:7890'}[0m
[1m[INFO][0m | [32m2024/10/01 07:08:02[0m | [1m-> Response: status_code=200, content_length=7868[0m
[1m[INFO][0m | [32m2024/10/01 07:08:02[0m | [1m* Extracted information: {'url': 'https://sci-hub.ru/downloads/2021-06-09/4a/beschastnikh2020.pdf', 'title': 'Visualizing Distributed System Executions. ACM Transactions on Software Engineering and Methodology, 29(2), 1–38'}[0m



[1m[INFO][0m | [32m2024/10/01 07:08:03[0m | [1m↓ Successfully download the url to: ./paper/one_paper.pdf[0m


In [26]:
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader("http://dx.doi.org/10.1007/s41252-020-00151-0")

In [33]:
import pprint

docs = loader.load()

pprint.pp(docs[0].page_content)

('\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 'Factors Influencing Decisions About Prenatal Genetic Testing for Autism '
 'Among Mothers of Children with Autism Spectrum Disorders | Advances in '
 'Neurodevelopmental Disorders\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'
 '\n'

In [None]:
duckdb.query(
    """
    SELECT unnest(CAST(affiliation_1 AS VARCHAR[]), recursive := true)
    FROM read_parquet('/mnt/drive/crossref_april_parquet/*.parquet', union_by_name=true)
    WHERE affiliation_1 IS NOT NULL
    LIMIT 10;
    """
)

In [5]:
duckdb.query(
    """
    with clean_year as (
    select *, list_extract(list_extract(published['date-parts'], 1),1) as year,
    FROM read_parquet('/mnt/drive/crossref_april_parquet/*.parquet', union_by_name=true) 
    ) select year, affiliation_1, count(*)
    from clean_year
    where affiliation_1 is not null
    group by year, affiliation_1;
    """
)

TypeMismatchException: Mismatch Type Error: Type STRUCT("name" VARCHAR, place VARCHAR[]) does not match with STRUCT("name" VARCHAR). Cannot cast STRUCTs of different size