From dde9bf350b89565ad41e57eff0eada6c92a9e381 Mon Sep 17 00:00:00 2001
From: Daniel Chen
Date: Wed, 8 Apr 2020 23:53:10 -0400
Subject: [PATCH] return error message if student forgets to update kgl data

---
 analysis/db/dan/load_data.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/analysis/db/dan/load_data.py b/analysis/db/dan/load_data.py
index 899eac6..028767c 100644
--- a/analysis/db/dan/load_data.py
+++ b/analysis/db/dan/load_data.py
@@ -1,5 +1,6 @@
 import pathlib as pl
 import json
+import sys
 
 import pandas as pd
 from tqdm import tqdm
@@ -37,10 +38,13 @@ def extract_paper_data(json_pth):
     )
     return(paper_data)
 
-fs = here("./data/db/original/kaggle/comm_use_subset/comm_use_subset/pdf_json").iterdir()
+hr = here("./data/db/original/kaggle/comm_use_subset/comm_use_subset/pdf_json/")
+fs = hr.iterdir()
 
-# fs = list(fs)
-fs = list(fs)
+try:
+    fs = list(fs)
+except FileNotFoundError:
+    sys.exit(f"Could not find {hr}, did you forget to update the Kaggle dataset?")
 
 papers = pd.concat(
     [extract_paper_data(jsn) for jsn in tqdm(fs)]