Skip to content

Commit

Permalink
Merge pull request #23 from databridgevt/forget_kaggle_update
Browse files: browse the repository at this point in the history
return error message if student forgets to update kgl data
  • Loading branch information
chendaniely committed Apr 9, 2020
2 parents 4bb4f15 + dde9bf3 commit cca7ddc
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions analysis/db/dan/load_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pathlib as pl
import json
import sys

import pandas as pd
from tqdm import tqdm
Expand Down Expand Up @@ -37,10 +38,13 @@ def extract_paper_data(json_pth):
)
return(paper_data)

fs = here("./data/db/original/kaggle/comm_use_subset/comm_use_subset/pdf_json").iterdir()
hr = here("./data/db/original/kaggle/comm_use_subset/comm_use_subset/pdf_json/")
fs = hr.iterdir()

# fs = list(fs)
fs = list(fs)
try:
fs = list(fs)
except FileNotFoundError:
sys.exit(f"Could not find {hr}, did you forget to update the Kaggle dataset?")

papers = pd.concat(
[extract_paper_data(jsn) for jsn in tqdm(fs)]
Expand Down

0 comments on commit cca7ddc

Please sign in to comment.