   # Part10: Run Notebooks


In [1]:
import os
import sys
import time
import datetime
import touch
import inspect
import numpy
import pandas as pd
# Record the interpreter version next to the notebook's outputs.
print(sys.version_info)

sys.version_info(major=3, minor=9, micro=2, releaselevel='final', serial=0)


In [2]:
def describe_module(name, mod):
    """Return the version-report line for one imported module, or None.

    Parameters
    ----------
    name : str
        Name the module is bound to in the notebook namespace (its alias).
    mod : module
        The module object itself.

    Returns
    -------
    str or None
        ``"version <version> of <package> as <alias> "`` when the module has
        a ``__version__`` attribute; ``"No __version__ for <package> as
        <alias>"`` when it has none but its file path contains
        "site-packages"; ``None`` for dunder-named entries and anything else.
    """
    if name.startswith("__"):
        return None
    # Packages carry __path__; its last component is the real package name,
    # which differs from the alias for imports such as "import pandas as pd".
    mname = name
    pkg_path = getattr(mod, "__path__", None)
    if pkg_path:
        mname = os.path.split(pkg_path[0])[1]
    if hasattr(mod, "__version__"):
        # Argument order matters: version first, then package, then alias.
        return "version {0} of {1} as {2} ".format(mod.__version__, mname, name)
    # __file__ can be None (e.g. namespace packages), so guard the substring test.
    mod_file = getattr(mod, "__file__", None)
    if mod_file and "site-packages" in mod_file:
        return "No __version__ for {0} as {1}".format(mname, name)
    return None


# Snapshot the namespace before filtering: this cell binds new names as it runs.
mlist = [item for item in list(locals().items()) if inspect.ismodule(item[1])]
vi = sys.version_info
print("version {0}.{1}.{2} of Python".format(vi.major, vi.minor, vi.micro))
# Building the lines in a comprehension keeps loop temporaries out of the
# notebook namespace, so no trailing "del" (which fails on an empty list) is needed.
version_lines = [
    text
    for text in (describe_module(*item) for item in mlist)
    if text is not None
]
if version_lines:
    print("\n".join(version_lines))

version 3.9.2 of Python
No __version__ for touch as touch
version numpy of numpy as 1.20.1 
version pd of pandas as 1.2.3 


In [5]:
# to get the names of notebook files in a directory
def get_project_notebooks(directory="."):
    """Return the file names of the project's "Part*" notebooks.

    Parameters
    ----------
    directory : str or path-like, optional
        Folder to scan. Defaults to the current working directory, which is
        what the function always scanned before this parameter existed.

    Returns
    -------
    list of str
        Bare file names (no directory component) that start with "Part" and
        end with ".ipynb". The list is sorted alphabetically so the result
        does not depend on the arbitrary order in which the OS lists the
        directory; note that this is lexicographic, not numeric, order.
    """
    return sorted(
        entry
        for entry in os.listdir(directory)
        if entry.startswith("Part") and entry.endswith(".ipynb")
    )


['Part_0_Setup.ipynb',
 'Part_1_Intro_Overview.ipynb',
 'Part_2_DataSources.ipynb',
 'Part_3_Fred.ipynb',
 'Part_4_Weekly_Claims.ipynb',
 'Part_5_UMich_Sentiment.ipynb',
 'Part_6_GoogleTrends.ipynb',
 'Part_7_Combine_claims.ipynb',
 'Part_8_Combine_Datasets.ipynb',
 'Part_99_Run_All.ipynb']

In [8]:
# print the source code for a cell
def print_source(cell):
    """Dump a notebook cell's source to stdout with a line index.

    Prints the type of ``cell``, then the number of lines in
    ``cell["source"]`` (split on newlines), then every line prefixed with its
    zero-based index in parentheses.
    """
    print("type cell= {0}".format(type(cell)))
    source_lines = cell["source"].split("\n")
    print("{0} lines".format(len(source_lines)))
    numbered = [
        "({0}){1}".format(index, text)
        for index, text in enumerate(source_lines)
    ]
    # split() always yields at least one element, so there is always a line to print.
    print("\n".join(numbered))

# run notebook code
def run_nb_code(nbfile):
    """Execute every code cell of a notebook file, in order, with ``exec``.

    Parameters
    ----------
    nbfile : str or path-like
        Path of the ``.ipynb`` file to run.

    Returns
    -------
    str or tuple
        On success, a string holding the start and end timestamps.
        If a cell raises, execution stops and the tuple
        ``(exc_type, exc_obj, exc_tb, fname)`` is returned, where ``fname`` is
        the base name of the file recorded for the innermost traceback frame
        (``"<string>"`` for code that came from a cell).

    Notes
    -----
    Cell source is executed as plain Python, so IPython magics in a cell are
    reported as errors rather than being interpreted.
    """
    from nbformat import read, NO_CONVERT
    import datetime
    import os
    import sys
    import traceback

    res = "Start time: " + str(datetime.datetime.now())
    # Notebook files are JSON stored as UTF-8; do not depend on the platform default.
    with open(nbfile, encoding="utf-8") as fp:
        notebook = read(fp, NO_CONVERT)
    code_cells = [c for c in notebook['cells'] if c['cell_type'] == 'code']

    # One shared namespace for all cells. Passing separate globals/locals to
    # exec() gives class-body semantics, where functions defined in a cell
    # cannot see that cell's own imports or variables. Copying globals() keeps
    # the caller's names readable without letting the notebook overwrite them.
    namespace = dict(globals())
    for i, cell in enumerate(code_cells):
        src = cell['source']
        try:
            exec(src, namespace)
        except Exception:
            print("Error on cell {0}\n".format(i))
            print(src)
            print_source(cell)
            exc_type, exc_obj, exc_tb = sys.exc_info()
            traceback.print_tb(exc_tb)
            # Report where the error was actually raised (innermost frame),
            # not the exec() call in this function (outermost frame).
            origin = traceback.extract_tb(exc_tb)[-1]
            fname = os.path.split(origin.filename)[1]
            print(exc_type, fname, origin.lineno)
            return (exc_type, exc_obj, exc_tb, fname)
    res += " end: " + str(datetime.datetime.now())
    return res
# return the first source line of a notebook's first cell
def get_first_cell_line(nbfile):
    """Return the first source line of the first cell in a notebook.

    Parameters
    ----------
    nbfile : str or path-like
        Path of the ``.ipynb`` file to inspect.

    Returns
    -------
    str
        Text of the first cell's source up to (not including) its first
        newline, whatever the cell's type. Returns ``""`` when the notebook
        has no cells or the first cell's source is empty.
    """
    from nbformat import read, NO_CONVERT

    # Notebook files are JSON stored as UTF-8; do not depend on the platform default.
    with open(nbfile, encoding="utf-8") as fp:
        notebook = read(fp, NO_CONVERT)
    cells = notebook['cells']
    if not cells:
        return ""
    # str.split always yields at least one element, so [0] is safe even for
    # an empty source string.
    return cells[0]["source"].split("\n")[0]


In [21]:
def get_run_order(nb_names, verbosity=0):
    """Split notebook names into an ordered run list and a skipped list.

    The second "_"-separated field of each name is read as the notebook's
    integer position (``Part_3_Fred.ipynb`` -> 3).

    Parameters
    ----------
    nb_names : iterable of str
        Notebook file names to classify.
    verbosity : int, optional
        When greater than 0, each name is printed as it is examined.

    Returns
    -------
    to_run : list of str
        Names with a valid integer position, sorted by that position
        (ties broken by name).
    not_run : list of tuple
        ``(reason, name)`` pairs for every skipped notebook: names without
        an underscore, the run-all notebook (position "99"), and names whose
        position field is not an integer.
    """
    tups = []
    not_run = []

    for nb_name in nb_names:
        if verbosity > 0:
            print(nb_name)
        name_parts = nb_name.split("_")
        if len(name_parts) < 2:
            not_run.append(("name doesn't start with 'Part'", nb_name))
            continue
        if name_parts[1] == "99":
            # Position 99 is the run-all notebook; running it would recurse.
            not_run.append(("runall", nb_name))
            continue
        try:
            part_id = int(name_parts[1])
        except ValueError:
            # append() takes a single argument, so the pair must be a tuple.
            not_run.append(
                ("can't convert {0} to int".format(name_parts[1]), nb_name)
            )
            continue
        tups.append((part_id, nb_name))
    tups.sort()
    to_run = [name for _, name in tups]
    return to_run, not_run
  
  
# Collect the project's notebooks, then separate the ones to execute (in
# order) from the ones that are skipped, and show the resulting run list.
nb_names = get_project_notebooks()
to_run, not_run = get_run_order(nb_names=nb_names, verbosity=0)
print(to_run)

['Part_0_Setup.ipynb', 'Part_1_Intro_Overview.ipynb', 'Part_2_DataSources.ipynb', 'Part_3_Fred.ipynb', 'Part_4_Weekly_Claims.ipynb', 'Part_5_UMich_Sentiment.ipynb', 'Part_6_GoogleTrends.ipynb', 'Part_7_Combine_claims.ipynb', 'Part_8_Combine_Datasets.ipynb']


In [27]:
%%capture --no-stderr nb_out
for nb_name in to_run:
    sys.stderr.write("{0}  <{1}>\n".format(nb_name, datetime.datetime.now()))
    %run $nb
    sys.stderr.write("  done  <{0}>\n".format(nb_name, datetime.datetime.now()))    

Part_0_Setup.ipynb  <2021-05-05 17:30:31.146525>
  done  <Part_0_Setup.ipynb>
Part_1_Intro_Overview.ipynb  <2021-05-05 17:30:51.891249>
  done  <Part_1_Intro_Overview.ipynb>
Part_2_DataSources.ipynb  <2021-05-05 17:31:12.936194>
  done  <Part_2_DataSources.ipynb>
Part_3_Fred.ipynb  <2021-05-05 17:31:33.859758>
  done  <Part_3_Fred.ipynb>
Part_4_Weekly_Claims.ipynb  <2021-05-05 17:31:54.418535>
  done  <Part_4_Weekly_Claims.ipynb>
Part_5_UMich_Sentiment.ipynb  <2021-05-05 17:32:15.866728>
  done  <Part_5_UMich_Sentiment.ipynb>
Part_6_GoogleTrends.ipynb  <2021-05-05 17:32:36.743354>
  done  <Part_6_GoogleTrends.ipynb>
Part_7_Combine_claims.ipynb  <2021-05-05 17:32:56.796807>
  done  <Part_7_Combine_claims.ipynb>
Part_8_Combine_Datasets.ipynb  <2021-05-05 17:33:16.517497>
  done  <Part_8_Combine_Datasets.ipynb>


In [29]:
# nb_out is the object bound by the "%%capture --no-stderr nb_out" cell;
# its .stdout attribute is shown here as the cell result.
nb_out.stdout

'version 3.9.2 of Python\nversion 1.20.1 of numpy\nversion 1.2.3 of pd\nversion 1.20.1 of np\nversion 3.3.4 of mpl\nversion 0.4.2 of fredapi\nversion 2.0.1 of xlrd\nversion 3.141.0 of selenium\nversion 3.14.1 of webdriver\nversion 2.2.1 of re\nversion 0.7.1 of pn\n0.4.2\nseries: USPRIV, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:31.165489\nrows= 171\nseries: NPPTTL, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:31.956680\nrows= 172\nseries: ICSA, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:32.859096\nrows= 747\nseries: CCSA, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:34.195306\nrows= 746\nseries: JTS1000JOL, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:35.894419\nrows= 170\nseries: JTS1000HIL, obs_start: 2007-01-01, obs_end: 2021-05-05\nget_series_all_releases <2021-05-05 17:30:36