   # Part10: Run Notebooks


In [1]:
import os
import sys
import time
import datetime
import touch
import inspect
import numpy
import pandas as pd
print(sys.version_info)

sys.version_info(major=3, minor=9, micro=2, releaselevel='final', serial=0)


In [2]:
mlist = list(filter(lambda x: inspect.ismodule(x[1]), locals().items()))
vi = sys.version_info
print("version {0}.{1}.{2} of Python".format(vi.major, vi.minor, vi.micro))
for name, mod in mlist:
    mname = name
    if name.startswith("__"):
        continue
    if hasattr(mod, "__version__"):
        mname = name
        if hasattr(mod, "__path__"):
            mname = os.path.split(mod.__path__[0])[1]
        print("version {1} of {0} as {2} ".format(mname, name, mod.__version__))
    elif hasattr(mod, "__file__") and "site-packages" in mod.__file__:
        print("No __version__ for {0} as {1}".format(mname, name))
del mod
del name

version 3.9.2 of Python
No __version__ for touch as touch
version numpy of numpy as 1.20.1 
version pd of pandas as 1.2.3 


In [3]:
# to get the names of notebook files in a directory
def get_project_notebooks():
    from nbformat import read, NO_CONVERT
    import os
    import datetime
    proj_nbs = []
    nbs = [f for f in os.listdir() if f.endswith(".ipynb")]
    part_nbs = [f for f in nbs if f.startswith("Part")]
    return(part_nbs)


In [4]:
# print the source code for a cell
def print_source(cell):
    print("type cell= {0}".format(type(cell)))
    src = cell["source"]
    lines = src.split("\n")
    print("{0} lines".format(len(lines)))
    for i,line in enumerate(lines):
        print("({0}){1}".format(i, line))

# run notebook code
def run_nb_code(nbfile):
    from nbformat import read, NO_CONVERT
    import os
    import datetime
    res = "Start time: " + str(datetime.datetime.now())
    with open(nbfile) as fp:
        notebook = read(fp, NO_CONVERT)
    cells = notebook['cells']
    code_cells = [c for c in cells if c['cell_type'] == 'code']
    for i, cell in enumerate(code_cells):
        src = cell['source']
        #print("{0}<{1}>".format(li, line))                
        try:
            exec(src, globals(), locals())
        except Exception as e:
            print("Error on cell {0}\n".format(i))
            print(src)
            print_source(cell)
            exc_type, exc_obj, exc_tb = sys.exc_info()
            import traceback
            traceback.print_tb(exc_tb)
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            return((exc_type, exc_obj, exc_tb, fname))
    res += "end: " + str(datetime.datetime.now())
    return res
# run notebook code
def get_first_cell_line(nbfile):
    from nbformat import read, NO_CONVERT
    import os
    import datetime
    res = "Start time: " + str(datetime.datetime.now())
    with open(nbfile) as fp:
        notebook = read(fp, NO_CONVERT)
    cells = notebook['cells']
    if not cells:
        return ""
    lines = cells[0]["source"].split("\n")
    if not lines:
        return ""
    return lines[0]


In [5]:
def get_run_order(nb_names, verbosity=0):
  tups = []
  not_run = []
  
  for nb_name in nb_names:
    if verbosity > 0:
      print(nb_name)
    name_parts = nb_name.split("_")
    if len(name_parts) < 2:
      not_run.append(("name doesn't start with 'Part'", nb_name))
      continue
    elif name_parts[1] == "99":
      not_run.append(("runall", nb_name))
      continue
    try:
      id = int(name_parts[1])
      tups.append((id, nb_name))
    except:
      not_run.append("can't convert {0} to int".format(name_parts[1]), nb_name)
  tups.sort()
  to_run = [x[1] for x in tups]
  return to_run, not_run
  
  
nb_names = get_project_notebooks()

to_run, not_run = get_run_order(nb_names, verbosity=0)
print(to_run)

['Part_0_Setup.ipynb', 'Part_1_Intro_Overview.ipynb', 'Part_2_DataSources.ipynb', 'Part_3_Fred.ipynb', 'Part_4_Weekly_Claims.ipynb', 'Part_5_UMich_Sentiment.ipynb', 'Part_6_GoogleTrends.ipynb', 'Part_7_Plot_Claims.ipynb', 'Part_8_Combine_Datasets.ipynb']


In [13]:
%%capture --no-stderr nb_out
def extract_part(name):
  parts = name.split("_")
  try:
    part = int(parts[1])
    return part
  except:
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)     
    
for nb_name in to_run:
    part = extract_part(nb_name)
    if part < 7:
      continue
    sys.stderr.write("{0}  <{1}>\n".format(nb_name, datetime.datetime.now()))
    %run $nb_name
    sys.stderr.write("  done  <{0}>\n".format(nb_name, datetime.datetime.now()))    

Part_7_Plot_Claims.ipynb  <2021-05-10 23:45:47.022320>
  done  <Part_7_Plot_Claims.ipynb>
Part_8_Combine_Datasets.ipynb  <2021-05-10 23:45:47.500103>
  done  <Part_8_Combine_Datasets.ipynb>


In [14]:
print(nb_out.stdout)

google_trends  shape:(103, 21)
tidy_fred  shape:(897, 18)
umich_exp  shape:(159, 5)
us_pau_claims  shape:(58, 4)
date                        object
ICSA: Initial Claims       float64
CCSA: Continued Claims     float64
dtype: object
date shape: (897,)
 done, <2021-05-10 23:45:47.098657>
Index(['ICSA: Initial Claims', 'CCSA: Continued Claims ', 'PUA IC', 'PUA CC',
       'PEUC CC'],
      dtype='object')
['google_trends.csv', 'tidy_fred.csv', 'umich_exp.csv', 'us_pau_claims.csv']
google_trends.csv
           date  amazon jobs hiring
102  2021-05-02                  14
2021-05-02
tidy_fred.csv
           date  PAYEMS:Total Nonfarm
896  2021-05-01                   NaN
2021-05-02
umich_exp.csv
           date  Personal Finance Expected
158  2021-03-15                        118
2021-05-02
us_pau_claims.csv
          date  PUA IC
57  2021-05-08  101214
2021-05-08
124 7
AxesSubplot(0.125,0.125;0.775x0.755)
(15, 3)

