# Analysis of Notebook Exceptions

In [1]:
import sys

sys.path.insert(0, "../archaeology")
sys.path.insert(0, "../analysis")

from analysis_helpers import var
from analysis_helpers_executions import get_combined_reason

%matplotlib inline

In [2]:
# Load the execution records through the project helper.
# NOTE(review): the schema is whatever get_combined_reason() returns; later
# cells rely on it having a "new_reason" column.
combined_execution_reason_df = get_combined_reason()

# Preview the first three rows as the cell's rich output.
combined_execution_reason_df.head(n=3)

Unnamed: 0,id,repository_id,notebook_id,mode,reason,msg,diff,cell,count,diff_count,timeout,duration,processed,skip,new_reason
1581,1582,288,1265,3,ModuleNotFoundError,"Traceback (most recent call last):\n File ""ru...",,-1.0,0.0,0.0,300.0,2.674515,55,0,ModuleNotFoundError
1583,1584,578,2721,3,ModuleNotFoundError,"Traceback (most recent call last):\n File ""ru...",,-1.0,0.0,0.0,300.0,6.561188,55,0,ModuleNotFoundError
1588,1589,114,455,3,,,,-1.0,0.0,0.0,300.0,81.996349,51,0,


In [3]:
# Split the executions into three buckets based on "new_reason":
#   * frequent exceptions - exception types seen more than MIN_OCCURRENCES times
#   * rare exceptions     - every other exception type
#   * no exception        - rows without a recorded exception
#
# NOTE: the "_10" / "less_than_10" suffixes are kept because later cells read
# these names. The "less than 10" bucket is computed by subtraction, so it also
# contains exception types that occurred exactly 10 times (i.e. "10 or fewer").
MIN_OCCURRENCES = 10  # an exception type is "frequent" when seen more often than this

reasons = combined_execution_reason_df["new_reason"]

# A row has no exception when the reason is missing (NaN) or the literal string
# "None". The same mask drives both the exception tally and the no-exception
# count, so a "None" row can never leak into the rare-exception bucket.
no_exception_mask = reasons.isna() | (reasons == "None")

# Occurrences of each exception type (value_counts sorts by descending count)
exception_counts = reasons[~no_exception_mask].value_counts()

# Exception types that occur more than MIN_OCCURRENCES times
exceptions_more_than_10 = exception_counts[exception_counts > MIN_OCCURRENCES]

# Total number of execution rows
total_notebooks = len(combined_execution_reason_df)

# Fail with a clear message instead of a ZeroDivisionError further down
assert total_notebooks > 0, (
    "combined_execution_reason_df is empty; percentages are undefined"
)

# Rows whose exception type is one of the frequent ones
notebooks_exceptions_more_than_10 = int(
    reasons.isin(exceptions_more_than_10.index).sum()
)

# Rows with no exception
notebooks_no_exceptions = int(no_exception_mask.sum())

# Rows whose exception type occurred MIN_OCCURRENCES times or fewer
notebooks_exceptions_less_than_10 = (
    total_notebooks - notebooks_exceptions_more_than_10 - notebooks_no_exceptions
)

# Share of each bucket, as a percentage of all rows
percentage_exceptions_more_than_10 = (
    notebooks_exceptions_more_than_10 / total_notebooks
) * 100
percentage_no_exceptions = (notebooks_no_exceptions / total_notebooks) * 100
percentage_exceptions_less_than_10 = (
    notebooks_exceptions_less_than_10 / total_notebooks
) * 100

In [4]:
# Show the frequent exception types with their counts (header and series on
# separate lines), followed by the overall number of executed notebooks.
print(
    "Exceptions occurring more than 10 times:",
    exceptions_more_than_10,
    sep="\n",
)
print("Total number of notebooks that were executed:", total_notebooks)

Exceptions occurring more than 10 times:
ModuleNotFoundError                              5562
FileNotFoundError                                1102
ImportError                                      1026
NameError                                         390
IOError                                           147
AttributeError                                     94
ValueError                                         91
TypeError                                          85
KeyError                                           71
CalledProcessError                                 68
<Unknown exception>                                59
OSError                                            48
RuntimeError                                       48
SyntaxError                                        46
IndexError                                         28
LZMAError                                          24
HTTPError                                          22
PermissionError: [Errno 13] Permission de

In [5]:
# Report the share of each bucket, one line per bucket.
# Each value goes through the project helper var(name, value, format).
# NOTE(review): var() is defined in analysis_helpers; judging by the recorded
# output it yields the value formatted to two decimals - confirm whether it
# also stores the value under the given name.
percentage_report = (
    (
        "Percentage of notebooks: Exceptions occurring more than 10 times: {}",
        "percentage_exceptions_more_than_10",
        percentage_exceptions_more_than_10,
    ),
    (
        "Percentage of notebooks: Exceptions occurring less than 10 times: {}",
        "percentage_exceptions_less_than_10",
        percentage_exceptions_less_than_10,
    ),
    (
        "Percentage of notebooks with no exceptions: {}",
        "percentage_no_exceptions",
        percentage_no_exceptions,
    ),
)

for line_template, var_name, percentage in percentage_report:
    print(line_template.format(var(var_name, percentage, "{:.2f}")))

Percentage of notebooks: Exceptions occurring more than 10 times: 86.29
Percentage of notebooks: Exceptions occurring less than 10 times: 1.32
Percentage of notebooks with no exceptions: 12.39
