# Errors, logging and debugging

In [32]:
import pandas as pd
import numpy as np
from scipy.stats import linregress

In [33]:
df = pd.read_csv("../data/women_in_parliament_processed.csv")

In [34]:
df.head()

Unnamed: 0.1,Unnamed: 0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,...,United Republic of Tanzania,United States of America,Uruguay,Uzbekistan,Vanuatu,Venezuela (Bolivarian Republic of),Viet Nam,Yemen,Zambia,Zimbabwe
0,2000,,5.16,3.16,7.14,15.45,5.26,28.02,3.05,22.45,...,22.18,13.33,12.12,6.8,0.0,12.14,26.0,0.66,10.13,14.0
1,2001,,5.16,3.42,7.14,15.45,5.26,26.46,3.05,22.97,...,22.18,14.02,12.12,7.2,0.0,9.7,26.0,0.66,10.13,9.33
2,2002,,5.71,3.42,14.29,15.45,5.26,30.74,3.05,25.33,...,22.26,14.02,12.12,7.2,0.0,9.7,26.0,0.67,12.03,10.0
3,2003,,5.71,6.17,14.29,15.45,5.26,30.74,3.05,25.33,...,22.26,14.25,12.12,7.2,1.92,9.7,27.31,0.67,12.03,10.0
4,2004,,5.71,6.17,14.29,15.45,5.26,30.74,4.58,25.33,...,21.36,14.25,12.12,7.2,1.92,9.7,27.31,0.33,12.03,10.0


In [35]:
timestamps = [int(i) for i in df.index.tolist()]

In [36]:
uk_parl = df["United Kingdom of Great Britain and Northern Ireland"].tolist()

In [39]:
def fit_trendline(year_timestamps):
    result = linregress(year_timestamps, data)
    slope = round(result.slope, 3)
    r_squared = round(result.rvalue**2, 3)
    return slope, r_squared

In [40]:
fit_trendline(timestamps)

ValueError: all the input array dimensions except for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 24 and the array at index 1 has size 3

In [41]:
def fit_trendline(year_timestamps, data):
    try:
        result = linregress(year_timestamps, data)
    except TypeError:
        print("Both lists must contain floats or integers.")
    else:
        slope = round(result.slope, 3)
        r_squared = round(result.rvalue**2, 3)
        return slope, r_squared

In [42]:
timestamps = ["2000", "2001", "2002"]

In [43]:
data = uk_parl[:3]

In [44]:
data

[18.36, 18.36, 17.91]

In [45]:
fit_trendline(timestamps, data)

Both lists must contain floats or integers.


### Raising your own errors

In [13]:
def fit_trendline(year_timestamps, data):
    if not year_timestamps or data:
        raise ValueError("Timestamps and data cannot be empty lists")
    result = linregress(year_timestamps, data)
    slope = round(result.slope, 3)
    r_squared = round(result.rvalue**2, 3)
    return slope, r_squared

In [14]:
fit_trendline([], [18.36, 18.36, 17.91])

ValueError: Timestamps and data cannot be empty lists

## Logging

### Logging in Python

In [15]:
import logging

In [16]:
logging.basicConfig(level=logging.DEBUG)

In [17]:
logging.basicConfig(filename="chapter_05_logs.log", level=logging.DEBUG)

In [18]:
logging.basicConfig(
    filename="chapter_05_logs.log",
    level=logging.DEBUG,
    format="%(asctime)s %(message)s",
)

In [19]:
def fit_trendline(year_timestamps, data):
    logging.info("Running fit_trendline function")
    result = linregress(year_timestamps, data)
    slope = round(result.slope, 3)
    r_squared = round(result.rvalue**2, 3)
    logging.info(f"Completed analysis. Slope of the trendline is {slope}.")
    return slope, r_squared

In [29]:
data = df["United Kingdom of Great Britain and Northern Ireland"].tolist()[:3]
data


[18.36, 18.36, 17.91]

In [31]:
fit_trendline(timestamps, data)

INFO:root:Running fit_trendline function


UFuncTypeError: ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U4'), dtype('<U4')) -> None

In [26]:
def fit_trendline(year_timestamps, data):
    logging.info("Running fit_trendline function")
    try:
        result = linregress(year_timestamps, data)
    except TypeError as e:
        logging.error("Both lists must contain floats or integers.")
        logging.exception(e)
    else:
        slope = round(result.slope, 3)
        r_squared = round(result.rvalue**2, 3)
        logging.info(f"Completed analysis. Slope of the trendline is {slope}.")
        return slope, r_squared

In [27]:
timestamps = ["2000", "2001", "2002"]
data = [18.36, 18.36, 17.91]

fit_trendline(timestamps, data)

INFO:root:Running fit_trendline function
ERROR:root:Both lists must contain floats or integers.
ERROR:root:ufunc 'maximum' did not contain a loop with signature matching types (dtype('<U4'), dtype('<U4')) -> None
Traceback (most recent call last):
  File "C:\Users\anpande\AppData\Local\Temp\ipykernel_17780\1963620590.py", line 4, in fit_trendline
    result = linregress(year_timestamps, data)
  File "c:\Users\anpande\Documents\Git\SD4DS\SD4DS\lib\site-packages\scipy\stats\_stats_mstats_common.py", line 156, in linregress
    if np.amax(x) == np.amin(x) and len(x) > 1:
  File "c:\Users\anpande\Documents\Git\SD4DS\SD4DS\lib\site-packages\numpy\core\fromnumeric.py", line 2827, in amax
    return _wrapreduction(a, np.maximum, 'max', axis, None, out,
  File "c:\Users\anpande\Documents\Git\SD4DS\SD4DS\lib\site-packages\numpy\core\fromnumeric.py", line 88, in _wrapreduction
    return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
numpy.core._exceptions._UFuncNoLoopError: ufunc 'maximum' d

In [None]:
logging.basicConfig(filename="chapter_5_logs.log", filemode="w", level=logging.DEBUG)

## Debugging

In [28]:
def weighted_mean(num_list, weights):
    running_total = 0
    for i in range(len(num_list)):
        running_total += num_list[i] * weights[0]
    return running_total / len(num_list)

In [37]:
weighted_mean([1, 6, 8], [1, 3, 2])

DEBUG:root:The running total at step 0 is 1
DEBUG:root:The running total at step 1 is 7
DEBUG:root:The running total at step 2 is 15


5.0

In [30]:
def weighted_mean(num_list, weights):
    running_total = 0
    for i in range(len(num_list)):
        running_total += num_list[i] * weights[0]
        print(f"The running total at step {i} is {running_total}")
    return running_total / sum(weights)

In [31]:
numbers = [10, 20, 30, 40, 50]
weights = [0.1, 0.2, 0.3, 0.2, 0.2]

weighted_mean(numbers, weights)

The running total at step 0 is 1.0
The running total at step 1 is 3.0
The running total at step 2 is 6.0
The running total at step 3 is 10.0
The running total at step 4 is 15.0


15.0

In [35]:
import logging

logging.basicConfig(
    filename="chapter_05_logs.log", level=logging.DEBUG, format="%(asctime)s %(message)s"
)


def weighted_mean(num_list, weights):
    running_total = 0
    for i in range(len(num_list)):
        running_total += num_list[i] * weights[0]
        logging.debug(f"The running total at step {i} is {running_total}")
    return running_total / len(num_list)