In [18]:
# %pip install ipytest  # uncomment if ipytest is missing; %pip installs into the running kernel's environment

In [17]:
from pathlib import Path

import pandas as pd
import ipytest
ipytest.autoconfig()

# Specify paths with built-in `pathlib` library

In [3]:
# Relative location of the project's data folder (one level above the working directory).
dataPath = Path('..') / 'data'
# Raw input files are kept in the `raw` subfolder of the data folder.
rawDataPath = dataPath.joinpath('raw')

## Check if the directory exists

In [4]:
# Path.exists() is True for any existing path, whether it is a file or a directory.
rawDataPath.exists()

True

### Get a list of CSVs in that directory

In [15]:
# glob() yields matching paths lazily; list() collects them so the cell displays them.
list(rawDataPath.glob('*.csv'))

[PosixPath('../data/raw/atlanticInterpolated.csv'),
 PosixPath('../data/raw/large.csv'),
 PosixPath('../data/raw/atlanticInterpolatedtest.csv')]

# f-strings - an easier way to add your variables into strings

In [13]:
year = 2022
# The `f` prefix makes Python replace {year} with the value of the variable `year`.
currentYear = f"current year is {year}"
# Bare last expression: the notebook displays the resulting string.
currentYear

'current year is 2022'

In [5]:
def loadData(filename: str = 'atlanticInterpolated.csv', directory=None) -> pd.DataFrame:
    """Read one CSV file from the raw-data folder into a DataFrame.

    Parameters
    ----------
    filename : str, optional
        Name of the CSV file to read. Defaults to 'atlanticInterpolated.csv',
        so a bare `loadData()` call reads the same file as before.
    directory : str or Path, optional
        Folder that contains the file. When omitted, the notebook-level
        `rawDataPath` is used; it is looked up at call time, not at
        definition time.

    Returns
    -------
    pd.DataFrame
        The file parsed by `pd.read_csv` with its default settings. No column
        is promoted to the index, so an unnamed leading column in the file is
        kept as a regular column.

    Raises
    ------
    FileNotFoundError
        If the requested file does not exist.
    """
    # Fall back to the notebook's raw-data folder only when no folder is given.
    folder = rawDataPath if directory is None else Path(directory)
    return pd.read_csv(folder / filename)

# Load the CSV into `df`, which later cells reuse.
df = loadData()
# Preview the first rows instead of displaying the whole frame.
df.head()

Unnamed: 0,z,0:June:2020,1:June:2020,2:July:2020,3:June:2021,4:June:2021,5:June:2021,6:April:2016,7:March:2018,8:May:2005,...,3915:August:2020,3916:August:2020,3917:September:2020,3918:September:2020,3919:September:2020,3920:October:2020,3921:October:2020,3922:October:2020,3923:November:2020,3924:November:2020
0,-3.0,15.655,14.549,15.049,12.237,14.069,14.178,11.103,10.482,13.578,...,18.712999,17.341,17.504,18.089001,16.77,15.748,15.24,14.937,14.277,14.013
1,-6.0,15.655,14.549,15.049,12.237,14.069,14.178,11.103,10.482,13.578,...,18.712999,17.341,17.504,18.089126,16.77,15.747917,15.24,14.937,14.277,14.013
2,-9.0,15.655,14.548529,15.049387,12.237,14.069,14.178,11.1025,10.482,13.578,...,18.637727,17.3406,17.507999,17.936714,16.763,15.748,15.240636,14.936455,14.2748,14.0149
3,-12.0,15.655,14.545,15.053,12.237,14.068902,14.178,11.101,10.482539,13.578,...,18.509321,17.339786,17.4924,17.435621,16.769793,15.748645,15.24356,14.935667,14.278457,14.015333
4,-15.0,15.526556,14.53949,15.053,12.22939,14.065961,14.166511,11.0999,10.493579,13.578,...,18.273146,17.340461,17.431673,17.029636,16.772415,15.749808,15.246787,14.936775,14.28,14.015583


In [6]:
def getColumn(df: pd.DataFrame, col: str):
    """Return the column labelled `col` from `df`, keeping every row.

    The lookup is label-based (`.loc`), so a `KeyError` is raised when `col`
    is not one of the column labels of `df`.
    """
    everyRow = slice(None)
    return df.loc[everyRow, col]

In [7]:
# NOTE(review): 'x' is not among the column labels shown in the df.head() preview,
# and the recorded output of this cell is KeyError: 'x'. Confirm that the failure
# is the intended demonstration rather than a typo for an existing column.
col = 'x'
getColumn(df=df,col=col)

KeyError: 'x'

In [8]:
def getAverageTemperature(df: pd.DataFrame):
    """Return the mean of each column of `df`.

    The result is a `pd.Series` indexed by column label, with one mean per
    column, not a `pd.DataFrame`. Every column passed in is averaged, so
    callers choose the relevant columns before calling.
    """
    columnMeans = df.mean(axis=0)
    return columnMeans

In [9]:
def test_getAverageTemperature():
    """Check that `getAverageTemperature` returns a `pd.DataFrame`.

    Laid out as Arrange / Act / Assert. The recorded output of this cell is
    an `AssertionError` with an empty message.
    """
    # ARRANGE
    # load the data
    df = loadData()
    # select the first two columns
    df = df.iloc[:,:2]
    # ACT
    dfAverage = getAverageTemperature(df=df)
    # ASSERT
    # bare assert: a failure reports nothing about what was actually returned
    assert isinstance(dfAverage,pd.DataFrame)

test_getAverageTemperature()

AssertionError: 

## Responding to a test failure 1: Add an error message to the assert statement

In [10]:
def test_getAverageTemperature():
    """Check that `getAverageTemperature` returns a `pd.DataFrame`.

    Laid out as Arrange / Act / Assert. The assert carries the actual type of
    the result as its message, so a failure shows what was returned.
    """
    # ARRANGE
    # load the data
    df = loadData()
    # select the first two columns
    df = df.iloc[:,:2]
    # ACT
    dfAverage = getAverageTemperature(df=df)
    # ASSERT
    # the f-string after the comma becomes the AssertionError message on failure
    assert isinstance(dfAverage,pd.DataFrame),f"{type(dfAverage)}"

test_getAverageTemperature()

AssertionError: <class 'pandas.core.series.Series'>

## Responding to a test failure 2: Use the debugger


In [11]:
# Re-run the failing test so the following %debug cell has a fresh exception to inspect.
test_getAverageTemperature()

AssertionError: <class 'pandas.core.series.Series'>

In [None]:
%debug

# Automated tests with pytest


In [12]:
%%ipytest -qq

# NOTE(review): the recorded output of this cell is "Cell magic `%%ipytest` not found",
# and this cell's execution count (12) is lower than that of the cell that imports
# ipytest (17). The output looks stale; confirm with Restart Kernel -> Run All.
def test_getAverageTemperature():
    """Check that `getAverageTemperature` returns a `pd.DataFrame`.

    Defined inside an `%%ipytest` cell rather than being called by hand.
    On failure the assert message shows the actual type of the result.
    """
    # ARRANGE
    # load the data
    df = loadData()
    # select the first two columns
    df = df.iloc[:,:2]
    # ACT
    dfAverage = getAverageTemperature(df=df)
    # ASSERT
    # the f-string after the comma becomes the AssertionError message on failure
    assert isinstance(dfAverage,pd.DataFrame),f"{type(dfAverage)}"

UsageError: Cell magic `%%ipytest` not found.


## Use the debugger with pytest

In [None]:
%%ipytest -qq --pdb

def test_getAverageTemperature():
    """Check that `getAverageTemperature` returns a `pd.DataFrame`.

    Defined inside an `%%ipytest` cell that passes `--pdb`, pytest's option
    for opening the debugger when a test fails. The locals `df` and
    `dfAverage` are then available for inspection in the failing frame.
    """
    # ARRANGE
    # load the data
    df = loadData()
    # select the first two columns
    df = df.iloc[:,:2]
    # ACT
    dfAverage = getAverageTemperature(df=df)
    # ASSERT
    # the f-string after the comma becomes the AssertionError message on failure
    assert isinstance(dfAverage,pd.DataFrame),f"{type(dfAverage)}"