In [89]:
import pandas as pd

## read the fines.csv that you saved in the previous exercise

In [90]:
# Load the fines dataset; the path is relative to the notebook's working directory.
fines = pd.read_csv('../data/fines.csv')

In [91]:
# fines

## iterations: in all the following subtasks, calculate `fines / refund * year` for each row, store the result in a new column, and measure the time with the `%%timeit` cell magic

In [92]:
def calc_value(fines_value, refund_value, year_value):
    """Return ``fines_value / refund_value * year_value``.

    Only ``/`` and ``*`` are applied to the arguments, so the function works
    on anything that supports those operators: the notebook calls it with
    single cell values as well as with whole columns.
    """
    fines_per_refund = fines_value / refund_value
    return fines_per_refund * year_value

In [93]:
# Work on a copy so the column added by the benchmark does not end up in `fines`.
fines_copy = fines.copy()

In [94]:
def calculated_loop(df):
    """Compute Fines / Refund * Year for every row with a positional loop.

    Walks the row positions ``0 .. len(df) - 1``, reads the ``Fines``,
    ``Refund`` and ``Year`` cells with ``.iloc`` and passes them to
    ``calc_value``.

    Returns a plain list holding one value per row, in row order.
    """
    results = []
    for pos in range(len(df)):
        fines_cell = df['Fines'].iloc[pos]
        refund_cell = df['Refund'].iloc[pos]
        year_cell = df['Year'].iloc[pos]
        results.append(calc_value(fines_cell, refund_cell, year_cell))
    return results

In [95]:
%%timeit
# Timed statement: build the new column with the positional .iloc loop.
fines_copy['calc_values'] = calculated_loop(fines_copy)

28.9 ms ± 721 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [96]:
# fines_copy

In [97]:
# Start again from a fresh copy of `fines`, without the `calc_values` column
# written by the previous benchmark.
fines_copy = fines.copy()

In [98]:
def calculated_iterrows(df):
    """Compute Fines / Refund * Year for every row via ``DataFrame.iterrows()``.

    Each iteration yields an (index, row) pair; only the row is used. Its
    ``Fines``, ``Refund`` and ``Year`` entries are passed to ``calc_value``.

    Returns a plain list holding one value per row, in iteration order.
    """
    results = []
    for _, row in df.iterrows():
        results.append(calc_value(row['Fines'], row['Refund'], row['Year']))
    return results

In [99]:
%%timeit
# Timed statement: build the new column by iterating rows with DataFrame.iterrows().
fines_copy['calc_values'] = calculated_iterrows(fines_copy)

62.7 ms ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [100]:
# fines_copy

In [101]:
# Start again from a fresh copy of `fines`, without the `calc_values` column
# written by the previous benchmark.
fines_copy = fines.copy()

In [102]:
%%timeit
# Timed statement: call calc_value once per row through DataFrame.apply(axis=1).
fines_copy['calc_values'] = fines_copy.apply(lambda row: calc_value(row['Fines'], row['Refund'], row['Year']), axis=1)

17.2 ms ± 1.37 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [103]:
# fines_copy

In [104]:
# Start again from a fresh copy of `fines`, without the `calc_values` column
# written by the previous benchmark.
fines_copy = fines.copy()

In [105]:
%%timeit
# Timed statement: a single calc_value call on whole pandas Series (no Python-level row loop).
fines_copy['calc_values'] = calc_value(fines_copy['Fines'], fines_copy['Refund'], fines_copy['Year'])

679 µs ± 160 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [106]:
# fines_copy

In [107]:
# Start again from a fresh copy of `fines`, without the `calc_values` column
# written by the previous benchmark.
fines_copy = fines.copy()

In [108]:
%%timeit
fines_copy['calc_values'] = calc_value(fines_copy['Fines'].values,
                                       fines_copy['Refund'].values,
                                       fines_copy['Year'].values)

315 µs ± 17.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [109]:
# fines_copy

## indexing: measure the time using the magic command %%timeit in the cell

In [110]:
# Fresh copy of `fines` for the indexing benchmarks (no `calc_values` column).
fines_copy = fines.copy()

In [111]:
# fines_copy

In [112]:
%timeit fines_copy[fines_copy['CarNumber'] == 'O136HO197RUS']

337 µs ± 21 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [113]:
# Build the indexed frame from `fines` instead of mutating `fines_copy` with
# inplace=True: an in-place set_index raises KeyError when the cell is run a
# second time, because `CarNumber` is then no longer a column. set_index
# returns a new DataFrame, so `fines` itself is left untouched.
fines_copy = fines.set_index('CarNumber')

In [125]:
# fines_copy

In [115]:
%%timeit
# Timed statement: label-based lookup through the `CarNumber` index.
fines_copy.loc['O136HO197RUS']

132 µs ± 3.47 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## downcasting

In [116]:
# memory_usage='deep' makes pandas inspect the object columns, so the reported
# total reflects their actual memory consumption.
fines.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int64  
 2   Fines      930 non-null    float64
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 196.5 KB


In [117]:
# Fresh copy for the dtype experiments; `fines` keeps its original dtypes.
fines_copy = fines.copy()

In [118]:
# Downcast every float64 column to the smallest float dtype pd.to_numeric picks.
fcols = fines_copy.select_dtypes(include='float64').columns
fines_copy[fcols] = fines_copy[fcols].apply(
    lambda col: pd.to_numeric(col, downcast='float')
)

In [119]:
# Downcast every int64 column to the smallest integer dtype pd.to_numeric picks.
icols = fines_copy.select_dtypes(include='int64').columns
fines_copy[icols] = fines_copy[icols].apply(
    lambda col: pd.to_numeric(col, downcast='integer')
)

In [120]:
# Memory report after downcasting the numeric columns ('deep' also measures
# the contents of the object columns).
fines_copy.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int8   
 2   Fines      930 non-null    float32
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int16  
dtypes: float32(1), int16(1), int8(1), object(3)
memory usage: 181.1 KB


## categories

In [121]:
# Convert every object-dtype column to the pandas `category` dtype.
ocols = fines_copy.select_dtypes(include='object').columns
category_dtypes = {col: 'category' for col in ocols}
fines_copy = fines_copy.astype(category_dtypes)

In [122]:
# Memory report after converting the object columns to `category`.
fines_copy.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   CarNumber  930 non-null    category
 1   Refund     930 non-null    int8    
 2   Fines      930 non-null    float32 
 3   Make       930 non-null    category
 4   Model      919 non-null    category
 5   Year       930 non-null    int16   
dtypes: category(3), float32(1), int16(1), int8(1)
memory usage: 64.5 KB


## memory clean

In [123]:
import gc

In [124]:
# The anchored regex matches only the variable named exactly `fines`, so
# `fines_copy` is kept; -f skips the confirmation prompt.
%reset_selective -f "^fines$"
# Run a garbage collection pass; the number displayed below the cell is
# gc.collect()'s return value.
gc.collect()

120