In [1]:
import pandas as pd

## Read the `fines.csv` file that you saved in the previous exercise

In [2]:
# Load the fines dataset; the path is relative to the current working directory.
fines = pd.read_csv('../data/fines.csv')

In [3]:
# fines

## Iterations: in each of the following subtasks, calculate `Fines / Refund * Year` for every row, store the result in a new column, and measure the run time with the `%%timeit` cell magic

In [4]:
def calc_value(fines_value, refund_value, year_value):
    """Return ``fines_value / refund_value * year_value``.

    The division is evaluated first and its result is then multiplied by
    ``year_value``. The operands may be scalars, or pandas Series / NumPy
    arrays, in which case the arithmetic is applied element-wise.

    No guard is applied against a zero ``refund_value``.
    """
    fines_per_refund = fines_value / refund_value
    return fines_per_refund * year_value

In [5]:
# Work on a copy so the benchmark's new column is never added to `fines` itself.
fines_copy = fines.copy()

In [6]:
def calculated_loop(df):
    """Compute ``calc_value`` for every row using positional indexing.

    For each position from 0 to ``len(df) - 1`` the 'Fines', 'Refund' and
    'Year' columns are read with ``.iloc`` and passed to ``calc_value``.

    Returns:
        list: one computed value per row, in row order.
    """
    return [
        calc_value(df['Fines'].iloc[pos], df['Refund'].iloc[pos], df['Year'].iloc[pos])
        for pos in range(len(df))
    ]

In [7]:
%%timeit
# Approach 1: explicit positional loop; one calc_value call per row.
fines_copy['calc_values'] = calculated_loop(fines_copy)

27.5 ms ± 5.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [8]:
# fines_copy

In [9]:
# Start again from a fresh copy so the column added by the previous benchmark is gone.
fines_copy = fines.copy()

In [10]:
def calculated_iterrows(df):
    """Compute ``calc_value`` for every row by iterating with ``iterrows``.

    The index label yielded by ``iterrows`` is ignored; only the row's
    'Fines', 'Refund' and 'Year' entries are used.

    Returns:
        list: one computed value per row, in iteration order.
    """
    return [
        calc_value(row['Fines'], row['Refund'], row['Year'])
        for _, row in df.iterrows()
    ]

In [11]:
%%timeit
# Approach 2: iterate over rows with DataFrame.iterrows().
fines_copy['calc_values'] = calculated_iterrows(fines_copy)

50.7 ms ± 5.82 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [12]:
# fines_copy

In [13]:
# Fresh copy of `fines` for the apply() benchmark.
fines_copy = fines.copy()

In [14]:
%%timeit
# Approach 3: apply() with axis=1 hands each row to the lambda, so calc_value
# is still called once per row.
fines_copy['calc_values'] = fines_copy.apply(lambda row: calc_value(row['Fines'], row['Refund'], row['Year']), axis=1)

14 ms ± 479 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [15]:
# fines_copy

In [16]:
# Fresh copy of `fines` for the Series-based benchmark.
fines_copy = fines.copy()

In [17]:
%%timeit
# Approach 4: pass whole Series objects; calc_value is called once and pandas
# applies the arithmetic element-wise.
fines_copy['calc_values'] = calc_value(fines_copy['Fines'], fines_copy['Refund'], fines_copy['Year'])

471 µs ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [18]:
# fines_copy

In [19]:
# Fresh copy of `fines` for the NumPy-array benchmark.
fines_copy = fines.copy()

In [22]:
%%timeit
# Approach 5: pass the columns as NumPy arrays so the arithmetic runs on plain
# ndarrays. Series.to_numpy() is the accessor pandas recommends over `.values`.
fines_copy['calc_values'] = calc_value(fines_copy['Fines'].to_numpy(),
                                       fines_copy['Refund'].to_numpy(),
                                       fines_copy['Year'].to_numpy())

309 µs ± 47.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [23]:
# fines_copy

## Indexing: measure the lookup time of each approach using the `%%timeit` cell magic

In [36]:
# Fresh copy of `fines` for the indexing comparison; CarNumber is still a regular column here.
fines_copy = fines.copy()

In [26]:
# fines_copy

In [27]:
%timeit fines_copy[fines_copy['CarNumber'] == 'O136HO197RUS']

335 µs ± 34 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [37]:
# Build the indexed frame from the untouched `fines` instead of mutating
# `fines_copy` in place: re-running this cell then gives the same result rather
# than raising KeyError because 'CarNumber' is no longer a column.
fines_copy = fines.set_index('CarNumber')

In [38]:
fines_copy

Unnamed: 0_level_0,Refund,Fines,Make,Model,Year
CarNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Y163O8161RUS,2,3200.0,Ford,Focus,1989
E432XX77RUS,1,6500.0,Toyota,Camry,1995
7184TT36RUS,1,2100.0,Ford,Focus,1984
X582HE161RUS,2,2000.0,Ford,Focus,2015
92918M178RUS,1,5700.0,Ford,Focus,2014
...,...,...,...,...,...
K307NA30RUS,2,7327.0,Audi,RS7,2015
X007OB190RUS,1,10964.0,Kia,Rio,2009
H729HO170RUS,1,11336.0,BMW,Z4,2019
K961KC12RUS,2,25228.0,BMW,M3,2021


In [39]:
%%timeit
# Label-based lookup through the index (CarNumber was made the index in an earlier cell).
fines_copy.loc['O136HO197RUS']

128 µs ± 2.01 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## downcasting

In [69]:
# Baseline dtypes and memory footprint; memory_usage='deep' makes pandas measure
# the actual contents of object columns instead of estimating them.
fines.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int64  
 2   Fines      930 non-null    float64
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 196.5 KB


In [70]:
# Downcast on a copy so `fines` keeps its original dtypes for comparison.
fines_copy = fines.copy()

In [71]:
# Downcast every float64 column to the smallest float dtype pandas will pick
# for its values.
fcols = fines_copy.select_dtypes(include='float64').columns
fines_copy[fcols] = fines_copy[fcols].apply(
    lambda column: pd.to_numeric(column, downcast='float')
)

In [72]:
# Downcast every int64 column to the smallest signed integer dtype pandas will
# pick for its values.
icols = fines_copy.select_dtypes(include='int64').columns
fines_copy[icols] = fines_copy[icols].apply(
    lambda column: pd.to_numeric(column, downcast='integer')
)

In [73]:
# Re-check dtypes and memory footprint after the numeric columns were downcast.
fines_copy.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   CarNumber  930 non-null    object 
 1   Refund     930 non-null    int8   
 2   Fines      930 non-null    float32
 3   Make       930 non-null    object 
 4   Model      919 non-null    object 
 5   Year       930 non-null    int16  
dtypes: float32(1), int16(1), int8(1), object(3)
memory usage: 181.1 KB
