In [30]:
import pandas as pd
import gc

## read the fines.csv

In [31]:
# Load the raw fines table from the CSV file in the working directory.
df = pd.read_csv("fines.csv")


## iterations


In [32]:
%%timeit
def loops(df):
    """Compute ``int(Fines / Refund * Years)`` per row using positional lookups.

    Each row is fetched individually with ``df.iloc[position]``; this is the
    plain index-loop variant of the row-wise calculation.

    Args:
        df: DataFrame providing 'Fines', 'Refund' and 'Years' columns.

    Returns:
        list: one ``int`` per row, in row order (empty for an empty frame).
    """
    def row_value(position):
        # Pull a single row by position, then evaluate the formula on it.
        record = df.iloc[position]
        return int(record['Fines'] / record['Refund'] * record['Years'])

    return [row_value(position) for position in range(len(df))]
# Store the index-loop results as a new 'Sum1' column on df.
df['Sum1'] = loops(df)

25.8 ms ± 140 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [33]:
%%timeit
def iterrow(df):
    """Compute ``int(Fines / Refund * Years)`` per row using ``df.iterrows()``.

    Args:
        df: DataFrame providing 'Fines', 'Refund' and 'Years' columns.

    Returns:
        list: one ``int`` per row, in row order (empty for an empty frame).
    """
    # iterrows() yields (index label, row Series); only the row is needed.
    return [
        int(record['Fines'] / record['Refund'] * record['Years'])
        for _, record in df.iterrows()
    ]
# Store the iterrows-based results as a new 'Sum2' column on df.
df['Sum2'] = iterrow(df)

14.1 ms ± 69.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [34]:
%%timeit
# Same per-row formula, evaluated through DataFrame.apply across each row
# and stored as a new 'Sum3' column.
df['Sum3'] = df.apply(
    lambda record: int(record['Fines'] / record['Refund'] * record['Years']),
    axis='columns',
)

4.22 ms ± 20.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## indexing

In [35]:
%%timeit
# Boolean-mask lookup: compare the 'CarNumber' column with the plate and
# select the matching rows.
df.loc[df['CarNumber'] == "O136HO197RUS"]

101 µs ± 316 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [36]:
# Move 'CarNumber' from a column into the index so rows can be looked up by
# label. The guard keeps this cell re-runnable: once set_index has consumed
# the column, calling it again would raise KeyError.
if 'CarNumber' in df.columns:
    df = df.set_index('CarNumber')

In [37]:
%%timeit
# Label-based lookup of the same plate; relies on df being indexed by
# 'CarNumber' (done in the preceding cell).
df.loc['O136HO197RUS']

42.2 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


## downcasting

In [38]:
# Baseline summary (dtypes and memory usage) of the frame before any dtype optimization.
df.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to POST36US
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int64  
 1   Fines   930 non-null    float64
 2   Make    930 non-null    object 
 3   Model   930 non-null    object 
 4   Years   930 non-null    int64  
 5   Sum1    930 non-null    int64  
 6   Sum2    930 non-null    int64  
 7   Sum3    930 non-null    int64  
dtypes: float64(1), int64(5), object(2)
memory usage: 250.8 KB


In [39]:
# Work on a copy so the original frame keeps its dtypes.
df_optimized = df.copy()

# Column labels of the float and integer columns, respectively.
fcols = df_optimized.select_dtypes(include='float').columns
icols = df_optimized.select_dtypes(include='integer').columns

# Downcast each numeric column to the smallest dtype pd.to_numeric picks for it.
df_optimized[fcols] = df_optimized[fcols].apply(
    lambda column: pd.to_numeric(column, downcast='float')
)
df_optimized[icols] = df_optimized[icols].apply(
    lambda column: pd.to_numeric(column, downcast='integer')
)

df_optimized.info(memory_usage='deep')


<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to POST36US
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Refund  930 non-null    int8   
 1   Fines   930 non-null    float32
 2   Make    930 non-null    object 
 3   Model   930 non-null    object 
 4   Years   930 non-null    int16  
 5   Sum1    930 non-null    int32  
 6   Sum2    930 non-null    int32  
 7   Sum3    930 non-null    int32  
dtypes: float32(1), int16(1), int32(3), int8(1), object(2)
memory usage: 224.5 KB


## categories

In [40]:
# Convert the two text columns to the 'category' dtype in a single astype call.
df_optimized = df_optimized.astype({'Make': 'category', 'Model': 'category'})
df_optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Index: 930 entries, Y163O8161RUS to POST36US
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   Refund  930 non-null    int8    
 1   Fines   930 non-null    float32 
 2   Make    930 non-null    category
 3   Model   930 non-null    category
 4   Years   930 non-null    int16   
 5   Sum1    930 non-null    int32   
 6   Sum2    930 non-null    int32   
 7   Sum3    930 non-null    int32   
dtypes: category(2), float32(1), int16(1), int32(3), int8(1)
memory usage: 116.3 KB


## memory cleanup

In [41]:
# Remove the name bound to the original frame, then run a garbage-collection
# pass; the cell displays the value returned by gc.collect().
del df
gc.collect()

1466

In [42]:
%reset_selective -f df