In [1]:
import pandas as pd
import gc

fines = pd.read_csv('../data/fines.csv')
fines

Unnamed: 0,CarNumber,Make,Model,Refund,Fines,Year
0,Y163O8161RUS,Ford,Focus,2,3200.0,1989
1,E432XX77RUS,Toyota,Camry,1,6500.0,1995
2,7184TT36RUS,Ford,Focus,1,2100.0,1984
3,X582HE161RUS,Ford,Focus,2,2000.0,2015
4,92918M178RUS,Ford,Focus,1,5700.0,2014
...,...,...,...,...,...,...
925,X2K321EE32R,BMW,M5,1,2400.0,2010
926,C5K3212d32R,Toyota,Corolla,2,4200.0,2014
927,32FGE325HCV,Honda,Civic,4,1200.0,2012
928,5DK3212d32R,Chevrolet,Malibu,3,6000.0,2018


In [2]:
def calculate_with_loop(df):
    results = []
    for i in range(len(df)):
        fines = df.iloc[i]['Fines']
        refund = df.iloc[i]['Refund']
        year = df.iloc[i]['Year']
        value = (fines / refund) * year
        results.append(value)
    return results


In [3]:
def calculate_with_iterrows(df):
    results = []
    for index, row in df.iterrows():
        fines = row['Fines']
        refund = row['Refund']
        year = row['Year']
        value = (fines / refund) * year
        results.append(value)
    return results


In [4]:
%%timeit
fines['calc_loop'] = calculate_with_loop(fines)


42.7 ms ± 4.73 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
%%timeit
fines['calc_iterrows'] = calculate_with_iterrows(fines)


13.7 ms ± 93.8 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
%%timeit
fines['calc_apply'] = fines.apply(lambda row: row['Fines'] / row['Refund'] * row['Year'],axis=1)

3.79 ms ± 55 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
fines

Unnamed: 0,CarNumber,Make,Model,Refund,Fines,Year,calc_loop,calc_iterrows,calc_apply
0,Y163O8161RUS,Ford,Focus,2,3200.0,1989,3182400.0,3182400.0,3182400.0
1,E432XX77RUS,Toyota,Camry,1,6500.0,1995,12967500.0,12967500.0,12967500.0
2,7184TT36RUS,Ford,Focus,1,2100.0,1984,4166400.0,4166400.0,4166400.0
3,X582HE161RUS,Ford,Focus,2,2000.0,2015,2015000.0,2015000.0,2015000.0
4,92918M178RUS,Ford,Focus,1,5700.0,2014,11479800.0,11479800.0,11479800.0
...,...,...,...,...,...,...,...,...,...
925,X2K321EE32R,BMW,M5,1,2400.0,2010,4824000.0,4824000.0,4824000.0
926,C5K3212d32R,Toyota,Corolla,2,4200.0,2014,4229400.0,4229400.0,4229400.0
927,32FGE325HCV,Honda,Civic,4,1200.0,2012,603600.0,603600.0,603600.0
928,5DK3212d32R,Chevrolet,Malibu,3,6000.0,2018,4036000.0,4036000.0,4036000.0


In [8]:
data = {
    'Fines':[100.0,200.0,300.0],
    'Refund':[2,4,6],
    'Year': [2000,2010,2015]
}
new_fines = pd.DataFrame(data)


In [9]:
%%timeit
new_fines['calc_series'] = new_fines['Fines']/new_fines['Refund'] * new_fines['Year']


86.2 μs ± 4.38 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [10]:
%%timeit
fines['calc_values'] = fines['Fines'].values / fines['Refund'].values * fines['Year'].values 

41.7 μs ± 844 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [11]:
%%timeit
fines[fines['CarNumber'] =='O136HO197RUS']
# print(x)
fines.set_index('CarNumber')
fines[fines['CarNumber'] =='O136HO197RUS']



519 μs ± 7.27 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [12]:
fines.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CarNumber      930 non-null    object 
 1   Make           930 non-null    object 
 2   Model          918 non-null    object 
 3   Refund         930 non-null    int64  
 4   Fines          930 non-null    float64
 5   Year           930 non-null    int64  
 6   calc_loop      930 non-null    float64
 7   calc_iterrows  930 non-null    float64
 8   calc_apply     930 non-null    float64
 9   calc_values    930 non-null    float64
dtypes: float64(5), int64(2), object(3)
memory usage: 203.9 KB


In [13]:
df_optimized = fines.copy(deep=True)


float_cols = df_optimized.select_dtypes(include=['float64']).columns
df_optimized[float_cols] = df_optimized[float_cols].astype('float32')


int_cols = df_optimized.select_dtypes(include=['int64']).columns
df_optimized[int_cols] = df_optimized[int_cols].astype('int8')

print(df_optimized.dtypes)



CarNumber         object
Make              object
Model             object
Refund              int8
Fines            float32
Year                int8
calc_loop        float32
calc_iterrows    float32
calc_apply       float32
calc_values      float32
dtype: object


In [14]:
df_optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   CarNumber      930 non-null    object 
 1   Make           930 non-null    object 
 2   Model          918 non-null    object 
 3   Refund         930 non-null    int8   
 4   Fines          930 non-null    float32
 5   Year           930 non-null    int8   
 6   calc_loop      930 non-null    float32
 7   calc_iterrows  930 non-null    float32
 8   calc_apply     930 non-null    float32
 9   calc_values    930 non-null    float32
dtypes: float32(5), int8(2), object(3)
memory usage: 173.0 KB


In [15]:
object_col = df_optimized.select_dtypes(include='object').columns
df_optimized[object_col] = df_optimized[object_col].astype('category')
df_optimized.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 930 entries, 0 to 929
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   CarNumber      930 non-null    category
 1   Make           930 non-null    category
 2   Model          918 non-null    category
 3   Refund         930 non-null    int8    
 4   Fines          930 non-null    float32 
 5   Year           930 non-null    int8    
 6   calc_loop      930 non-null    float32 
 7   calc_iterrows  930 non-null    float32 
 8   calc_apply     930 non-null    float32 
 9   calc_values    930 non-null    float32 
dtypes: category(3), float32(5), int8(2)
memory usage: 73.8 KB


In [17]:
%reset_selective -f fines

gc.collect


NameError: name 'fines' is not defined