In [1]:
import numpy as np
import pandas as pd
# visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Plot configuration: seaborn's white grid theme for every figure.
sns.set_style(style='whitegrid')
# Default (width, height) tuple for figures; not referenced in the cells shown here.
figsize = (8, 6)

from utils import mem_usage, save_dtypes, cache_dtypes, save_df, \
typecast_ints, typecast_floats, typecast_objects

# Load the raw measurements and report the baseline memory footprint.
gl = pd.read_csv('../datasets/DrawAllocation.csv')
gl.info(memory_usage='deep')

# Downcast the integer and float columns through the project helpers.
converted_int = typecast_ints(gl.select_dtypes(include=['int']))
converted_float = typecast_floats(gl.select_dtypes(include=['float']))

# Normalise the text columns: trim surrounding whitespace, then lowercase.
gl_obj = (
    gl.select_dtypes(include=['object'])
    .apply(lambda col: col.str.strip())
    .apply(lambda col: col.str.lower())
)

# Convert object columns to category
# (per the original note: when unique values < 50% of total).
converted_obj = typecast_objects(gl_obj)

# Write each optimized subset back over the matching columns of `gl`.
for optimized in (converted_int, converted_float, converted_obj):
    gl[optimized.columns] = optimized

print('\n')

# Report the footprint again so it can be compared with the baseline above.
gl.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 575 entries, 0 to 574
Data columns (total 9 columns):
EGAP(s)                 575 non-null object
# EGAPs fixed           575 non-null int64
# Hits on fixed code    575 non-null int64
Energy (before fix)     575 non-null float64
Energy (after fix)      575 non-null float64
Time (before fix)       575 non-null float64
Time (after fix)        575 non-null float64
# Total method calls    575 non-null int64
# Diff. method calls    575 non-null int64
dtypes: float64(4), int64(4), object(1)
memory usage: 75.9 KB


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 575 entries, 0 to 574
Data columns (total 9 columns):
EGAP(s)                 575 non-null category
# EGAPs fixed           575 non-null uint8
# Hits on fixed code    575 non-null uint8
Energy (before fix)     575 non-null float32
Energy (after fix)      575 non-null float32
Time (before fix)       575 non-null float32
Time (after fix)        575 non-null float32
# Total method calls

In [2]:
# Peek at five random rows. The fixed seed keeps the preview identical on
# every Restart & Run All instead of showing different rows each execution.
gl.sample(5, random_state=42)

Unnamed: 0,EGAP(s),# EGAPs fixed,# Hits on fixed code,Energy (before fix),Energy (after fix),Time (before fix),Time (after fix),# Total method calls,# Diff. method calls
282,drawallocation,1,1,19963.398438,49479.0,20483.125,49172.0,55,29
76,drawallocation,2,0,30889.769531,49090.0,30719.453125,49119.0,3281,89
557,drawallocation,1,0,28478.429688,59611.0,28139.435547,59615.0,172,25
71,drawallocation,2,0,26560.794922,41803.0,27473.306641,41763.0,3249,89
108,drawallocation,2,0,35701.582031,59723.0,35818.082031,59739.0,3058,89


In [3]:
# Summary statistics restricted to the numeric columns.
numeric_summary = gl.describe(include=[np.number])
numeric_summary

Unnamed: 0,# EGAPs fixed,# Hits on fixed code,Energy (before fix),Energy (after fix),Time (before fix),Time (after fix),# Total method calls,# Diff. method calls
count,575.0,575.0,575.0,575.0,575.0,575.0,575.0,575.0
mean,1.347826,0.73913,24302.119141,47418.183594,25327.060547,47304.726562,883.869565,38.304348
std,0.476695,1.073005,4805.154297,7400.655273,5621.820801,7254.501465,1311.8622,28.260293
min,1.0,0.0,13322.349609,39954.0,16817.513672,39957.0,10.0,4.0
25%,1.0,0.0,21932.481445,41658.0,22158.792969,41655.0,59.0,25.0
50%,1.0,0.0,23467.248047,43217.0,24759.035156,43273.0,172.0,29.0
75%,2.0,1.0,26481.020508,50468.0,28087.954102,49388.5,1037.0,32.0
max,2.0,4.0,60513.527344,61241.0,101580.648438,60182.0,3784.0,93.0


In [4]:
# Map the raw CSV headers to short snake_case names.
# Renaming by label (instead of assigning `gl.columns` positionally) keeps
# this cell safe to re-run after derived columns have been appended to `gl`,
# and does not depend on the column order of the CSV. Labels that are not
# present in the frame are ignored by `rename`, so a second run is a no-op.
column_names = {
    'EGAP(s)': 'pattern',
    '# EGAPs fixed': 'patterns_fixed',
    '# Hits on fixed code': 'hits',
    'Energy (before fix)': 'energy_before',
    'Energy (after fix)': 'energy_after',
    'Time (before fix)': 'time_before',
    'Time (after fix)': 'time_after',
    '# Total method calls': 'total_calls',
    '# Diff. method calls': 'diff_calls',
}
gl = gl.rename(columns=column_names)
gl.head()

Unnamed: 0,pattern,patterns_fixed,hits,energy_before,energy_after,time_before,time_after,total_calls,diff_calls
0,drawallocation,2,0,23922.183594,42967.0,24299.384766,42954.0,3282,89
1,drawallocation,2,0,24268.382812,42967.0,24437.320312,42969.0,3282,89
2,drawallocation,2,0,24425.021484,42987.0,24446.060547,43006.0,3282,89
3,drawallocation,2,0,24522.003906,43005.0,24584.330078,43063.0,3282,89
4,drawallocation,2,0,24576.273438,43014.0,24694.568359,43077.0,3282,89


In [11]:
# Absolute change in energy: positive when the value after the fix is lower.
gl['energy_diff'] = gl['energy_before'] - gl['energy_after']
# NOTE: despite the name, this is energy_after expressed as a percentage of
# energy_before (100 means unchanged), not the percentage change.
gl['energy_diff_perct'] = gl['energy_after'] * 100 / gl['energy_before']
# Fixed seed so the preview rows are reproducible across runs.
gl.sample(5, random_state=42)

Unnamed: 0,pattern,patterns_fixed,hits,energy_before,energy_after,time_before,time_after,total_calls,diff_calls,energy_diff,energy_diff_perct
25,drawallocation,2,0,25365.978516,40120.0,26865.535156,39957.0,3784,93,-14754.021484,158.164612
477,drawallocation,1,0,21308.746094,40100.0,21169.679688,40064.0,210,25,-18791.253906,188.185638
250,drawallocation,1,1,17371.5625,41451.0,16817.513672,41858.0,47,29,-24079.4375,238.61412
332,drawallocation,1,0,22813.990234,43156.0,22889.470703,43158.0,10,4,-20342.009766,189.164627
245,drawallocation,1,1,23152.248047,40258.0,18895.738281,40657.0,49,29,-17105.751953,173.883759


In [12]:
# Distribution of energy_after as a percentage of energy_before.
energy_perct_summary = gl['energy_diff_perct'].describe()
energy_perct_summary

count    575.000000
mean     200.927353
std       45.047745
min       87.902657
25%      174.271301
50%      187.547226
75%      206.807976
max      317.819031
Name: energy_diff_perct, dtype: float64