## Import Libraries

In [4]:
import pandas as pd
pd.set_option('display.max_column', 250)
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-notebook')
from matplotlib import rcParams
rcParams['figure.figsize'] = (5, 3)

rcParams['figure.dpi'] = 150
%matplotlib inline

In [5]:
def get_info(DataFrame, label_col):
    """Print a short summary of a labelled dataset.

    Three lines of information are written to stdout, in this order: the
    shape of the frame, the number of distinct values in ``label_col``, and
    the number of rows per distinct value of ``label_col``.

    Parameters
    ----------
    DataFrame : pandas.DataFrame
        The dataset to summarise.
    label_col : str
        Name of the column holding the class label.

    Returns
    -------
    None
    """
    print(f"The shape of data: {DataFrame.shape}")

    n_classes = DataFrame[label_col].nunique()
    print(f"Number of classes: {n_classes}")

    # Group on the label itself and count the rows that fall in each group.
    rows_per_class = DataFrame.groupby([label_col])[label_col].count()
    print(f"Class information: \n{rows_per_class}")

In [6]:
def read_arff(path):
    """Load an ARFF file into a pandas DataFrame.

    Parameters
    ----------
    path : str or path-like
        Location of the ARFF file to read.

    Returns
    -------
    pandas.DataFrame
        Built from the parsed ``'data'`` entry, with columns named after the
        first element of each item in the parsed ``'attributes'`` entry.
    """
    # Local import: the parser is only needed by this helper.
    # NOTE(review): the 'attributes'/'data' mapping looks like the liac-arff
    # API -- confirm which `arff` package the environment installs.
    import arff

    # Open the file in a context manager so the handle is always released,
    # including when parsing raises. The frame is built inside the block so
    # the rows are fully materialised before the file is closed.
    with open(path) as arff_file:
        data = arff.load(arff_file)
        cnames = [attribute[0] for attribute in data['attributes']]
        df = pd.DataFrame(data['data'], columns=cnames)
    return df

## Electricity Data

In [22]:
data_elec = read_arff('data/elecNormNew.arff')
# A bare expression is only rendered when it is the last statement of a cell,
# so the preview goes through display() to appear ahead of the summary.
display(data_elec.head())

get_info(data_elec, 'class')

Unnamed: 0,date,day,period,nswprice,nswdemand,vicprice,vicdemand,transfer,class
0,0.0,2,0.0,0.056443,0.439155,0.003467,0.422915,0.414912,UP
1,0.0,2,0.021277,0.051699,0.415055,0.003467,0.422915,0.414912,UP
2,0.0,2,0.042553,0.051489,0.385004,0.003467,0.422915,0.414912,UP
3,0.0,2,0.06383,0.045485,0.314639,0.003467,0.422915,0.414912,UP
4,0.0,2,0.085106,0.042482,0.251116,0.003467,0.422915,0.414912,DOWN


## Airlines

In [26]:
# Airlines data: read the ARFF file, preview the first rows, then print the
# shape and the row counts per value of the 'Delay' label.
data_airlines = read_arff(path='data/airlines.arff')
display(data_airlines.head(n=5))

get_info(data_airlines, label_col='Delay')

Unnamed: 0,Airline,Flight,AirportFrom,AirportTo,DayOfWeek,Time,Length,Delay
0,CO,269.0,SFO,IAH,3,15.0,205.0,1
1,US,1558.0,PHX,CLT,3,15.0,222.0,1
2,AA,2400.0,LAX,DFW,3,20.0,165.0,1
3,AA,2466.0,SFO,DFW,3,20.0,195.0,1
4,AS,108.0,ANC,SEA,3,30.0,202.0,0


The shape of data: (539383, 8)
Number of classes: 2
Class information: 
Delay
0    299119
1    240264
Name: Delay, dtype: int64


## Covertype

In [30]:
# Covertype data: read the ARFF file and preview the first rows.
data_covtype = read_arff(path='data/covtypeNorm.arff')
display(data_covtype.head(n=5))

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Wilderness_Area1,Wilderness_Area2,Wilderness_Area3,Wilderness_Area4,Soil_Type1,Soil_Type2,Soil_Type3,Soil_Type4,Soil_Type5,Soil_Type6,Soil_Type7,Soil_Type8,Soil_Type9,Soil_Type10,Soil_Type11,Soil_Type12,Soil_Type13,Soil_Type14,Soil_Type15,Soil_Type16,Soil_Type17,Soil_Type18,Soil_Type19,Soil_Type20,Soil_Type21,Soil_Type22,Soil_Type23,Soil_Type24,Soil_Type25,Soil_Type26,Soil_Type27,Soil_Type28,Soil_Type29,Soil_Type30,Soil_Type31,Soil_Type32,Soil_Type33,Soil_Type34,Soil_Type35,Soil_Type36,Soil_Type37,Soil_Type38,Soil_Type39,Soil_Type40,class
0,0.368684,0.141667,0.045455,0.184681,0.223514,0.071659,0.870079,0.913386,0.582677,0.875366,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5
1,0.365683,0.155556,0.030303,0.151754,0.215762,0.054798,0.866142,0.925197,0.594488,0.867838,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5
2,0.472736,0.386111,0.136364,0.19184,0.307494,0.446817,0.92126,0.937008,0.531496,0.853339,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
3,0.463232,0.430556,0.272727,0.173228,0.375969,0.434172,0.937008,0.937008,0.480315,0.865886,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2
4,0.368184,0.125,0.030303,0.10952,0.222222,0.054939,0.866142,0.92126,0.590551,0.860449,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5


In [31]:
# Print the shape and the row counts per value of the 'class' label.
get_info(data_covtype, label_col='class')

The shape of data: (581012, 55)
Number of classes: 7
Class information: 
class
1    211840
2    283301
3     35754
4      2747
5      9493
6     17367
7     20510
Name: class, dtype: int64


## Poker Hand

In [33]:
# Poker hand data: read the ARFF file and preview the first rows.
data_poker = read_arff(path='data/poker-lsn.arff')
display(data_poker.head(n=5))

Unnamed: 0,s1,r1,s2,r2,s3,r3,s4,r4,s5,r5,class
0,1,1.0,1,10.0,1,11.0,1,12.0,2,13.0,4
1,1,1.0,1,10.0,1,11.0,1,12.0,4,13.0,4
2,1,1.0,1,10.0,1,11.0,2,11.0,1,12.0,1
3,1,1.0,1,10.0,1,11.0,2,11.0,1,13.0,1
4,1,1.0,1,10.0,1,11.0,2,11.0,2,12.0,1


In [34]:
# Print the shape and the row counts per value of the 'class' label.
get_info(data_poker, label_col='class')

The shape of data: (829201, 11)
Number of classes: 10
Class information: 
class
0    415526
1    350426
2     39432
3     17541
4      3225
5      1657
6      1186
7       195
8        11
9         2
Name: class, dtype: int64


## Gas Sensor

In [9]:
from sklearn.datasets import load_svmlight_file

# The loader returns the feature matrix and the per-row target values.
X, y = load_svmlight_file('data/batch1.dat')
# Label the features 1..n from the width that was actually loaded rather than
# a hard-coded 128, so a file with a different feature count does not raise a
# column-length mismatch. X is densified first via toarray().
df = pd.DataFrame(X.toarray(), columns=range(1, X.shape[1] + 1))
# The output saved with this cell shows the first ten rows; display them
# explicitly so a fresh run reproduces it.
display(df.head(10))

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128
0,15596.1621,1.868245,2.371604,2.803678,7.512213,-2.739388,-3.344671,-4.847512,15326.6914,1.768526,2.269085,2.713374,6.915721,-2.488324,-3.082212,-5.056975,2789.3831,2.754759,0.430440,0.649457,1.795029,-0.426662,-0.584313,-1.438976,2581.5686,2.680623,0.399746,0.605065,1.786704,-0.400115,-0.550743,-1.728611,685.3994,1.682904,0.122736,0.223703,0.584691,-0.138196,-0.236907,-0.781959,797.7738,1.742488,0.152483,0.218904,0.841862,-0.164646,-0.315720,-0.791447,3128.8489,3.605537,0.532422,0.763062,2.118983,-0.557197,-0.809953,-2.344130,3136.8778,3.555169,0.535883,0.761388,1.499244,-0.571480,-0.944425,-2.658358,13540.6738,1.765738,2.006883,2.519022,6.261430,-2.172101,-2.694967,-3.791499,13831.7539,1.746493,2.057165,2.391239,5.695234,-2.350776,-2.888766,-8.129869,3020.9191,2.819354,0.474520,0.723993,2.160130,-0.467900,-0.638167,-1.643650,2185.9741,2.949381,0.342575,0.515090,1.340477,-0.361030,-0.493482,-1.200617,862.7479,1.779291,0.165138,0.246473,1.358106,-0.187465,-0.416382,-1.058061,1059.7562,1.896047,0.198946,0.334017,0.815048,-0.204467,-0.345119,-0.969336,3357.1124,3.860647,0.580818,0.806830,1.729739,-0.619214,-1.071137,-3.037772,3037.0390,3.972203,0.527291,0.728443,1.445783,-0.545079,-0.902241,-2.654529
1,26402.0704,2.532401,5.411209,6.509906,7.658469,-4.722217,-5.817651,-7.518333,23855.7812,2.164706,4.901063,5.971392,6.978131,-4.199424,-5.221090,-7.604863,3882.9057,3.155054,0.802987,1.172245,1.570895,-0.697881,-0.944655,-1.334743,3651.0427,3.130669,0.739278,1.101765,1.428631,-0.659641,-0.915543,-1.334217,982.9490,1.933877,0.233308,0.378052,0.717916,-0.225572,-0.354591,-0.748061,1162.2378,2.039431,0.278337,0.486908,0.840494,-0.271369,-0.473164,-0.718214,4332.1543,3.990162,1.002503,1.586233,1.926608,-0.913245,-1.335612,-1.773328,4387.5284,4.011519,1.009769,1.647830,1.962858,-0.944028,-1.631468,-1.925095,20553.5645,2.108870,4.266941,5.185937,6.398693,-3.636176,-4.497039,-7.464548,22540.1933,2.222146,4.621283,5.573620,6.756039,-3.986434,-4.953663,-6.561748,4240.3011,3.262893,0.887237,1.297617,1.630074,-0.768036,-1.244472,-1.419646,3079.0621,3.399659,0.649639,0.948781,1.369550,-0.598444,-0.820868,-1.081919,1237.0131,2.067927,0.305793,0.558008,0.948564,-0.289557,-0.501078,-1.191404,1459.6356,2.149242,0.361007,0.614619,0.950204,-0.327339,-0.503861,-0.838039,4685.7753,4.277604,1.080629,1.818559,2.237534,-1.004812,-1.530519,-1.994993,4176.4453,4.281373,0.980205,1.628050,1.951172,-0.889333,-1.323505,-1.749225
2,42103.5820,3.454189,8.198175,10.508439,11.611003,-7.668313,-9.478675,-12.230939,37562.3008,2.840403,7.386357,9.511703,12.129657,-6.689464,-8.184333,-10.196169,5476.3898,3.929721,1.132118,1.717418,2.050157,-1.052941,-1.436270,-1.956262,5140.8302,3.880357,1.054304,1.649600,1.964727,-0.995282,-1.377502,-1.844021,1409.2183,2.316449,0.335154,0.572576,0.932730,-0.337312,-0.532147,-1.011991,1647.6780,2.443898,0.405582,0.725644,1.006433,-0.403554,-0.676717,-1.008518,6145.6659,4.998625,1.426344,2.407405,2.856787,-1.377070,-1.994748,-2.445191,6197.3047,4.997810,1.473118,2.545978,3.331699,-1.420663,-2.185120,-2.625092,32366.9336,2.742087,6.419288,8.290210,9.709606,-5.792984,-7.171543,-9.797253,35597.0020,2.916899,6.969954,8.951225,10.413684,-6.441517,-7.914062,-9.973361,5968.8666,4.056138,1.262938,1.935885,2.346947,-1.159097,-1.791556,-2.233596,4409.1721,4.286887,0.917809,1.390353,1.810937,-0.909467,-1.254904,-1.732933,1735.0383,2.466508,0.450574,0.841068,1.808291,-0.426742,-0.711579,-1.129027,2040.1528,2.572696,0.509803,0.932630,1.248513,-0.475471,-0.730332,-1.126488,6630.1836,5.369827,1.567405,2.790448,3.279244,-1.518135,-2.384784,-2.867291,5914.6685,5.396827,1.403973,2.476956,3.039841,-1.334558,-1.993659,-2.348370
3,42825.9883,3.451192,12.113940,16.266853,39.910056,-7.849409,-9.689894,-11.921704,38379.0664,2.851173,10.840889,14.566782,39.100492,-6.878915,-8.470092,-10.739835,5670.7144,3.994661,1.587322,2.963585,2.889819,-1.116192,-1.504534,-2.159710,5343.8337,3.962529,1.479765,2.653653,2.753598,-1.061584,-1.465905,-2.027846,1499.3987,2.415714,0.476230,0.834066,1.316245,-0.373295,-0.562486,-1.005683,1747.0367,2.543528,0.575862,1.045360,1.582893,-0.437584,-0.720969,-1.172276,6361.2179,5.118540,2.016031,3.511800,3.989634,-1.468832,-2.164832,-2.952224,6443.5758,5.138118,2.067199,3.618647,4.262233,-1.521618,-2.328864,-2.871851,33300.6055,2.765042,9.397436,13.022406,29.111833,-5.999020,-7.466897,-9.590399,36487.1211,2.937415,10.300845,14.504802,29.353683,-6.624663,-8.135443,-10.433171,6187.1446,4.127966,1.772427,3.242992,3.232927,-1.233289,-1.676956,-2.110830,4570.3443,4.382205,1.298156,2.334433,2.427754,-0.964750,-1.315083,-1.575122,1837.0551,2.560248,0.626115,1.226975,1.828968,-0.463871,-0.785627,-1.249903,2144.8436,2.660842,0.714324,1.322181,1.883925,-0.513716,-0.777995,-1.134804,6933.2844,5.507270,2.215701,4.042550,4.675110,-1.644751,-2.607199,-3.058086,6147.4744,5.501071,1.981933,3.569823,4.049197,-1.432205,-2.146158,-2.488957
4,58151.1757,4.194839,11.455096,15.715298,17.654915,-11.083364,-13.580692,-16.407848,51975.5899,3.480866,10.409176,14.379885,15.387529,-9.543570,-11.707853,-14.370753,7483.3656,5.074172,1.576100,2.558010,3.389052,-1.515863,-2.059119,-2.494956,6995.2396,4.995555,1.474554,2.500523,2.981684,-1.436586,-2.006992,-2.424730,1976.1011,2.893531,0.480052,0.871868,1.301642,-0.487181,-0.753798,-1.628245,2286.8792,3.058936,0.581186,1.145093,1.508896,-0.582666,-0.949063,-1.270385,8474.3696,6.673105,2.025916,3.650673,4.291259,-1.985612,-2.992950,-3.446640,8547.2146,6.689090,2.078846,3.889228,4.392452,-2.073675,-3.305190,-3.792976,45212.6523,3.384210,9.083462,12.575800,13.600584,-8.326705,-10.152834,-12.301180,49670.9785,3.602189,9.905257,13.542131,14.934498,-9.292178,-12.009161,-13.677505,8128.6561,5.244599,1.762437,2.896088,3.339938,-1.674587,-2.412309,-2.787965,6086.2545,5.642596,1.293397,2.071232,2.683706,-1.316635,-1.798607,-3.026084,2388.1891,3.067174,0.632552,1.322201,1.666461,-0.611909,-1.018826,-1.770461,2784.4952,3.196657,0.716810,1.419936,1.845668,-0.663462,-1.022919,-1.602759,9179.2338,7.214645,2.236090,4.367815,4.946454,-2.249702,-3.594763,-4.181920,8158.6449,7.174334,1.993808,3.829303,4.402448,-1.930107,-2.931265,-4.088756
5,79872.1308,5.553595,18.690482,27.191770,28.636972,-15.054684,-18.399695,-23.376017,69637.2754,4.365858,16.623774,24.791658,25.989573,-12.828155,-16.549555,-20.112821,9835.1174,6.299183,2.389469,3.796117,4.288415,-2.118118,-2.906302,-3.433935,9255.4179,6.238049,2.230755,3.689638,4.090581,-1.998723,-2.807461,-3.392109,2606.0369,3.490512,0.716663,1.241189,1.659610,-0.671591,-1.043806,-1.428010,3022.4085,3.711339,0.865340,1.648878,2.234066,-0.789451,-1.286708,-1.702029,11188.9991,8.323346,3.068543,5.292209,5.952296,-2.785645,-4.210229,-4.818384,11383.0917,8.409444,3.170084,5.630515,6.503255,-2.904447,-4.577134,-5.408941,61319.5254,4.253409,14.423004,21.247317,22.601927,-11.321185,-13.930309,-17.156556,67797.5839,4.607981,15.983838,23.213847,24.498189,-12.554298,-15.317321,-18.457715,10784.8729,6.575018,2.675905,4.299588,4.751535,-2.342700,-3.263856,-5.629091,8034.6098,7.032801,1.976083,3.034488,3.526779,-1.835185,-2.530747,-2.897946,3135.2399,3.698660,0.922801,1.939194,2.566183,-0.824276,-1.407632,-1.863665,3581.2419,3.801432,1.030426,1.994314,2.499493,-0.898174,-1.406994,-1.944686,12317.5990,9.126387,3.407953,6.431166,7.407149,-3.151739,-5.092170,-5.657237,10731.9632,8.900291,3.002584,5.620025,6.530042,-2.688513,-4.173558,-4.808914
6,94981.5859,5.872932,22.318174,32.673187,34.308656,-18.198780,-22.253267,-27.061036,82237.3906,4.791826,19.886142,29.674004,31.494643,-15.338134,-18.842302,-25.042549,11763.6746,7.182966,2.822376,4.384479,4.869460,-2.668013,-3.805841,-4.346283,11135.1424,7.077395,2.648131,4.426393,4.970171,-2.515021,-3.583348,-4.122559,3137.4044,3.976838,0.846828,1.556548,2.016268,-0.840238,-1.390487,-1.778833,3648.1385,4.225473,1.020252,2.059313,2.660308,-0.981516,-1.642684,-2.286953,13491.3383,9.429182,3.668589,6.419015,7.317818,-3.542601,-5.622684,-6.598720,13863.3439,9.558369,3.805783,6.848187,7.800004,-3.689825,-5.968822,-6.710757,73148.6308,4.703971,17.238466,25.719414,27.483770,-13.693478,-16.757599,-19.857783,80701.8926,4.987113,19.143022,27.946251,30.138077,-15.082557,-18.354497,-22.405683,13001.4220,7.465825,3.179180,5.091477,5.720037,-2.955288,-4.191203,-4.860338,9607.6811,7.993631,2.349944,3.456891,3.916064,-2.308353,-3.278151,-3.778427,3784.3863,4.208613,1.076874,2.439297,3.374040,-1.020918,-1.766002,-2.232931,4224.2833,4.260557,1.189883,2.597907,3.345007,-1.117340,-1.856324,-2.591929,15106.7735,10.452707,4.130756,7.789363,8.900816,-4.020786,-6.463239,-7.132363,12925.3449,10.103240,3.604875,6.913722,7.949838,-3.407223,-5.363093,-5.870612
7,108075.6269,6.064713,25.660255,38.059268,40.191974,-21.472657,-26.199885,-32.367465,93658.4727,5.424357,22.732318,34.234901,36.589871,-17.667668,-22.433893,-26.278387,13541.5728,8.382525,3.208531,5.171619,5.836406,-3.120246,-4.448414,-4.940320,12920.0123,8.020969,3.009983,4.821765,5.465376,-2.922777,-4.096956,-4.699142,3612.1445,4.529567,0.972667,1.908625,2.576447,-0.977648,-1.638180,-2.011908,4197.0498,4.704184,1.146005,2.274795,3.019108,-1.123915,-1.810561,-2.264437,15550.6230,11.086588,4.242341,7.556179,8.658507,-4.150031,-6.670267,-7.636853,16133.5043,10.930175,4.360687,7.597568,9.155645,-4.271754,-6.764925,-7.392497,84127.4199,5.437270,19.926774,29.933688,31.779802,-15.870318,-20.933747,-23.931839,91696.9531,5.116772,21.566001,31.386522,33.481847,-17.805570,-22.781483,-28.051996,15193.3403,8.556785,3.622358,5.440173,6.103528,-3.447133,-4.930537,-5.394298,11070.1141,9.440263,2.679322,4.151160,4.827841,-2.688305,-3.855341,-4.405140,4343.6283,4.656264,1.200699,2.713749,3.630620,-1.154524,-1.926289,-2.417209,4807.7595,4.860622,1.358148,3.211212,4.217425,-1.278851,-2.237298,-2.931159,17815.9929,12.022409,4.825960,8.792724,10.023053,-4.771629,-7.838325,-8.454440,14926.7949,11.930814,4.169038,8.009728,9.463272,-4.029467,-6.616902,-7.203445
8,121315.7675,7.118364,28.686061,42.979381,45.482180,-23.152346,-28.351092,-43.046267,104314.9746,6.027791,25.512459,39.371068,41.515299,-19.104532,-23.220528,-30.535494,15480.8656,9.373102,3.614998,5.611369,6.338824,-3.697153,-5.309095,-5.858920,14627.7208,9.023476,3.385984,5.501471,6.241143,-3.451593,-4.936634,-5.598212,4082.7827,4.951989,1.082751,2.085718,2.774945,-1.132489,-1.902640,-2.374886,4715.1862,5.184557,1.270799,2.588942,3.328608,-1.302164,-2.151708,-3.523245,17748.6353,12.468714,4.745064,8.217753,9.787246,-4.885713,-7.910230,-8.745576,18320.0794,12.349696,4.914301,8.471937,10.038728,-5.072881,-8.210557,-8.863233,94002.9082,6.005782,22.286478,34.106343,37.124056,-17.315986,-21.003235,-25.586488,103262.7676,5.902875,24.312530,35.929658,40.291112,-19.304967,-23.439650,-33.633328,17230.8005,9.657979,4.074660,6.230721,6.920663,-4.087401,-5.831561,-6.659088,12660.6886,10.519742,3.028150,4.474734,5.249192,-3.171126,-4.574772,-5.031223,4882.8790,5.139976,1.331016,3.109878,4.606346,-1.349425,-2.323097,-2.753042,5394.4516,5.275126,1.479368,3.502599,4.738105,-1.472294,-2.541416,-3.048168,20301.2505,13.630299,5.448117,9.685876,11.230311,-5.649488,-9.251645,-9.986465,17089.4369,13.418815,4.686718,8.789605,10.349610,-4.741944,-7.752369,-8.587880
9,133600.9805,7.806088,30.785557,46.897557,49.198760,-25.285772,-30.563264,-38.621405,114217.8242,6.585496,27.312500,42.847164,44.993877,-20.739038,-25.069132,-32.696338,17278.7890,10.357104,3.880296,6.277621,7.158966,-4.236471,-6.152162,-6.816933,16310.1106,9.915118,3.620181,6.096359,7.070401,-3.939673,-5.690990,-6.344799,4520.9773,5.401456,1.164143,2.313596,2.927399,-1.275339,-2.152058,-2.552024,5221.3996,5.658784,1.345080,2.812872,3.511243,-1.462019,-2.432758,-3.045471,19767.1342,13.744410,5.108100,8.949866,10.574819,-5.577274,-9.133286,-10.023021,20467.1263,13.648861,5.265490,9.193924,10.855088,-5.794124,-9.507870,-10.337682,103360.2793,6.608673,23.850380,36.579121,38.419416,-18.840849,-22.809535,-29.028251,114002.9297,6.489845,26.040522,38.376639,40.217188,-21.077842,-25.333606,-36.770241,19290.5461,10.673600,4.367124,6.906473,7.744598,-4.679200,-6.801280,-7.376370,14135.5470,11.605377,3.265231,4.995327,5.797673,-3.614346,-5.258827,-5.689870,5409.0804,5.605970,1.398132,3.379307,4.551205,-1.515846,-2.616148,-2.927080,5932.7168,5.731873,1.548923,3.791363,4.910720,-1.647406,-2.861450,-3.421110,22802.1653,15.126447,5.926029,10.116197,12.054465,-6.531589,-10.937161,-11.699235,19098.2395,14.844685,5.078430,9.170270,10.683439,-5.452696,-9.167010,-9.748478
