In [1]:
# Extend the module search path with the relative directory "aux/modules";
# being relative, it is resolved against the kernel's working directory.
import sys
sys.path.append("aux/modules")
# Project helper modules (presumably located in aux/modules — confirm).
# cf_radar is not referenced directly by the cells visible in this notebook.
import cf_data, cf_plot, cf_radar

### The original Dataset

In [2]:
# Fetch the raw dataset through cf_data.getRawData() and preview its first rows.
pdDataset = cf_data.getRawData()
pdDataset.head()

Unnamed: 0,riskiness,losses,manufacturer,fuel type,aspiration,doors,body,drive,engine location,wheel base,...,engine size,fuel system,bore,stroke,compression ratio,horsepower,peak rpm,city mpg,highway mpg,price
0,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
1,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
2,1,158,audi,gas,std,four,sedan,fwd,front,105.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
3,1,158,audi,gas,turbo,four,sedan,fwd,front,105.8,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
4,2,192,bmw,gas,std,two,sedan,rwd,front,101.2,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


### Creating a subset of the original Dataset

In [3]:
# getLimitedData() is called with its defaults here; the preview below shows a
# reduced column set (manufacturer, price, mpg, horsepower, weight, riskiness, losses).
pdSubset = cf_data.getLimitedData()
pdSubset.head()

Unnamed: 0,manufacturer,price,city mpg,highway mpg,horsepower,weight,riskiness,losses
0,audi,13950,24,30,102,2337,2,164
1,audi,17450,18,22,115,2824,2,164
2,audi,17710,19,25,110,2844,1,158
3,audi,23875,17,20,140,3086,1,158
4,bmw,16430,23,29,101,2395,2,192


### Getting all manufacturer names

In [None]:
# Display the manufacturer names returned by the helper (an object-dtype array, per the output below).
cf_data.getAllManufacturers()

array(['audi', 'bmw', 'chevrolet', 'dodge', 'honda', 'jaguar', 'mazda',
       'mercedes-benz', 'mitsubishi', 'nissan', 'peugot', 'plymouth',
       'porsche', 'saab', 'subaru', 'toyota', 'volkswagen', 'volvo'],
      dtype=object)

### Counting the occurrences of each manufacturer

In [None]:
# getManufacturerCount returns a pair; keep both parts and display the counts.
manufacturerNames, manufacturerCounts = cf_data.getManufacturerCount(pdSubset)
manufacturerCounts

### Getting the total number of rows from manufacturers with more than 6 occurrences each

In [None]:
# Same helper as for pdSubset, this time with lowerBound = 6
# (presumably a minimum occurrence count per manufacturer — confirm in cf_data).
pdMajorManufacturers = cf_data.getLimitedData(lowerBound=6)
# Number of rows kept.
len(pdMajorManufacturers)

### Normalizing the Dataset - Columns

In [None]:
# Work on a copy so the following normalisation steps leave pdMajorManufacturers untouched.
pdNormalized = pdMajorManufacturers.copy()

In [None]:
# Give the three space-separated / unseparated column names underscore-style
# identifiers. rename() returns a new frame, which is rebound to the same name
# instead of mutating with inplace=True: the state change is explicit in the
# assignment and the cell stays safe to re-run (labels that are already renamed
# are simply ignored by rename()).
pdNormalized = pdNormalized.rename(
    columns = {
        'city mpg': 'mpg_city',
        'highway mpg': 'mpg_highway',
        'horsepower': 'horse_power'
    }
)

In [None]:
# Preview the frame to check the renamed column labels.
pdNormalized.head()

### Normalizing the Dataset - Values

In [None]:
# The helper's return value is discarded and the frame is displayed right
# after, so it presumably normalises the listed columns in place — confirm in cf_data.
cf_data.normalizeColumns(
    ['mpg_city', 'mpg_highway', 'horse_power'],
    pdNormalized
)
pdNormalized.head()

In [None]:
# "Inverted" variant of the normalisation helper, applied to the remaining
# numeric columns (presumably so that lower raw values score higher, and
# presumably in place — confirm both in cf_data).
cf_data.normalizeColumnsInverted(
    ['price', 'weight', 'riskiness', 'losses'],
    pdNormalized
)
pdNormalized.head()

### Plotting

In [None]:
# Plotting dependencies. Only mpl and plt are referenced by the cells visible
# below; np, pd, Image and warnings may be used elsewhere in the notebook.
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Image
import warnings
# Warning suppression is currently disabled (kept here as a commented-out switch).
# warnings.filterwarnings('ignore')

# NOTE(review): later cells access mpl.gridspec.GridSpec; that attribute is
# presumably available as a side effect of importing matplotlib.pyplot —
# confirm, or add an explicit `import matplotlib.gridspec`.

# Apply the style sheet at aux/mpl-styles/dark.mplstyle (relative path).
plt.style.use('aux/mpl-styles/dark.mplstyle')

In [None]:
# 15x5 figure with a single-cell grid, handed to cf_plot.priceByManufacturer
# together with the un-normalised major-manufacturer frame.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(nrows=1, ncols=1)
axes = cf_plot.priceByManufacturer(figure, gridSpec, pdMajorManufacturers)
plt.show()

In [None]:
# 15x5 figure with a single-cell grid, handed to cf_plot.mpgByManufacturer
# together with the un-normalised major-manufacturer frame.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(nrows=1, ncols=1)
axes = cf_plot.mpgByManufacturer(figure, gridSpec, pdMajorManufacturers)
plt.show()

In [None]:
# 15x5 figure with a single-cell grid, handed to
# cf_plot.inverseRiskinessByManufacturer together with the normalised frame.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(nrows=1, ncols=1)
axes = cf_plot.inverseRiskinessByManufacturer(figure, gridSpec, pdNormalized)
plt.show()

In [None]:
# 15x5 figure with a single-cell grid, handed to
# cf_plot.inverseLossesByManufacturer together with the normalised frame.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(nrows=1, ncols=1)
axes = cf_plot.inverseLossesByManufacturer(figure, gridSpec, pdNormalized)
plt.show()

In [None]:
# 15x5 figure with a single-cell grid, handed to
# cf_plot.inverseRiskinessAndLossesCombinedByManufacturer together with the
# normalised frame.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(nrows=1, ncols=1)
axes = cf_plot.inverseRiskinessAndLossesCombinedByManufacturer(figure, gridSpec, pdNormalized)
plt.show()

In [None]:
# Radar charts: a 3-row by 7-column grid whose first row is one tenth the
# height of each of the other two, passed to cf_plot.radarByManufacturer with
# the normalised frame.
# NOTE(review): top = 2 lies outside the usual 0-1 figure-fraction range;
# presumably intentional so the grid extends past the nominal 15x5 figure — confirm.
figure = plt.figure(figsize=(15, 5))
gridSpec = mpl.gridspec.GridSpec(
    nrows=3,
    ncols=7,
    height_ratios=[1, 10, 10],
    wspace=0.5,
    hspace=0.5,
    top=2,
    bottom=0.25,
)
axes = cf_plot.radarByManufacturer(figure, gridSpec, pdNormalized)
plt.show()