In [1]:
import numpy as np 
import pandas as pd
from tabulate import tabulate
pd.options.mode.chained_assignment = None

In [2]:
df = pd.read_csv('./input/data-cars.csv')

In [3]:
objects_df = df.select_dtypes(include=['object'])
objects_df.drop('Vehicle Name', inplace=True, axis=1)
for (columnName, columnData) in objects_df.iteritems():
    df[columnName] = df[columnName].replace("*", float("NAN"))
    df[columnName] = df[columnName].astype(str).astype(float)

df = df.fillna(df.mean())

In [4]:
car_types = ['Sedan' , 'Sports Car', 'SUV', 'Wagon', 'Minivan' , 'Pickup']
cars = {}
for ctype in car_types:
    cars[ctype] = df[df[ctype] == 1]

In [5]:
def printTable(title, tdata, headers=[]):
    print('\n')
    print(title)    
    print('\n')
    print(tabulate(tdata, headers= headers, tablefmt="pretty"))
    print('\n')


In [6]:
prices = []
for key, vals in cars.items():
    retail_p = int(vals['Retail Price'].mean())
    dealer_c = int(vals['Dealer Cost'].mean())
    profit = retail_p - dealer_c
    percentage = "{:.2f}".format( profit / retail_p * 100)
    prices.append([key,retail_p, dealer_c, profit, percentage])

headers = ['Car Type', 'AVG Retail Price', 'AVG Dealer Cost', 'AVG Profit', 'AVG Profit %']
title = "Pickups have the highest average profit (9.32%)"

printTable(title, prices, headers)



Pickups have the highest average profit (9.32%)


+------------+------------------+-----------------+------------+--------------+
|  Car Type  | AVG Retail Price | AVG Dealer Cost | AVG Profit | AVG Profit % |
+------------+------------------+-----------------+------------+--------------+
|   Sedan    |      29814       |      27446      |    2368    |     7.94     |
| Sports Car |      53387       |      48473      |    4914    |     9.20     |
|    SUV     |      34790       |      31625      |    3165    |     9.10     |
|   Wagon    |      28840       |      26645      |    2195    |     7.61     |
|  Minivan   |      27796       |      25355      |    2441    |     8.78     |
|   Pickup   |      24941       |      22616      |    2325    |     9.32     |
+------------+------------------+-----------------+------------+--------------+




In [7]:
tdata = []
for key, vals in cars.items():
    fcyl = "{:.2f}".format(vals[vals['Cyl'] == 4]['City MPG'].mean())
    scyl = "{:.2f}".format(vals[vals['Cyl'] == 6]['City MPG'].mean())
    ecyl = "{:.2f}".format(vals[vals['Cyl'] == 8]['City MPG'].mean())
    tdata.append([key , fcyl, scyl, ecyl])
    
title = "Sedans have the Hieghets AVG City MPG per Cyl Count"
headers = ['Car Type', '4 Cyl', '6 Cyl', '8 Cyl']
printTable(title, tdata, headers)  



Sedans have the Hieghets AVG City MPG per Cyl Count


+------------+-------+-------+-------+
|  Car Type  | 4 Cyl | 6 Cyl | 8 Cyl |
+------------+-------+-------+-------+
|   Sedan    | 25.70 | 19.35 | 17.35 |
| Sports Car | 21.18 | 18.70 | 17.01 |
|    SUV     | 21.14 | 16.87 | 13.73 |
|   Wagon    | 24.15 | 18.73 | 16.00 |
|  Minivan   | 20.00 | 17.79 |  nan  |
|   Pickup   | 21.33 | 15.56 | 15.12 |
+------------+-------+-------+-------+




In [8]:
tdata = []
for key, vals in cars.items():
    fcyl = "{:.2f}".format(vals[vals['Cyl'] == 4]['Hwy MPG'].mean())
    scyl = "{:.2f}".format(vals[vals['Cyl'] == 6]['Hwy MPG'].mean())
    ecyl = "{:.2f}".format(vals[vals['Cyl'] == 8]['Hwy MPG'].mean())    
    tdata.append([key , fcyl, scyl, ecyl])
    
title = "Sedans have the Hieghets AVG Hwy MPG per Cyl Count"
headers = ['Car Type', '4 Cyl', '6 Cyl', '8 Cyl']
printTable(title, tdata, headers)  



Sedans have the Hieghets AVG Hwy MPG per Cyl Count


+------------+-------+-------+-------+
|  Car Type  | 4 Cyl | 6 Cyl | 8 Cyl |
+------------+-------+-------+-------+
|   Sedan    | 32.94 | 27.37 | 24.76 |
| Sports Car | 28.00 | 26.10 | 24.14 |
|    SUV     | 25.43 | 21.60 | 17.77 |
|   Wagon    | 31.28 | 25.36 | 22.25 |
|  Minivan   | 26.00 | 24.37 |  nan  |
|   Pickup   | 26.67 | 20.11 | 19.21 |
+------------+-------+-------+-------+




In [9]:
tdata = []
for key, vals in cars.items():
    city = "{:.2f}".format(vals['Cyl'].corr(vals['City MPG']))    
    hwy = "{:.2f}".format(vals['Cyl'].corr(vals['Hwy MPG']))
    tdata.append([key , city, hwy])

headers = ['Car Type', 'City MPG', 'Hwy MPG']
title = "Most car types share a strong negative correlation between Cyl Count and City/Hwy MPG \n Hiegher Cyl Count means Lower City/Hwy MPG"
printTable(title, tdata, headers)  



Most car types share a strong negative correlation between Cyl Count and City/Hwy MPG 
 Hiegher Cyl Count means Lower City/Hwy MPG


+------------+----------+---------+
|  Car Type  | City MPG | Hwy MPG |
+------------+----------+---------+
|   Sedan    |  -0.60   |  -0.62  |
| Sports Car |  -0.44   |  -0.39  |
|    SUV     |  -0.73   |  -0.64  |
|   Wagon    |  -0.75   |  -0.79  |
|  Minivan   |  -0.34   |  -0.15  |
|   Pickup   |  -0.71   |  -0.71  |
+------------+----------+---------+




In [10]:
tdata = []
es = "{:.2f}".format(df['Engine Size (l)'].corr(df['Retail Price']))
cyl = "{:.2f}".format(df['Cyl'].corr(df['Retail Price']))
hp = "{:.2f}".format(df['HP'].corr(df['Retail Price']))
tdata.append([es, cyl, hp])
headers = ['Engine Size (l)', 'Cyl', 'HP']
title = "A positive correlation between engine properties and retail price \nEngine properties significantly affect the retail price"
printTable(title, tdata, headers)



A positive correlation between engine properties and retail price 
Engine properties significantly affect the retail price


+-----------------+------+------+
| Engine Size (l) | Cyl  |  HP  |
+-----------------+------+------+
|      0.57       | 0.63 | 0.83 |
+-----------------+------+------+




In [13]:
tdata = []
for key, vals in cars.items():
    es = "{:.2f}".format(vals['Engine Size (l)'].corr(vals['Retail Price']))
    cyl = "{:.2f}".format(vals['Cyl'].corr(vals['Retail Price']))
    hp = "{:.2f}".format(vals['HP'].corr(vals['Retail Price']))
    
    tdata.append([key , es, cyl, hp]) 

headers = ['Car Type', 'Engine Size (l)', 'Cyl', 'HP']
title = "A detailed correlation report between engine properties and retail price"
printTable(title, tdata, headers)



A detailed correlation report between engine properties and retail price


+------------+-----------------+------+------+
|  Car Type  | Engine Size (l) | Cyl  |  HP  |
+------------+-----------------+------+------+
|   Sedan    |      0.72       | 0.78 | 0.86 |
| Sports Car |      0.50       | 0.50 | 0.80 |
|    SUV     |      0.69       | 0.74 | 0.77 |
|   Wagon    |      0.75       | 0.74 | 0.84 |
|  Minivan   |      0.35       | 0.32 | 0.23 |
|   Pickup   |      0.81       | 0.73 | 0.86 |
+------------+-----------------+------+------+




In [14]:
tdata = []
city = "{:.2f}".format(df['City MPG'].corr(df['Retail Price']))
hwy = "{:.2f}".format(df['Hwy MPG'].corr(df['Retail Price']))
tdata.append([city, hwy])
headers = ['City MPG', 'Hwy MPG']
title = "A negative correlation between City/Hwy MPG and retail price \nFuel-efficient vehicles tend to be more expensive"
printTable(title, tdata, headers)



A negative correlation between City/Hwy MPG and retail price 
Fuel-efficient vehicles tend to be more expensive


+----------+---------+
| City MPG | Hwy MPG |
+----------+---------+
|  -0.45   |  -0.42  |
+----------+---------+




In [15]:
sedans = cars.get("Sedan")
sedans_3l = sedans[sedans['Engine Size (l)'] > 3.0].sort_values(by='City MPG', ascending=True)
top5 = sedans_3l.head(5)
tdata = []
for index, row in top5.iterrows():
    tdata.append([row['Vehicle Name'] , row['Engine Size (l)'], row['Cyl'] , row['City MPG'], row['Hwy MPG'] ])

headers = ['Vehicle Name', 'Engine Size (l)', 'Cyl', 'City MPG', 'Hwy MPG']
title = "Mercedes-Benz CL600 2dr is the most fule efficient sedan with a larger than 3.0L Engine Size"
printTable(title, tdata, headers)



Mercedes-Benz CL600 2dr is the most fule efficient sedan with a larger than 3.0L Engine Size


+-------------------------+-----------------+-----+----------+---------+
|      Vehicle Name       | Engine Size (l) | Cyl | City MPG | Hwy MPG |
+-------------------------+-----------------+-----+----------+---------+
| Mercedes-Benz CL600 2dr |       5.5       | 12  |   13.0   |  19.0   |
|   Audi S4 Quattro 4dr   |       4.2       |  8  |   14.0   |  20.0   |
| Mercedes-Benz CL500 2dr |       5.0       |  8  |   16.0   |  24.0   |
| Mercedes-Benz E500 4dr  |       5.0       |  8  |   16.0   |  20.0   |
| Mercedes-Benz S500 4dr  |       5.0       |  8  |   16.0   |  24.0   |
+-------------------------+-----------------+-----+----------+---------+




In [16]:
cars = df[df['Cyl'] >= 8].sort_values(by='Len', ascending=True)
cars = cars.head(5)
carswed = cars[cars['Len'] == cars['Len'].min()]
carlen = carswed[carswed['Width'] == carswed['Width'].min()]
tdata = []
for index, row in cars.iterrows():
    tdata.append([row['Vehicle Name'] , row['Cyl'], row['Weight'] , row['Width'], row['Len'] ])

headers = ['Vehicle Name', 'Cyl', 'Weight', 'Width', 'Len']
title = carlen.iloc[0]['Vehicle Name'] + " is the most compact vehicle with an 8 or more Cylinders engine"
printTable(title, tdata, headers)



Dodge Viper SRT-10 convertible 2dr is the most compact vehicle with an 8 or more Cylinders engine


+-------------------------------------+-----+--------+-------+-------+
|            Vehicle Name             | Cyl | Weight | Width |  Len  |
+-------------------------------------+-----+--------+-------+-------+
| Dodge Viper SRT-10 convertible 2dr  | 10  | 3410.0 | 75.0  | 176.0 |
|    Lexus SC 430 convertible 2dr     |  8  | 3840.0 | 72.0  | 178.0 |
|    Cadillac XLR convertible 2dr     |  8  | 3647.0 | 72.0  | 178.0 |
|     Mercedes-Benz SL55 AMG 2dr      |  8  | 4235.0 | 72.0  | 179.0 |
| Mercedes-Benz SL600 convertible 2dr | 12  | 4429.0 | 72.0  | 179.0 |
+-------------------------------------+-----+--------+-------+-------+




In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 428 entries, 0 to 427
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Vehicle Name     428 non-null    object 
 1   Sedan            428 non-null    int64  
 2   Sports Car       428 non-null    int64  
 3   SUV              428 non-null    int64  
 4   Wagon            428 non-null    int64  
 5   Minivan          428 non-null    int64  
 6   Pickup           428 non-null    int64  
 7   AWD              428 non-null    int64  
 8   RWD              428 non-null    int64  
 9   Retail Price     428 non-null    int64  
 10  Dealer Cost      428 non-null    int64  
 11  Engine Size (l)  428 non-null    float64
 12  Cyl              428 non-null    int64  
 13  HP               428 non-null    int64  
 14  City MPG         428 non-null    float64
 15  Hwy MPG          428 non-null    float64
 16  Weight           428 non-null    float64
 17  Wheel Base      