#### SUMMARY STATISTICS

<br>

## Summarizing Automobile Evaluation Data
<hr>

In [2]:
import pandas as pd
import numpy as np

car_eval = pd.read_csv('car_eval_dataset.csv')
car_eval.head()

Unnamed: 0,buying_cost,maintenance_cost,doors,capacity,luggage,safety,acceptability,manufacturer_country
0,vhigh,low,4,4,small,med,unacc,China
1,vhigh,med,3,4,small,high,acc,France
2,med,high,3,2,med,high,unacc,United States
3,low,med,4,more,big,low,unacc,United States
4,low,high,2,more,med,high,acc,South Korea


### Summarizing Manufacturing Country

In [3]:
manufacturer_country_frequency = car_eval['manufacturer_country'].value_counts()
print(manufacturer_country_frequency)

Japan            228
Germany          218
South Korea      159
United States    138
Italy             97
France            87
China             73
Name: manufacturer_country, dtype: int64


In [7]:
fourth_popular_manufacturer_country = manufacturer_country_frequency.index[4-1]
print("The fourth popular manufacturer country is: " + str(fourth_popular_manufacturer_country))

The fourth popular manufacturer country is: United States


In [5]:
manufacturer_country_proportion = car_eval['manufacturer_country'].value_counts(normalize = True)
print(manufacturer_country_proportion)

Japan            0.228
Germany          0.218
South Korea      0.159
United States    0.138
Italy            0.097
France           0.087
China            0.073
Name: manufacturer_country, dtype: float64


In [8]:
japan_proportion = (car_eval['manufacturer_country'] == 'Japan').mean()
print("The proportion of Japan is: " + str(japan_proportion))

The proportion of Japan is: 0.228


### Summarizing Buying Costs

In [9]:
buying_cost = car_eval['buying_cost'].unique()
print(buying_cost)

['vhigh' 'med' 'low' 'high']


In [10]:
buying_cost_categories = ['low', 'med', 'high', 'vhigh']

car_eval['buying_cost'] = pd.Categorical(car_eval['buying_cost'], buying_cost_categories, ordered = True)
print(car_eval['buying_cost'])

0      vhigh
1      vhigh
2        med
3        low
4        low
       ...  
995      low
996      low
997    vhigh
998      low
999      low
Name: buying_cost, Length: 1000, dtype: category
Categories (4, object): ['low' < 'med' < 'high' < 'vhigh']


In [13]:
buying_cost_median_num = np.median(car_eval['buying_cost'].cat.codes)
print(buying_cost_median_num)

buying_cost_median = buying_cost_categories[int(buying_cost_median_num)]
print(buying_cost_median)

1.0
med


### Summarizing Luggage Capacity

In [14]:
luggage_prop_1 = car_eval['luggage'].value_counts(normalize = True)
print(luggage_prop_1)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64


In [15]:
luggage_missing_na = car_eval['luggage'].value_counts(normalize = True, dropna = False)
print(luggage_missing_na)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64


In [16]:
luggage_prop_2 = car_eval['luggage'].value_counts() / len(car_eval['luggage'])
print(luggage_prop_2)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64


### Summarizing Passenger Capacity

In [17]:
doors = car_eval['doors'].unique()
print(doors)

['4' '3' '2' '5more']


In [19]:
doors_5more_freq = (car_eval['doors'] == '5more').sum()
print("Frequency: " + str(doors_5more_freq))

doors_5more_prop = (car_eval['doors'] == '5more').mean()
print("Proportion: " + str(doors_5more_prop))

Frequency: 246
Proportion: 0.246
