### Importing Libraries

In [11]:
import pandas as pd

### Reading the CSV file

In [12]:
# Load the car-evaluation data and preview the first five rows.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like a
# row index saved into the file — consider index_col=0; confirm before changing.
CAR_EVAL_CSV = 'car_eval_dataset.csv'
car_eval = pd.read_csv(CAR_EVAL_CSV)
car_eval.head()

Unnamed: 0,buying_cost,maintenance_cost,doors,capacity,luggage,safety,acceptability,manufacturer_country
0,vhigh,low,4,4,small,med,unacc,China
1,vhigh,med,3,4,small,high,acc,France
2,med,high,3,2,med,high,unacc,United States
3,low,med,4,more,big,low,unacc,United States
4,low,high,2,more,med,high,acc,South Korea


### Table of Frequencies using value_counts()

In [14]:
# Count the cars per manufacturing country; value_counts() lists the most frequent first.
car_eval['manufacturer_country'].value_counts()

Japan            228
Germany          218
South Korea      159
United States    138
Italy             97
France            87
China             73
Name: manufacturer_country, dtype: int64

### Retrieving the 4th Most Frequent Car Manufacturing Country

In [13]:
# value_counts() sorts by descending frequency, so position 3 of its index
# holds the 4th most common manufacturing country.
country_counts = car_eval['manufacturer_country'].value_counts()
country_counts.index[3]

'United States'

### Table of Proportions for Manufacturing Countries

In [15]:
# normalize=True reports each country's share of the rows instead of a raw count.
car_eval['manufacturer_country'].value_counts(normalize=True)

Japan            0.228
Germany          0.218
South Korea      0.159
United States    0.138
Italy            0.097
France           0.087
China            0.073
Name: manufacturer_country, dtype: float64

### Obtaining unique buying costs of cars

In [16]:
# List the distinct buying-cost labels present in the column.
car_eval['buying_cost'].unique()

array(['vhigh', 'med', 'low', 'high'], dtype=object)

### Converting buying cost to Categorical

In [27]:
# Cost levels listed from cheapest to most expensive; this order defines the
# ranking of the ordered categorical built below.
buying_cost_categories = ['low', 'med', 'high', 'vhigh']
car_eval['buying_cost'] = pd.Categorical(
    car_eval['buying_cost'],
    categories=buying_cost_categories,
    ordered=True,
)
car_eval['buying_cost']

0      vhigh
1      vhigh
2        med
3        low
4        low
       ...  
995      low
996      low
997    vhigh
998      low
999      low
Name: buying_cost, Length: 1000, dtype: category
Categories (4, object): ['low' < 'med' < 'high' < 'vhigh']

### Obtaining the median buying cost category

In [32]:
# cat.codes encodes each value as its position in the ordered category list
# and uses -1 for missing values (including labels that are not among the
# declared categories). Drop those sentinels so they cannot pull the median down.
buying_cost_codes = car_eval['buying_cost'].cat.codes
median_cat_num = buying_cost_codes[buying_cost_codes >= 0].median()
median_cat_num

1.0

In [33]:
# Translate the numeric median code back into its category label.
# int() truncates, so a fractional median (e.g. 1.5) maps to the lower category.
median_position = int(median_cat_num)
median_category = buying_cost_categories[median_position]
median_category

'med'

### Table of Proportions for Luggage

In [18]:
# Share of cars in each luggage-size category (missing values are excluded by default).
car_eval['luggage'].value_counts(normalize=True)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64

### Table of Proportions for Luggage, Including Null Values

In [19]:
# dropna=False keeps missing values as their own row in the proportions table.
car_eval['luggage'].value_counts(normalize=True, dropna=False)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64

### Table of Proportions for Luggage without the `normalize` parameter

In [20]:
# Compute the proportions by hand: raw category counts divided by the total row count.
luggage_counts = car_eval['luggage'].value_counts()
luggage_counts / len(car_eval)

small    0.339
med      0.333
big      0.328
Name: luggage, dtype: float64

### Cars with 5 or more doors

In [21]:
# Comparing against '5more' yields a boolean Series; summing it counts the matching rows.
car_eval['doors'].eq('5more').sum()

246

### Different approaches to finding the proportion of cars with 5 or more doors

In [34]:
# Count the rows whose door value is '5more', then divide by the total number of rows.
has_five_plus_doors = car_eval['doors'] == '5more'
has_five_plus_doors.sum() / len(car_eval)

0.246

In [35]:
# Look the proportion up by its '5more' label. A bare integer key on this
# string-labelled Series relies on positional fallback (deprecated in recent
# pandas) and would silently return another category's share if the
# frequency ranking of the door values changed.
car_eval['doors'].value_counts(normalize=True)['5more']

0.246

In [36]:
# The mean of a boolean Series is the fraction of True values, i.e. the
# proportion of cars whose door value is '5more'.
car_eval['doors'].eq('5more').mean()

0.246