# Pandas Exercise

In [58]:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(0)
import pandas as pd

In [59]:
def df_info(df: pd.DataFrame, n: int = 20) -> "pd.io.formats.style.Styler":
    """Return a styled preview of the first ``n`` rows of ``df``.

    Args:
        df: Frame to preview.
        n: Maximum number of leading rows to include. Defaults to 20.

    Returns:
        The ``.style`` object of ``df.head(n)``. When the call is the last
        expression of a notebook cell, Jupyter renders it as a table.
    """
    # The annotation is a string so that defining the function does not
    # require importing the Styler module up front.
    return df.head(n=n).style

## Cars Auction Dataset

| Feature      | Type    | Description                                                                                                                                                                                                             |
|--------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Price        | Integer | The sale price of the vehicle in the ad                                                                                                                                                                                     |
| Years        | Integer | The vehicle registration year                                                                                                                                                                                               |
| Brand        | String  | The brand of car                                                                                                                                                                                                            |
| Model        | String  | model of the vehicle                                                                                                                                                                                                        |
| Color        | String  | Color of the vehicle                                                                                                                                                                                                        |
| State/City   | String  | The location in which the car is available for purchase                                                                                                                                                                     |
| Mileage      | Float   | miles traveled by vehicle                                                                                                                                                                                                   |
| Title Status | String  | Binary classification of the title: clean vehicle or salvage insurance                                                                                                                                                      |
| Condition    | String  | Time remaining until the auction listing ends (e.g. '10 days left')                                                                                                                                                         |

In [60]:
# Load the auction listings; the path is relative to the notebook's directory.
csv_path = "../data/USA_cars_datasets.csv"
df = pd.read_csv(csv_path)

# List the column names of the loaded frame.
print(df.columns)

Index(['price', 'brand', 'model', 'year', 'title_status', 'mileage', 'color',
       'state', 'country', 'condition'],
      dtype='object')


In [61]:
# Preview the first five rows (5 is the default of head()).
df.head(n=5)

Unnamed: 0,price,brand,model,year,title_status,mileage,color,state,country,condition
0,6300,toyota,cruiser,2008,clean vehicle,274117.0,black,new jersey,usa,10 days left
1,2899,ford,se,2011,clean vehicle,190552.0,silver,tennessee,usa,6 days left
2,5350,dodge,mpv,2018,clean vehicle,39590.0,silver,georgia,usa,2 days left
3,25000,ford,door,2014,clean vehicle,64146.0,blue,virginia,usa,22 hours left
4,27700,chevrolet,1500,2018,clean vehicle,6654.0,red,florida,usa,22 hours left


## Exercise 1

- Get the number of cars listed per US state

In [62]:
# Count the rows per state value and show the first 20 entries of the result.
state_counts = df["state"].value_counts()
state_counts.head(20)

pennsylvania      299
florida           246
texas             214
california        190
michigan          169
north carolina    146
minnesota         119
illinois          113
wisconsin          94
virginia           90
new jersey         87
nevada             85
oklahoma           71
south carolina     64
new york           58
georgia            51
missouri           46
arizona            33
ohio               31
massachusetts      27
Name: state, dtype: int64

## Exercise 2

- Get all cars from the state of new mexico

In [63]:
# Select the New Mexico rows once and derive the count from that selection.
# This avoids a value_counts() pass over every state and reports 0 instead
# of raising KeyError when no row matches.
nm_filter = df["state"] == "new mexico"
nm_cars = df[nm_filter]

print(f"Number of cars from new mexico: {len(nm_cars)}")
print("List of new mexico's cars:")
df_info(nm_cars)

Number of cars from new mexico: 4
List of new mexico's cars:


Unnamed: 0,price,brand,model,year,title_status,mileage,color,state,country,condition
1151,25300,dodge,charger,2019,clean vehicle,17515.0,red,new mexico,usa,21 hours left
1153,29500,dodge,durango,2019,clean vehicle,14656.0,gray,new mexico,usa,21 hours left
1695,27900,ford,mustang,2019,clean vehicle,13558.0,white,new mexico,usa,21 hours left
2331,26800,nissan,titan,2018,clean vehicle,33605.0,white,new mexico,usa,21 hours left


## Exercise 3

- Compute the mean mileage of all cars from new york

In [64]:
# Boolean mask of the rows whose state is "new york".
ny_filter = df["state"].eq("new york")

# Average the mileage column over the masked rows only.
ny_mean_mileage = df.loc[ny_filter, "mileage"].mean()
print(f"Mean mileage of all cars from new york: {ny_mean_mileage:.2f}")

Mean mileage of all cars from new york: 55144.57


## Exercise 4

- Remove all entries where the year is below 2019

In [65]:
# Mask of rows with a year below 2019; df["year"] and df.year are equivalent here.
year_filter = df["year"] < 2019

print(f"Length bevor drop: {len(df)}")
# Remove the masked rows by index label, mutating df in place so that
# later cells operate on the reduced frame.
outdated_index = df.loc[year_filter].index
df.drop(index=outdated_index, inplace=True)
print(f"Length after drop: {len(df)}")


Length bevor drop: 2499
Length after drop: 940


## Exercise 5

- Replace each color value with the first character of the color name,
  e.g. 'blue' => 'b'

In [66]:
# Keep only the first character of each color name (e.g. 'blue' -> 'b').
# The vectorized .str accessor replaces the per-element Python lambda and
# yields NaN for missing values instead of raising TypeError.
# Explicit column assignment is used instead of attribute assignment.
df["color"] = df["color"].str[0]
df_info(df)


Unnamed: 0,price,brand,model,year,title_status,mileage,color,state,country,condition
26,21500,kia,sorento,2020,clean vehicle,11451.0,b,north carolina,usa,2 days left
46,16200,jeep,compass,2019,clean vehicle,30061.0,r,north carolina,usa,2 days left
51,16000,jeep,compass,2019,clean vehicle,31594.0,s,north carolina,usa,2 days left
54,19600,chrysler,300,2019,clean vehicle,42648.0,g,south carolina,usa,22 hours left
56,20000,jeep,cherokee,2019,clean vehicle,38355.0,c,north carolina,usa,2 days left
58,23000,chrysler,pacifica,2020,clean vehicle,2473.0,w,ohio,usa,2 days left
59,18500,chrysler,300,2019,clean vehicle,38586.0,g,south carolina,usa,22 hours left
61,20700,jeep,cherokee,2019,clean vehicle,31156.0,w,north carolina,usa,2 days left
64,29100,chrysler,pacifica,2019,clean vehicle,25281.0,r,south carolina,usa,22 hours left
66,21100,jeep,cherokee,2019,clean vehicle,28649.0,s,north carolina,usa,2 days left
