In this guided project, we'll work with a dataset of used cars from eBay Kleinanzeigen, a [classifieds](https://en.wikipedia.org/wiki/Classified_advertising) section of the German eBay website.
The dataset was originally [scraped](https://en.wikipedia.org/wiki/Web_scraping) and uploaded to [Kaggle](https://www.kaggle.com/orgesleka/used-cars-database/data). We've made a few modifications from the original dataset that was uploaded to Kaggle:
- We sampled 50,000 data points from the full dataset, to ensure your code runs quickly in our hosted environment
- We dirtied the dataset a bit to more closely resemble what you would expect from a scraped dataset (the version uploaded to Kaggle was cleaned to be easier to work with)


In [23]:
import numpy as np, pandas as pd

In [24]:
autos = pd.read_csv("autos.csv", encoding = 'Latin-1')

In [25]:
autos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 371528 entries, 0 to 371527
Data columns (total 20 columns):
dateCrawled            371528 non-null object
name                   371528 non-null object
seller                 371528 non-null object
offerType              371528 non-null object
price                  371528 non-null int64
abtest                 371528 non-null object
vehicleType            333659 non-null object
yearOfRegistration     371528 non-null int64
gearbox                351319 non-null object
powerPS                371528 non-null int64
model                  351044 non-null object
kilometer              371528 non-null int64
monthOfRegistration    371528 non-null int64
fuelType               338142 non-null object
brand                  371528 non-null object
notRepairedDamage      299468 non-null object
dateCreated            371528 non-null object
nrOfPictures           371528 non-null int64
postalCode             371528 non-null int64
lastSeen              

In [26]:
autos.head()

Unnamed: 0,dateCrawled,name,seller,offerType,price,abtest,vehicleType,yearOfRegistration,gearbox,powerPS,model,kilometer,monthOfRegistration,fuelType,brand,notRepairedDamage,dateCreated,nrOfPictures,postalCode,lastSeen
0,2016-03-24 11:52:17,Golf_3_1.6,privat,Angebot,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,,2016-03-24 00:00:00,0,70435,2016-04-07 03:16:57
1,2016-03-24 10:58:45,A5_Sportback_2.7_Tdi,privat,Angebot,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja,2016-03-24 00:00:00,0,66954,2016-04-07 01:46:50
2,2016-03-14 12:52:21,"Jeep_Grand_Cherokee_""Overland""",privat,Angebot,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,,2016-03-14 00:00:00,0,90480,2016-04-05 12:47:46
3,2016-03-17 16:54:04,GOLF_4_1_4__3TÜRER,privat,Angebot,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein,2016-03-17 00:00:00,0,91074,2016-03-17 17:40:17
4,2016-03-31 17:25:20,Skoda_Fabia_1.4_TDI_PD_Classic,privat,Angebot,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein,2016-03-31 00:00:00,0,60437,2016-04-06 10:17:21


- The dataset contains 20 columns, most of which are strings.
- Some columns have null values, but none have more than ~20% null values.
- The column names use camelcase instead of Python's preferred snakecase, which means we can't just replace spaces with underscores.

In [27]:
print(autos.columns)

Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest',
       'vehicleType', 'yearOfRegistration', 'gearbox', 'powerPS', 'model',
       'kilometer', 'monthOfRegistration', 'fuelType', 'brand',
       'notRepairedDamage', 'dateCreated', 'nrOfPictures', 'postalCode',
       'lastSeen'],
      dtype='object')


In [28]:
autos.rename(columns={"yearOfRegistration":"registration_year"}, inplace = True)
autos.rename(columns={"monthOfRegistration":"registration_month"}, inplace = True)
autos.rename(columns={"notRepairedDamage":"unrepaired_damage"}, inplace = True)
autos.rename(columns={"dateCreated":"ad_created"}, inplace = True)
print(autos.columns)

Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest',
       'vehicleType', 'registration_year', 'gearbox', 'powerPS', 'model',
       'kilometer', 'registration_month', 'fuelType', 'brand',
       'unrepaired_damage', 'ad_created', 'nrOfPictures', 'postalCode',
       'lastSeen'],
      dtype='object')


In [29]:
autos.rename(columns={"dateCrawled":"date_crawled"}, inplace = True)
autos.rename(columns={"offerType":"offer_type"}, inplace = True)
autos.rename(columns={"vehicleType":"vehicle_type"}, inplace = True)
autos.rename(columns={"powerPS":"power_ps"}, inplace = True)
autos.rename(columns={"fuelType":"fuel_type"}, inplace = True)
autos.rename(columns={"nrOfPictures":"nr_of_pictures"}, inplace = True)
autos.rename(columns={"postalCode":"postal_code"}, inplace = True)
autos.rename(columns={"lastSeen":"last_seen"}, inplace = True)
autos.rename(columns={"kilometer":"odometer_km"}, inplace = True)
print(autos.columns)

Index(['date_crawled', 'name', 'seller', 'offer_type', 'price', 'abtest',
       'vehicle_type', 'registration_year', 'gearbox', 'power_ps', 'model',
       'odometer_km', 'registration_month', 'fuel_type', 'brand',
       'unrepaired_damage', 'ad_created', 'nr_of_pictures', 'postal_code',
       'last_seen'],
      dtype='object')


In [30]:
autos.head()

Unnamed: 0,date_crawled,name,seller,offer_type,price,abtest,vehicle_type,registration_year,gearbox,power_ps,model,odometer_km,registration_month,fuel_type,brand,unrepaired_damage,ad_created,nr_of_pictures,postal_code,last_seen
0,2016-03-24 11:52:17,Golf_3_1.6,privat,Angebot,480,test,,1993,manuell,0,golf,150000,0,benzin,volkswagen,,2016-03-24 00:00:00,0,70435,2016-04-07 03:16:57
1,2016-03-24 10:58:45,A5_Sportback_2.7_Tdi,privat,Angebot,18300,test,coupe,2011,manuell,190,,125000,5,diesel,audi,ja,2016-03-24 00:00:00,0,66954,2016-04-07 01:46:50
2,2016-03-14 12:52:21,"Jeep_Grand_Cherokee_""Overland""",privat,Angebot,9800,test,suv,2004,automatik,163,grand,125000,8,diesel,jeep,,2016-03-14 00:00:00,0,90480,2016-04-05 12:47:46
3,2016-03-17 16:54:04,GOLF_4_1_4__3TÜRER,privat,Angebot,1500,test,kleinwagen,2001,manuell,75,golf,150000,6,benzin,volkswagen,nein,2016-03-17 00:00:00,0,91074,2016-03-17 17:40:17
4,2016-03-31 17:25:20,Skoda_Fabia_1.4_TDI_PD_Classic,privat,Angebot,3600,test,kleinwagen,2008,manuell,69,fabia,90000,7,diesel,skoda,nein,2016-03-31 00:00:00,0,60437,2016-04-06 10:17:21


In [31]:
autos.describe(include='all')

Unnamed: 0,date_crawled,name,seller,offer_type,price,abtest,vehicle_type,registration_year,gearbox,power_ps,model,odometer_km,registration_month,fuel_type,brand,unrepaired_damage,ad_created,nr_of_pictures,postal_code,last_seen
count,371528,371528,371528,371528,371528.0,371528,333659,371528.0,351319,371528.0,351044,371528.0,371528.0,338142,371528,299468,371528,371528.0,371528.0,371528
unique,280500,233531,2,2,,2,8,,2,,251,,,7,40,2,114,,,182806
top,2016-03-24 14:49:47,Ford_Fiesta,privat,Angebot,,test,limousine,,manuell,,golf,,,benzin,volkswagen,nein,2016-04-03 00:00:00,,,2016-04-07 06:45:59
freq,7,657,371525,371516,,192585,95894,,274214,,30070,,,223857,79640,263182,14450,,,17
mean,,,,,17295.14,,,2004.577997,,115.549477,,125618.688228,5.734445,,,,,0.0,50820.66764,
std,,,,,3587954.0,,,92.866598,,192.139578,,40112.337051,3.712412,,,,,0.0,25799.08247,
min,,,,,0.0,,,1000.0,,0.0,,5000.0,0.0,,,,,0.0,1067.0,
25%,,,,,1150.0,,,1999.0,,70.0,,125000.0,3.0,,,,,0.0,30459.0,
50%,,,,,2950.0,,,2003.0,,105.0,,150000.0,6.0,,,,,0.0,49610.0,
75%,,,,,7200.0,,,2008.0,,150.0,,150000.0,9.0,,,,,0.0,71546.0,


there are a number of text columns where almost all of the values are the same

In [34]:
autos["odometer_km"].unique().shape

(13,)

In [35]:
autos["odometer_km"].describe()

count    371528.000000
mean     125618.688228
std       40112.337051
min        5000.000000
25%      125000.000000
50%      150000.000000
75%      150000.000000
max      150000.000000
Name: odometer_km, dtype: float64

In [36]:
autos["odometer_km"].value_counts()

150000    240797
125000     38067
100000     15920
90000      12523
80000      11053
70000       9773
60000       8669
50000       7615
5000        7069
40000       6376
30000       6041
20000       5676
10000       1949
Name: odometer_km, dtype: int64

In [37]:
autos["date_crawled"].describe()

count                  371528
unique                 280500
top       2016-03-24 14:49:47
freq                        7
Name: date_crawled, dtype: object

In [39]:
autos["date_crawled"].value_counts(normalize=True, dropna=False).sort_index() 

2016-03-05 14:06:22    0.000003
2016-03-05 14:06:23    0.000003
2016-03-05 14:06:24    0.000008
2016-03-05 14:06:25    0.000005
2016-03-05 14:06:26    0.000003
2016-03-05 14:06:27    0.000005
2016-03-05 14:06:28    0.000003
2016-03-05 14:06:29    0.000005
2016-03-05 14:06:30    0.000005
2016-03-05 14:06:40    0.000003
2016-03-05 14:06:47    0.000005
2016-03-05 14:06:48    0.000003
2016-03-05 14:06:49    0.000003
2016-03-05 14:06:50    0.000005
2016-03-05 14:07:00    0.000003
2016-03-05 14:07:01    0.000003
2016-03-05 14:07:02    0.000003
2016-03-05 14:07:03    0.000003
2016-03-05 14:07:04    0.000008
2016-03-05 14:07:05    0.000003
2016-03-05 14:07:06    0.000005
2016-03-05 14:07:07    0.000003
2016-03-05 14:07:08    0.000008
2016-03-05 14:07:09    0.000003
2016-03-05 14:07:20    0.000003
2016-03-05 14:07:21    0.000005
2016-03-05 14:07:25    0.000003
2016-03-05 14:07:26    0.000003
2016-03-05 14:07:27    0.000005
2016-03-05 14:07:28    0.000003
                         ...   
2016-04-

In [40]:
autos.describe(include='all')

Unnamed: 0,date_crawled,name,seller,offer_type,price,abtest,vehicle_type,registration_year,gearbox,power_ps,model,odometer_km,registration_month,fuel_type,brand,unrepaired_damage,ad_created,nr_of_pictures,postal_code,last_seen
count,371528,371528,371528,371528,371528.0,371528,333659,371528.0,351319,371528.0,351044,371528.0,371528.0,338142,371528,299468,371528,371528.0,371528.0,371528
unique,280500,233531,2,2,,2,8,,2,,251,,,7,40,2,114,,,182806
top,2016-03-24 14:49:47,Ford_Fiesta,privat,Angebot,,test,limousine,,manuell,,golf,,,benzin,volkswagen,nein,2016-04-03 00:00:00,,,2016-04-07 06:45:59
freq,7,657,371525,371516,,192585,95894,,274214,,30070,,,223857,79640,263182,14450,,,17
mean,,,,,17295.14,,,2004.577997,,115.549477,,125618.688228,5.734445,,,,,0.0,50820.66764,
std,,,,,3587954.0,,,92.866598,,192.139578,,40112.337051,3.712412,,,,,0.0,25799.08247,
min,,,,,0.0,,,1000.0,,0.0,,5000.0,0.0,,,,,0.0,1067.0,
25%,,,,,1150.0,,,1999.0,,70.0,,125000.0,3.0,,,,,0.0,30459.0,
50%,,,,,2950.0,,,2003.0,,105.0,,150000.0,6.0,,,,,0.0,49610.0,
75%,,,,,7200.0,,,2008.0,,150.0,,150000.0,9.0,,,,,0.0,71546.0,


In [43]:
autos["registration_year"].describe()

count    371528.000000
mean       2004.577997
std          92.866598
min        1000.000000
25%        1999.000000
50%        2003.000000
75%        2008.000000
max        9999.000000
Name: registration_year, dtype: float64

One thing that stands out from the exploration we did in the last screen is that the registration_year column contains some odd values:

- The minimum value is 1000, before cars were invented
- The maximum value is 9999, many years into the future
Because a car can't be first registered after the listing was seen, any vehicle with a registration year above 2016 is definitely inaccurate. Determining the earliest valid year is more difficult. Realistically, it could be somewhere in the first few decades of the 1900s.

In [44]:
autos["registration_year"] = autos[(autos["registration_year"]>1950) & autos["registration_year"]<2019]

In [45]:
autos["registration_year"].describe()

count                  371528
unique                 280500
top       2016-03-24 14:49:47
freq                        7
Name: registration_year, dtype: object

In [46]:
autos["registration_year"].value_counts(normalize=True)

2016-03-24 14:49:47    0.000019
2016-03-19 21:49:56    0.000016
2016-03-26 22:57:31    0.000016
2016-03-14 16:51:53    0.000013
2016-03-21 14:50:20    0.000013
2016-03-16 13:47:44    0.000013
2016-04-02 21:54:36    0.000013
2016-03-05 14:44:30    0.000013
2016-03-07 17:36:19    0.000013
2016-03-28 10:36:47    0.000013
2016-03-05 14:25:23    0.000013
2016-03-11 15:36:59    0.000013
2016-04-02 22:54:55    0.000013
2016-03-31 16:50:28    0.000013
2016-03-09 13:50:56    0.000013
2016-04-04 22:38:11    0.000013
2016-03-31 18:50:16    0.000013
2016-04-01 15:56:46    0.000013
2016-04-01 16:52:05    0.000013
2016-03-22 15:50:32    0.000013
2016-03-16 17:43:49    0.000013
2016-03-26 10:51:07    0.000013
2016-04-01 19:25:23    0.000013
2016-03-31 17:57:07    0.000013
2016-03-29 22:50:49    0.000013
2016-03-08 15:50:29    0.000013
2016-04-02 14:50:21    0.000013
2016-03-09 16:48:39    0.000013
2016-04-03 13:41:21    0.000013
2016-03-20 16:50:22    0.000013
                         ...   
2016-03-

In [47]:
autos.describe(include='all')

Unnamed: 0,date_crawled,name,seller,offer_type,price,abtest,vehicle_type,registration_year,gearbox,power_ps,model,odometer_km,registration_month,fuel_type,brand,unrepaired_damage,ad_created,nr_of_pictures,postal_code,last_seen
count,371528,371528,371528,371528,371528.0,371528,333659,371528,351319,371528.0,351044,371528.0,371528.0,338142,371528,299468,371528,371528.0,371528.0,371528
unique,280500,233531,2,2,,2,8,280500,2,,251,,,7,40,2,114,,,182806
top,2016-03-24 14:49:47,Ford_Fiesta,privat,Angebot,,test,limousine,2016-03-24 14:49:47,manuell,,golf,,,benzin,volkswagen,nein,2016-04-03 00:00:00,,,2016-04-07 06:45:59
freq,7,657,371525,371516,,192585,95894,7,274214,,30070,,,223857,79640,263182,14450,,,17
mean,,,,,17295.14,,,,,115.549477,,125618.688228,5.734445,,,,,0.0,50820.66764,
std,,,,,3587954.0,,,,,192.139578,,40112.337051,3.712412,,,,,0.0,25799.08247,
min,,,,,0.0,,,,,0.0,,5000.0,0.0,,,,,0.0,1067.0,
25%,,,,,1150.0,,,,,70.0,,125000.0,3.0,,,,,0.0,30459.0,
50%,,,,,2950.0,,,,,105.0,,150000.0,6.0,,,,,0.0,49610.0,
75%,,,,,7200.0,,,,,150.0,,150000.0,9.0,,,,,0.0,71546.0,


In [50]:
autos["brand"].describe

<bound method NDFrame.describe of 0             volkswagen
1                   audi
2                   jeep
3             volkswagen
4                  skoda
5                    bmw
6                peugeot
7             volkswagen
8                   ford
9             volkswagen
10                 mazda
11            volkswagen
12            volkswagen
13            volkswagen
14                nissan
15                  ford
16            volkswagen
17               renault
18                  ford
19         mercedes_benz
20            volkswagen
21                   bmw
22                  opel
23                  seat
24         mercedes_benz
25                   bmw
26               citroen
27                 honda
28            volkswagen
29                  fiat
               ...      
371498              opel
371499              opel
371500        volkswagen
371501          chrysler
371502              audi
371503            subaru
371504     mercedes_benz
371505          

In [64]:
# Create an empty dictionary to store the results
avg_rev_by_price = {}

# Create an array of unique brands
brands = autos["brand"].unique()

# Use a for loop to iterate over the brands
for b in brands:
    # Use boolean comparison to select only rows that
    # correspond to a specific brand
    selected_rows = autos[autos["brand"] == b]
    # Calculate the mean average price for just those rows
    mean = selected_rows["price"].mean()
    
    # Assign the mean value to the dictionary, using the
    # brand name as the key
    
    avg_rev_by_price[b] = mean

In [65]:
print(avg_rev_by_price)

{'volkswagen': 14533.496007031643, 'audi': 15868.5142213975, 'jeep': 12009.677819083023, 'skoda': 6411.183478106719, 'bmw': 14844.144882554501, 'peugeot': 3166.9815906411536, 'ford': 8462.054706135377, 'mazda': 5680.361194029851, 'nissan': 4527.5108199324995, 'renault': 2334.258945962491, 'mercedes_benz': 17244.060437848708, 'opel': 3223.522448674507, 'seat': 4356.66604955853, 'citroen': 8880.624855268235, 'honda': 3804.6301128349787, 'fiat': 5326.312835882596, 'mini': 9846.432233352976, 'smart': 3531.5620118117736, 'hyundai': 5415.057048820625, 'sonstige_autos': 615647.6243093923, 'alfa_romeo': 35737.1697228145, 'subaru': 4248.9486521181, 'volvo': 8774.005109708445, 'mitsubishi': 3273.8967657628227, 'kia': 5656.574168297456, 'suzuki': 4371.6834192439865, 'lancia': 3165.754132231405, 'porsche': 51089.85733634311, 'toyota': 5232.657008947593, 'chevrolet': 7447.764227642277, 'dacia': 5839.654444444444, 'daihatsu': 1693.6563275434244, 'trabant': 20415.935702199662, 'saab': 3820.0415094339

we aggregated across brands to understand mean price. We observed that in the top 6 brands, there's a distinct price gap.

- Audi, BMW and Mercedes Benz are more expensive
- Ford and Opel are less expensive
- Volkswagen is in between

In [67]:
# Create an empty dictionary to store the results
avg_rev_by_miles = {}

# Create an array of unique brands
brands = autos["brand"].unique()

# Use a for loop to iterate over the brands
for b in brands:
    # Use boolean comparison to select only rows that
    # correspond to a specific brand
    selected_rows = autos[autos["brand"] == b]
    # Calculate the mean average price for just those rows
    mean = selected_rows["odometer_km"].mean()
    
    # Assign the mean value to the dictionary, using the
    # brand name as the key
    
    avg_rev_by_miles[b] = mean

In [68]:
print(avg_rev_by_miles)

{'volkswagen': 128575.21346057257, 'audi': 129717.39725610684, 'jeep': 120960.34696406443, 'skoda': 113838.85835844709, 'bmw': 132763.56954859215, 'peugeot': 124970.0734560624, 'ford': 123839.98748680249, 'mazda': 125988.58647936786, 'nissan': 119789.55727615644, 'renault': 128049.14018587567, 'mercedes_benz': 130664.70305021382, 'opel': 128906.59258521028, 'seat': 121473.93904870408, 'citroen': 120512.35044384407, 'honda': 125943.2299012694, 'fiat': 116854.58867300538, 'mini': 94668.53270477313, 'smart': 100617.2604305582, 'hyundai': 104695.55677454745, 'sonstige_autos': 86519.33701657459, 'alfa_romeo': 128842.21748400854, 'subaru': 126046.21309370988, 'volvo': 138140.9678388939, 'mitsubishi': 126924.20777523685, 'kia': 109643.83561643836, 'suzuki': 107121.99312714777, 'lancia': 124524.79338842975, 'porsche': 98316.02708803612, 'toyota': 117341.28674904133, 'chevrolet': 100116.53116531165, 'dacia': 86427.77777777778, 'daihatsu': 119044.66501240694, 'trabant': 55634.51776649746, 'saab'

In [75]:
price_series = pd.Series(avg_rev_by_price)
print(price_series)

volkswagen         14533.496007
audi               15868.514221
jeep               12009.677819
skoda               6411.183478
bmw                14844.144883
peugeot             3166.981591
ford                8462.054706
mazda               5680.361194
nissan              4527.510820
renault             2334.258946
mercedes_benz      17244.060438
opel                3223.522449
seat                4356.666050
citroen             8880.624855
honda               3804.630113
fiat                5326.312836
mini                9846.432233
smart               3531.562012
hyundai             5415.057049
sonstige_autos    615647.624309
alfa_romeo         35737.169723
subaru              4248.948652
volvo               8774.005110
mitsubishi          3273.896766
kia                 5656.574168
suzuki              4371.683419
lancia              3165.754132
porsche            51089.857336
toyota              5232.657009
chevrolet           7447.764228
dacia               5839.654444
daihatsu

In [76]:
mp = pd.DataFrame(price_series, columns=['mean_price'])
mp

Unnamed: 0,mean_price
volkswagen,14533.496007
audi,15868.514221
jeep,12009.677819
skoda,6411.183478
bmw,14844.144883
peugeot,3166.981591
ford,8462.054706
mazda,5680.361194
nissan,4527.51082
renault,2334.258946


In [77]:
miles_series = pd.Series(avg_rev_by_miles)
print(miles_series)

volkswagen        128575.213461
audi              129717.397256
jeep              120960.346964
skoda             113838.858358
bmw               132763.569549
peugeot           124970.073456
ford              123839.987487
mazda             125988.586479
nissan            119789.557276
renault           128049.140186
mercedes_benz     130664.703050
opel              128906.592585
seat              121473.939049
citroen           120512.350444
honda             125943.229901
fiat              116854.588673
mini               94668.532705
smart             100617.260431
hyundai           104695.556775
sonstige_autos     86519.337017
alfa_romeo        128842.217484
subaru            126046.213094
volvo             138140.967839
mitsubishi        126924.207775
kia               109643.835616
suzuki            107121.993127
lancia            124524.793388
porsche            98316.027088
toyota            117341.286749
chevrolet         100116.531165
dacia              86427.777778
daihatsu

In [78]:
mm = pd.DataFrame(miles_series, columns=['mean_miles'])
mm

Unnamed: 0,mean_miles
volkswagen,128575.213461
audi,129717.397256
jeep,120960.346964
skoda,113838.858358
bmw,132763.569549
peugeot,124970.073456
ford,123839.987487
mazda,125988.586479
nissan,119789.557276
renault,128049.140186


In [79]:
autos["mean_price"] = mp
autos["mean_miles"] = mm

In [81]:
autos.describe(include='all')

Unnamed: 0,date_crawled,name,seller,offer_type,price,abtest,vehicle_type,registration_year,gearbox,power_ps,...,registration_month,fuel_type,brand,unrepaired_damage,ad_created,nr_of_pictures,postal_code,last_seen,mean_price,mean_miles
count,371528,371528,371528,371528,371528.0,371528,333659,371528,351319,371528.0,...,371528.0,338142,371528,299468,371528,371528.0,371528.0,371528,0.0,0.0
unique,280500,233531,2,2,,2,8,280500,2,,...,,7,40,2,114,,,182806,,
top,2016-03-24 14:49:47,Ford_Fiesta,privat,Angebot,,test,limousine,2016-03-24 14:49:47,manuell,,...,,benzin,volkswagen,nein,2016-04-03 00:00:00,,,2016-04-07 06:45:59,,
freq,7,657,371525,371516,,192585,95894,7,274214,,...,,223857,79640,263182,14450,,,17,,
mean,,,,,17295.14,,,,,115.549477,...,5.734445,,,,,0.0,50820.66764,,,
std,,,,,3587954.0,,,,,192.139578,...,3.712412,,,,,0.0,25799.08247,,,
min,,,,,0.0,,,,,0.0,...,0.0,,,,,0.0,1067.0,,,
25%,,,,,1150.0,,,,,70.0,...,3.0,,,,,0.0,30459.0,,,
50%,,,,,2950.0,,,,,105.0,...,6.0,,,,,0.0,49610.0,,,
75%,,,,,7200.0,,,,,150.0,...,9.0,,,,,0.0,71546.0,,,


In [83]:
mp["mean_miles"] = mm
mp

Unnamed: 0,mean_price,mean_miles
volkswagen,14533.496007,128575.213461
audi,15868.514221,129717.397256
jeep,12009.677819,120960.346964
skoda,6411.183478,113838.858358
bmw,14844.144883,132763.569549
peugeot,3166.981591,124970.073456
ford,8462.054706,123839.987487
mazda,5680.361194,125988.586479
nissan,4527.51082,119789.557276
renault,2334.258946,128049.140186
