In [2]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.stats import linregress

In [3]:
# Relative path to the used-car sales CSV that the notebook reads.
filename = Path("used_car_sales.csv")

In [4]:
# Read the CSV at `filename` into a DataFrame and preview its first five rows.
used_car_predictions_df = pd.read_csv(filepath_or_buffer=filename)
used_car_predictions_df.head(n=5)

Unnamed: 0,ID,pricesold,yearsold,zipcode,Mileage,Make,Model,Year,Trim,Engine,BodyType,NumCylinders,DriveType
0,18489,2200,2019,32804,120583,300,CHRYSLER,2006,LIMITED,V-6,SEDAN,6,RWD
1,45848,6200,2019,11727,46292,2101,,1977,,,Sedan,4,RWD
2,11937,13000,2019,89060,73000,1964 International,Scout 80,1964,,stock,SUV,4,4WD
3,20524,98000,2019,70118,3568,AC,AUTOKRAFT MK IV,1988,AK1269 ALUMINUM BODY,302 HO,ALUMINUM,0,ROADSTER
4,3285,7900,2019,35565,100,AC Cobra,,1965,,V8,,0,


In [5]:
# Show the DataFrame's column labels.
used_car_predictions_df.columns

Index(['ID', 'pricesold', 'yearsold', 'zipcode', 'Mileage', 'Make', 'Model',
       'Year', 'Trim', 'Engine', 'BodyType', 'NumCylinders', 'DriveType'],
      dtype='object')

In [14]:
# Standardise the column labels. Only the labels that actually change are listed;
# every column absent from the mapping (ID, Mileage, Make, Model, Year, Trim)
# keeps its current name.
column_renames = {
    'pricesold': 'Price_Sold',
    'yearsold': 'Year_Sold',
    'zipcode': 'Zipcode',
    'Engine': 'Engine_Type',
    'BodyType': 'Body_Type',
    'NumCylinders': 'Number_of_Cylinders',
    'DriveType': 'Drive_Type',
}

# Rebind the result instead of using inplace=True. rename() ignores mapping keys
# that are not present, so re-running this cell after the rename is a no-op.
used_car_predictions_df = used_car_predictions_df.rename(columns=column_renames)

# Print one label per line to confirm the new names.
columns = used_car_predictions_df.columns
print(*columns, sep="\n")

ID
Price_Sold
Year_Sold
Zipcode
Mileage
Make
Model
Year
Trim
Engine_Type
Body_Type
Number_of_Cylinders
Drive_Type


In [15]:
# Show the dtype of every column.
used_car_predictions_df.dtypes

ID                      int64
Price_Sold              int64
Year_Sold               int64
Zipcode                object
Mileage                 int64
Make                   object
Model                  object
Year                    int64
Trim                   object
Engine_Type            object
Body_Type              object
Number_of_Cylinders     int64
Drive_Type             object
dtype: object

In [13]:
# Count the cars whose zipcode contains two consecutive asterisks ("**").
# The column is addressed by its renamed label 'Zipcode': the rename cell above
# replaces 'zipcode', so the old lookup raised KeyError on a fresh
# top-to-bottom run. The pattern is a raw string so that '\*' reaches the regex
# engine as an escaped literal asterisk instead of being an invalid Python
# string escape.
count = used_car_predictions_df['Zipcode'].str.contains(r'\*\*', regex=True).sum()

print(count)

38289


In [17]:
# Count the rows whose 'Make' value is null.
make_is_null = used_car_predictions_df['Make'].isnull()
null_count = make_is_null.sum()

print(null_count)

0


In [22]:
# Count the rows whose 'Model' value is null.
model_is_null = used_car_predictions_df['Model'].isnull()
null_count = model_is_null.sum()

print(null_count)

573


In [23]:
# Build a new frame without the rows whose 'Model' value is null; the source
# frame is left untouched. Then preview the first five remaining rows.
new_used_car_predictions = used_car_predictions_df.dropna(
    axis="index",
    subset=['Model'],
)

new_used_car_predictions.head(n=5)

Unnamed: 0,ID,Price_Sold,Year_Sold,Zipcode,Mileage,Make,Model,Year,Trim,Engine_Type,Body_Type,Number_of_Cylinders,Drive_Type
0,18489,2200,2019,32804,120583,300,CHRYSLER,2006,LIMITED,V-6,SEDAN,6,RWD
2,11937,13000,2019,89060,73000,1964 International,Scout 80,1964,,stock,SUV,4,4WD
3,20524,98000,2019,70118,3568,AC,AUTOKRAFT MK IV,1988,AK1269 ALUMINUM BODY,302 HO,ALUMINUM,0,ROADSTER
5,85297,6760,2019,32256,90000,Acura,CL,2003,TYPE S,,,0,
6,27620,4550,2019,44124,78196,Acura,CL,1999,NO RESERVE AUCTION - LAST HIGHEST BIDDER WINS ...,4cyl,Coupe,4,FWD


In [24]:
# Count the distinct non-null values in the 'Engine_Type' column.
engine_value_count = new_used_car_predictions['Engine_Type'].nunique(dropna=True)

engine_value_count

22306

In [25]:
# Count the distinct non-null values in the 'Body_Type' column.
bodytype_value_count = new_used_car_predictions['Body_Type'].nunique(dropna=True)

bodytype_value_count

2310

In [39]:
# Tally the rows per 'Body_Type' value, most frequent first, and display the tally.
name_counts = new_used_car_predictions['Body_Type'].value_counts(
    sort=True,
    ascending=False,
)

display(name_counts)

Sedan                            18174
Coupe                            17960
SUV                              15334
Convertible                      12261
Standard Cab Pickup               4281
                                 ...  
Truck SUV convertible                1
Hi-Top Van                           1
4×4 removable hardtop pick up        1
Handicap Shuttle Bus                 1
Luxury Coupe                         1
Name: Body_Type, Length: 2310, dtype: int64

In [40]:
# Write the body-type tallies in CSV format to a file named 'body_types'
# (no extension) at a path relative to the working directory.
body_type_counts_df = pd.DataFrame(name_counts)
body_type_counts_df.to_csv('body_types')