In [1]:
%matplotlib inline

import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [2]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import LinearSVR 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score 
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import GradientBoostingRegressor
!pip install xgboost
from xgboost import XGBRegressor
!pip install catboost
from catboost import CatBoostRegressor
from sklearn.ensemble import AdaBoostRegressor



In [3]:
train = pd.read_csv("./data/JC1C2R/train-data.csv",index_col="ID")
test = pd.read_csv("./data/JC1C2R/test-data.csv",index_col="ID")

In [4]:
y = train['Price']

In [5]:
data = pd.concat((train,test))
data.reset_index(drop=True,inplace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6018 entries, 0 to 6017
Data columns (total 12 columns):
Engine               5896 non-null object
Fuel_Type            5726 non-null object
Kilometers_Driven    5172 non-null float64
Location             5924 non-null object
Mileage              5958 non-null object
Name                 6018 non-null object
Owner_Type           5753 non-null object
Power                5154 non-null object
Price                4999 non-null float64
Seats                5942 non-null float64
Transmission         6018 non-null object
Year                 5661 non-null float64
dtypes: float64(4), object(8)
memory usage: 564.3+ KB


In [6]:
data.head(30)

Unnamed: 0,Engine,Fuel_Type,Kilometers_Driven,Location,Mileage,Name,Owner_Type,Power,Price,Seats,Transmission,Year
0,1493 CC,Petrol,125943.0,Pune,13.0 kmpl,Honda City 1.5 EXI,Second,100 bhp,1.45,5.0,Manual,2001.0
1,1497 CC,Petrol,22974.0,Kochi,18.0 kmpl,Honda City i-VTEC CVT VX,First,,11.97,5.0,Automatic,2018.0
2,1794 CC,Petrol,79012.0,Coimbatore,13.4 kmpl,Toyota Corolla H2,First,,2.64,5.0,Manual,2005.0
3,1248 CC,Diesel,84175.0,Hyderabad,,Maruti Swift Dzire VDI,First,74 bhp,6.95,5.0,Manual,2013.0
4,2982 CC,,71000.0,Delhi,13.0 kmpl,Toyota Fortuner 4x2 Manual,First,168.5 bhp,15.25,7.0,Manual,2013.0
5,1968 CC,Diesel,,Chennai,20.0 kmpl,Skoda Laura Ambiente 2.0 TDI CR AT,First,138.1 bhp,6.95,5.0,Automatic,2013.0
6,2993 CC,Diesel,41000.0,Mumbai,15.3 kmpl,BMW X5 xDrive 30d,First,258 bhp,45.0,5.0,Automatic,
7,1248 CC,,29000.0,Bangalore,23.4 kmpl,Maruti Swift Dzire VDI,First,74 bhp,5.75,5.0,Manual,2014.0
8,1197 CC,Petrol,,Delhi,17.0 kmpl,Hyundai i20 Sportz Option,First,80 bhp,2.6,5.0,Manual,2010.0
9,1248 CC,Diesel,22000.0,Delhi,24.3 kmpl,Maruti Vitara Brezza ZDi Plus Dual Tone,,88.5 bhp,8.25,5.0,Manual,2016.0


In [7]:
del data['Price']

In [8]:
data.shape

(6018, 11)

## 0. Modelname

#### 1. 회사명 넣기

In [9]:
data['company'] = 0

In [10]:
for i in range(len(data)) :
   data['company'][i] = data["Name"][i].split(" ")[0]

In [11]:
data['company'].head()

0     Honda
1     Honda
2    Toyota
3    Maruti
4    Toyota
Name: company, dtype: object

In [12]:
data['company'].unique()

array(['Honda', 'Toyota', 'Maruti', 'Skoda', 'BMW', 'Hyundai', 'Mahindra',
       'Mercedes-Benz', 'Tata', 'Jeep', 'Audi', 'Land', 'Renault',
       'Volkswagen', 'Chevrolet', 'Nissan', 'Porsche', 'Ford', 'Jaguar',
       'Mitsubishi', 'Datsun', 'Fiat', 'Volvo', 'Mini', 'Force', 'Isuzu',
       'ISUZU', 'Bentley', 'Lamborghini'], dtype=object)

In [13]:
##Isuzu 대소문자 통일 
for i in range(len(data)) :
    if data['company'][i] == 'ISUZU' :
        data['company'][i] = 'Isuzu'

In [14]:
for i in range(len(data)) :
    print(data["Name"][i].split(" ")) 

['Honda', 'City', '1.5', 'EXI']
['Honda', 'City', 'i-VTEC', 'CVT', 'VX']
['Toyota', 'Corolla', 'H2']
['Maruti', 'Swift', 'Dzire', 'VDI']
['Toyota', 'Fortuner', '4x2', 'Manual']
['Skoda', 'Laura', 'Ambiente', '2.0', 'TDI', 'CR', 'AT']
['BMW', 'X5', 'xDrive', '30d']
['Maruti', 'Swift', 'Dzire', 'VDI']
['Hyundai', 'i20', 'Sportz', 'Option']
['Maruti', 'Vitara', 'Brezza', 'ZDi', 'Plus', 'Dual', 'Tone']
['Mahindra', 'Thar', 'CRDe', 'AC']
['Honda', 'WRV', 'i-VTEC', 'VX']
['Mercedes-Benz', 'E-Class', '230', 'E', 'AT']
['Maruti', 'Wagon', 'R', 'LXI', 'CNG']
['Tata', 'Zest', 'Quadrajet', '1.3', 'XT']
['Hyundai', 'Elantra', '2.0', 'SX', 'Option', 'AT']
['Maruti', 'Swift', 'Vdi', 'BSIII']
['Maruti', 'Ritz', 'VDi']
['Honda', 'City', '1.5', 'V', 'MT']
['Jeep', 'Compass', '2.0', 'Limited', 'Option', '4X4']
['Hyundai', 'Verna', '1.6', 'SX', 'VTVT', '(O)']
['Audi', 'Q5', '2.0', 'TDI']
['Hyundai', 'Grand', 'i10', 'Asta']
['Land', 'Rover', 'Discovery', 'Sport', 'TD4', 'HSE', '7S']
['Tata', 'Indica', 'Vi

['Hyundai', 'i20', 'Magna', 'Optional', '1.2']
['Maruti', 'Baleno', 'Zeta', 'Automatic']
['Ford', 'EcoSport', '1.5', 'TDCi', 'Ambiente']
['Honda', 'Accord', '2.4', 'Elegance', 'M/T']
['Honda', 'Civic', '2010-2013', '1.8', 'V', 'AT', 'Sunroof']
['Audi', 'Q7', '3.0', 'TDI', 'Quattro', 'Premium', 'Plus']
['Toyota', 'Fortuner', '4x2', 'AT']
['Hyundai', 'Creta', '1.6', 'SX', 'Plus', 'Diesel']
['BMW', 'X3', 'xDrive20d', 'Expedition']
['Maruti', 'Alto', '800', 'VXI']
['Chevrolet', 'Beat', 'LT']
['Mercedes-Benz', 'E-Class', 'Facelift']
['Chevrolet', 'Beat', 'LS']
['Hyundai', 'Verna', '1.6', 'SX', 'CRDI', '(O)', 'AT']
['Maruti', 'Ciaz', '1.3', 'S']
['Nissan', 'Sunny', '2011-2014', 'XV']
['Hyundai', 'i10', 'Sportz']
['Land', 'Rover', 'Range', 'Rover', '2.2L', 'Pure']
['Honda', 'BRV', 'i-VTEC', 'V', 'CVT']
['Hyundai', 'i20', '1.2', 'Spotz']
['BMW', 'X3', 'xDrive', '20d', 'Luxury', 'Line']
['Maruti', 'Swift', 'ZDi']
['BMW', '7', 'Series', '2007-2012', '740Li']
['Renault', 'Fluence', '1.5']
['Honda

['Hyundai', 'Grand', 'i10', 'Asta', 'Option']
['Honda', 'Civic', '2006-2010', '1.8', 'V', 'AT']
['Toyota', 'Fortuner', '4x2', '4', 'Speed', 'AT']
['BMW', '5', 'Series', '2013-2017', '530d', 'M', 'Sport']
['Honda', 'Accord', '2.4', 'Elegance', 'A/T']
['Tata', 'Indigo', 'CS', 'eVX']
['Toyota', 'Fortuner', '2.8', '4WD', 'MT']
['Skoda', 'Laura', 'Ambiente']
['Honda', 'Amaze', 'S', 'i-Dtech']
['Skoda', 'Laura', '1.9', 'TDI', 'MT', 'Ambiente']
['Volkswagen', 'Passat', 'Diesel', 'Highline', '2.0', 'TDI']
['Honda', 'Jazz', 'V']
['Tata', 'Indica', 'Vista', 'Aqua', '1.3', 'Quadrajet']
['Tata', 'Indica', 'Vista', 'TDI', 'LS']
['Maruti', 'Swift', 'Dzire', 'Tour', 'LDI']
['Toyota', 'Fortuner', '2.8', '2WD', 'AT']
['Toyota', 'Fortuner', '3.0', 'Diesel']
['Hyundai', 'i10', 'Magna', 'AT']
['BMW', 'X1', 'sDrive20d']
['Chevrolet', 'Enjoy', 'TCDi', 'LS', '8', 'Seater']
['Honda', 'City', '1.5', 'E', 'MT']
['Renault', 'Scala', 'Diesel', 'RxL']
['Ford', 'Ecosport', '1.5', 'DV5', 'MT', 'Titanium']
['Ford', '

['Skoda', 'Fabia', '1.2', 'MPI', 'Ambiente', 'Petrol']
['Hyundai', 'i20', '1.4', 'Sportz']
['Hyundai', 'Xcent', '1.2', 'Kappa', 'SX', 'Option']
['Mahindra', 'Scorpio', '2.6', 'SLX', 'CRDe']
['Hyundai', 'EON', 'D', 'Lite', 'Plus']
['Tata', 'Safari', 'Storme', 'VX']
['Renault', 'Fluence', 'Diesel', 'E4']
['Tata', 'Indica', 'V2', 'eLS']
['Hyundai', 'i20', '1.2', 'Magna']
['Honda', 'Brio', 'S', 'MT']
['Renault', 'Duster', '85PS', 'Diesel', 'RxL', 'Optional']
['Hyundai', 'Verna', '1.6', 'SX']
['Mahindra', 'XUV500', 'W8', '2WD']
['Audi', 'Q7', '4.2', 'FSI', 'quattro']
['Honda', 'Civic', '2006-2010', '1.8', 'V', 'AT']
['Renault', 'Duster', '110PS', 'Diesel', 'RXZ', 'Option']
['Audi', 'A4', '2.0', 'TDI']
['Hyundai', 'i10', 'Sportz']
['Honda', 'Brio', 'S', 'MT']
['Mahindra', 'XUV500', 'W8', '2WD']
['Maruti', 'Swift', 'Dzire', '1.2', 'Vxi', 'BSIV']
['Hyundai', 'Verna', '1.6', 'SX', 'VTVT']
['Audi', 'Q7', '3.0', 'TDI', 'Quattro', 'Premium', 'Plus']
['Hyundai', 'Verna', 'Xi', '(Petrol)']
['Maruti'

['Mercedes-Benz', 'B', 'Class', 'B180']
['Maruti', 'Ertiga', 'SHVS', 'ZDI', 'Plus']
['Hyundai', 'Accent', 'GLE']
['Volkswagen', 'Polo', 'Diesel', 'Trendline', '1.2L']
['Maruti', 'Ertiga', 'ZDI']
['Ford', 'Fiesta', '1.4', 'Duratorq', 'ZXI']
['BMW', 'X6', 'xDrive30d']
['Maruti', 'Grand', 'Vitara', 'AT']
['Ford', 'Fiesta', '1.4', 'Duratec', 'ZXI']
['Maruti', 'Ciaz', 'ZDi']
['Audi', 'Q7', '45', 'TDI', 'Quattro', 'Technology']
['Audi', 'Q5', '3.0', 'TDI', 'Quattro', 'Technology']
['Hyundai', 'Verna', '1.6', 'SX', 'CRDi', '(O)']
['Maruti', 'Swift', 'ZXI', 'ABS']
['Audi', 'A6', '35', 'TDI', 'Matrix']
['Honda', 'Civic', '2006-2010', '1.8', 'S', 'AT']
['Chevrolet', 'Optra', '1.6', 'Elite']
['Mahindra', 'XUV500', 'W8', '4WD']
['Maruti', 'Baleno', 'Alpha', '1.2']
['Ford', 'Ikon', '1.3', 'Flair']
['Volkswagen', 'Polo', 'Petrol', 'Highline', '1.2L']
['Ford', 'Figo', 'Diesel', 'ZXI']
['Hyundai', 'Verna', 'SX', 'CRDi', 'AT']
['Maruti', 'Ciaz', 'ZDi', 'Plus', 'SHVS']
['Hyundai', 'Creta', '1.6', 'CRDi'

['Ford', 'Endeavour', '3.2', 'Titanium', 'AT', '4X4']
['Jaguar', 'F', 'Type', '5.0', 'V8', 'S']
['Hyundai', 'Grand', 'i10', 'Magna']
['Toyota', 'Innova', '2.5', 'GX', '(Diesel)', '7', 'Seater']
['Honda', 'Jazz', '1.2', 'V', 'CVT', 'i', 'VTEC']
['Maruti', 'Wagon', 'R', 'LXI', 'CNG']
['Skoda', 'Superb', 'L&K', '2.0', 'TDI', 'AT']
['Tata', 'Indica', 'Vista', 'Quadrajet', 'VX']
['BMW', '3', 'Series', '320d', 'Prestige']
['Hyundai', 'Creta', '1.6', 'SX', 'Plus', 'Dual', 'Tone', 'Petrol']
['Mahindra', 'XUV500', 'W8', '2WD']
['BMW', 'X1', 'sDrive', '20d', 'Sportline']
['Hyundai', 'i10', 'Era', '1.1', 'iTech', 'SE']
['Skoda', 'Superb', '3.6', 'V6', 'FSI']
['Skoda', 'Superb', 'Elegance', '1.8', 'TSI', 'AT']
['Maruti', 'Swift', 'VDI']
['Maruti', 'Dzire', 'AMT', 'VXI']
['Volkswagen', 'Vento', 'Petrol', 'Highline', 'AT']
['Mini', 'Cooper', '3', 'DOOR', 'S']
['Audi', 'Q5', '2008-2012', '2.0', 'TDI']
['Maruti', 'Swift', 'VDI']
['Audi', 'A6', '2011-2015', '35', 'TDI', 'Technology']
['Skoda', 'Fabia',

#### 2. 차종 분류

In [15]:
data['cartype']= 'null'

In [40]:
for i in range(len(data)) :
        if 'Sedan' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Sport' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Wagon' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Wagon'
        if 'Hatchback' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Coupe' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
        if 'Convertible' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Convertible'
        #### 기본 분류
        if 'Sportz' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'               ####스포티지 : 준중형 suv
        if '7-seater' in data['Name'][i].split(" ") or '8-seater' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'  
 ###############################################################################################  브랜드별 분류         
        if 'X1' in data['Name'][i].split(" ") or 'X3' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if '1' in data['Name'][i].split(" ") and 'Series' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'     
        if '3' in data['Name'][i].split(" ") and 'Series' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan' 
        if '5' in data['Name'][i].split(" ") and 'Series' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if '7' in data['Name'][i].split(" ") and 'Series' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'X5' in data['Name'][i].split(" ") or 'X6' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Z4' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Convertible'
            
            #### BMW
            #### 1 Series : 해치백
            #### 3 Series : Sedan, 중형 , 
            #### 5 Series : 준대형 (Sedan 추정) , X5, X6 : 준대형 SUV
            #### 7 Series : 대형 Sedan
            #### X1 : 준중형 suv, X3 : 중형 suv ,  
            #### Z4 : Convertible
            
        if 'GLC' in data['Name'][i].split(" ") or 'GLA' in data['Name'][i].split(" ") or 'GLE' in data['Name'][i].split(" ") \
        or 'GLS' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV' 
        if 'M-Class' in data['Name'][i].split(" ") or 'GL-Class' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if  'A' in data['Name'][i].split(" ") or 'S' in data['Name'][i].split(" ") and 'Class' in data['Name'][i].split(" "):
            data['cartype'][i] = 'Sedan'
        if  'B' in data['Name'][i].split(" ") or 'C' in data['Name'][i].split(" ") and 'Class' in data['Name'][i].split(" "):
            data['cartype'][i] = 'Sedan'
        if 'E-Class' in data['Name'][i].split(" ") or 'C-Class' in data['Name'][i].split(" ") or 'S-Class' in data['Name'][i].split(" ") or 'CLS-Class' in data['Name'][i].split(" ") or 'CLA' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'SLK-Class' in data['Name'][i].split(" ") or 'SL-Class' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
        if 'R-Class' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
        if 'SLC' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Convertible'
            
            #### 메르세데스-벤츠 
            #### A Class : 준중형 세단/해치백
            #### GLC : 중형 SUV, GLE, GLA : 준중형 SUV , M-Class, GL-Class : SUV 
            #### C-Class, S Class : 대형 세단/쿠페 , 
            #### B class, E-Class : 준대형 세단/쿠페  CLA : 쿠페형 세단, SLC : 컨버티블
            #### CLS-Class 준대형 세단
            #### GLS : 대형 SUV 
            #### SLK-Class, SL-Class : Coupe
            #### R-Class : 미니밴(MUV)
            
        if 'Land' in data['Name'][i].split(" ") and 'Rover' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Freelander' in data['Name'][i].split(" ") or 'Evoke' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### 랜드로버
            ####Discovery Sport : 중형 suv    
            ####Land Rover Sport : 준대형 suv
            ####Land Rover Range Rover : 준대형 suv
            ####Land Rover Freelander,Evoke : 중형 suv 
            
        if 'Q3' in data['Name'][i].split(" ") or 'Q5' in data['Name'][i].split(" ") or 'Q7' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV' 
        if 'A6' in data['Name'][i].split(" ") or 'A4' in data['Name'][i].split(" ") or 'A7' in data['Name'][i].split(" ") \
        or 'A8' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'TT' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
        if 'A3' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
            
            #### 아우디 
            #### Q3(준중형),Q5 : 중형 suv,  Q7 : 준대형 suv
            #### A6 : 준대형 세단/왜건 , A4: 중형 세단/왜건 A3 : 해치백 , A7 : 세단,  A8 : 대형 세단
            #### TT : 소형 Coupe
            
        if 'City' in data['Name'][i].split(" ") or 'Jazz' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Brio' in data['Name'][i].split(" ") or 'Amaze' in data['Name'][i].split(" ") or 'Accord' in data['Name'][i].split(" ") \
        or 'Civic' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'BRV' in data['Name'][i].split(" ") or 'BR-V' in data['Name'][i].split(" ") or 'WRV' in data['Name'][i].split(" ") or 'XRV' in data['Name'][i].split(" ") \
        or 'HRV' in data['Name'][i].split(" ") or 'CR-V' in data['Name'][i].split(" ") or 'WR-V' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Mobilio' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
            
            #### 혼다 
            #### BRV,WRV,XRV, HRV, CRV, 파일럿 : suv
            #### City, Brio, Amaze, Jazz : 소형(해치/세단)
            #### 중형 Accord, 준중형 Civic
            #### Mobilio : MPV(MUV)
            
        if 'Etios' in data['Name'][i].split(" ") or 'Corolla' in data['Name'][i].split(" ") or 'Camry' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Fortuner' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Qualis' in data['Name'][i].split(" ") or 'Innova' in data['Name'][i].split(" "):
            data['cartype'][i] = 'MUV'  
            
            #### 도요타 
            #### Etios : 소형 세단
            #### Fortuner : suv , Innova : MPV , Corolla : 준중형 세단
            #### Qualis : MPV , Camry Hybrid : 중형 세단
            
        if 'Swift' in data['Name'][i].split(" ") or 'SX4' in data['Name'][i].split(" ") or 'Ignis' in data['Name'][i].split(" ") \
           or 'Baleno' in data['Name'][i].split(" ") or 'Celerio' in data['Name'][i].split(" ") or 'Estilo' in data['Name'][i].split(" ") \
         or 'Zen' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Alto' in data['Name'][i].split(" ") or '800' in data['Name'][i].split(" ") or 'A-Star' in data['Name'][i].split(" ") or \
            'Ritz' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Wagon'
        if 'Vitara' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'S' in data['Name'][i].split(" ") and 'Cross' in data['Name'][i].split(" ") or 'S-Cross' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Ciaz' in data['Name'][i].split(" ") or 'Dzire' in data['Name'][i].split(" ") or 'Esteem' in data['Name'][i].split(" ") or \
            '1000' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Ertiga' in data['Name'][i].split(" ") or 'Omni' in data['Name'][i].split(" ") or 'Eeco' in data['Name'][i].split(" ") or \
        'Versa' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
            
            #### Maruti(마루티 스즈키, 일본회사, 인도1위)
            #### Swift, SX4, Ignis, Baleno, Celerio, Zen Estilo : 해치백
            #### Alto, 800 , A-Star(추정), Ritz : 경차(웨건?)
            #### Ertiga , Omni, Eeco, Versa(추정): MPV(MUV)
            #### Vitara Brezza, S Cross(추정) : SUV
            #### Ciaz, Dzire, Esteem, 1000 : 세단  
            
        if 'Laura' in data['Name'][i].split(" ") or 'Superb' in data['Name'][i].split(" ") or 'Superb' in data['Name'][i].split(" ") or \
        'Rapid' in data['Name'][i].split(" ") or 'Octavia' in data['Name'][i].split(" ") or 'Fabia' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Yeti' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Sokoda(소코다)
            #### Laura, Superb, Octavia, Rapid, Fabia : 세단
            #### Yeti : SUV
            
        if 'i20' in data['Name'][i].split(" ") or 'EON' in data['Name'][i].split(" ") or 'Getz' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Elantra' in data['Name'][i].split(" ") or 'Verna' in data['Name'][i].split(" ") or 'Accent' in data['Name'][i].split(" ") \
        or 'Xcent' in data['Name'][i].split(" ") or 'Sonata' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Creta' in data['Name'][i].split(" ") or 'Santa' in data['Name'][i].split(" ") or 'Tucson' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'i10' in data['Name'][i].split(" ") or 'Santro' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
            
            #### Hyundai(현대)
            #### i20 Sportiz, i20 Magma, i20 Asta, EON, Getz : hatchback, 
            #### Elantra : 세단 추정
            #### Verna, Accent : 소형(해치/세단)
            #### Creta : 소형 SUV ,  Santa Fe, Tucson : SUV
            #### Xcent : Sedan, Sonata : sedan
            #### i10 , Santro : 경차, 해치백
            
        if 'Thar' in data['Name'][i].split(" ") or 'Scorpio' in data['Name'][i].split(" ") or 'Logan' in data['Name'][i].split(" ") or \
        'Bolero' in data['Name'][i].split(" ") or 'XUV500' in data['Name'][i].split(" ") or 'Quanto' in data['Name'][i].split(" ") \
        or 'TUV' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Ssangyong' in data['Name'][i].split(" ") or 'NuvoSport' in data['Name'][i].split(" ") or 'XUV300' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Mahindra' in data['Name'][i].split(" ") and 'Jeep' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'TUV' in data['Name'][i].split(" ") and '300' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Verito' in data['Name'][i].split(" ") or 'KUV' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Xylo' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
            
            #### Mahindra(인도회사)
            #### Thar, Scorpio, Logan, Bolero(7-9 seat), XUV500, Quanto(mini): SUV
            #### Ssangyong, NuvoSport, Jeep, TUV, XUV300, TUV 300 : SUV
            ####  Verito, KUV : Hatchback
            ####  Xylo : MUV , 
            
        if 'Nexon' in data['Name'][i].split(" ") or 'Hexa' in data['Name'][i].split(" ") or 'Sumo' in data['Name'][i].split(" ") or \
        'Safari' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Zest' in data['Name'][i].split(" ") or 'Tigor' in data['Name'][i].split(" ") or 'Indigo' in data['Name'][i].split(" ") or \
        'Manza' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Nano' in data['Name'][i].split(" ") or 'Bolt' in data['Name'][i].split(" ") or 'Tiago' in data['Name'][i].split(" ") or \
        'Indica' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'hatchback'
        if 'Xenon' in data['Name'][i].split(" ") or 'Venture' in data['Name'][i].split(" ") or 'Indigo' in data['Name'][i].split(" ") or \
        'Manza' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
            
            #### Tata(타타자동차)
            #### Nexon, Hexa, Sumo ,  New Safari, Safari Strome : SUV 
            #### Zest, Tigor, Indigo, Manza : Sedan
            #### Nano : 경차 , 해치백 추정 
            #### Bolt, Tiago(추정), Indica : hatchback
            #### , Xenon : 트럭 , Venture : MUV
            
        if 'Compass' in data['Name'][i].split(" ")  :
            data['cartype'][i] = 'SUV'
            
            #### Jeep(지프)
            #### Compass(Limited, Longtitue, Sport) : SUV 
            
        if 'Duster' in data['Name'][i].split(" ") or 'Captur' in data['Name'][i].split(" ") or 'KWID' in data['Name'][i].split(" ") or \
            'Koleos' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV' 
        if 'Lodgy' in data['Name'][i].split(" ")  :
            data['cartype'][i] = 'MUV' 
        if 'Scala' in data['Name'][i].split(" ") or 'Fluence' in data['Name'][i].split(" ")  :
            data['cartype'][i] = 'Sedan' 
        if 'Pulse' in data['Name'][i].split(" ")  :
            data['cartype'][i] = 'Hatchback'
            
            #### Renault(르노삼성)
            #### Duster, Captur(소형), KWID, Koleos(준,중형) : SUV 
            #### Lodgy : MPV(MUV)
            #### Scala , Fluence: Sedan
            #### Pulse : Hatchback
            
        if 'Polo' in data['Name'][i].split(" ") or 'CrossPolo' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Ameo' in data['Name'][i].split(" ") or 'Passat' in data['Name'][i].split(" ") or 'Jetta' in data['Name'][i].split(" ") \
        or 'Vento' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Tiguan' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Volkswagen(폭스바겐, 독일)
            #### Polo : 해치백/쿠페, CrossPolo : 해치
            #### Ameo,Passat, Jetta: Sedan , Vento : 세단
            #### Tiguan : SUV
            
        if 'Sail' in data['Name'][i].split(" ") or 'Optra' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Aveo' in data['Name'][i].split(" ") or 'Cruze' in data['Name'][i].split(" ") or \
            'Spark' in data['Name'][i].split(" ") or 'Beat' in data['Name'][i].split(" "):
            data['cartype'][i] = 'Hatchback'
        if 'Enjoy' in data['Name'][i].split(" ") or 'Tavera' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
        if 'Captiva' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Chevrolet(쉐보레)
            #### Sail, Optra : 세단(추정) 
            #### Aveo : 해치백, 세단, Cruze :해치백, 웨건
            #### Spark, Beat : 경차 -- 해치백으로
            #### Enjoy, Tavera : MPV(MUV)
            #### Captiva : SUV
            
        if 'Sunny' in data['Name'][i].split(" ") or 'Teana' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Terrano' in data['Name'][i].split(" ") or 'X-Trail' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Micra' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Evalia' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
            
            #### Nissan(닛산)
            #### Sunny, Teana : Sedan
            #### Terrano :  SUV , X-Trail 
            #### Micra : Hatchback
            #### Evalia : MUV(VAN)
            
        if 'Cayenne' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Cayman' in data['Name'][i].split(" ") or 'Boxster' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
        if 'Panamera' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
            
            #### Porsche(포르쉐)
            #### Cayenne : SUV
            #### Cayman, Boxster : Coupe
            #### Panamera : Sedan Luxury(sport car)
            
        if 'EcoSport' in data['Name'][i].split(" ") or 'Endeavour' in data['Name'][i].split(" ") or 'Ecosport' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Figo' in data['Name'][i].split(" ") or 'Fiesta' in data['Name'][i].split(" ") or 'Aspire' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Ford' in data['Name'][i].split(" ") and 'Classic' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'Ikon' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'Freestyle' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Wagon'
            
            #### Ford(포드)
            #### Ecosport(소), Endeavour : SUV
            #### Figo, Fiesta, Aspire, Classic(?) : Hatchback
            #### Ikon : Sedan
            #### Freestyle : Wagon
            
        if 'XF' in data['Name'][i].split(" ") or 'XE' in data['Name'][i].split(" ") or 'XJ' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'F' in data['Name'][i].split(" ") and 'Type' in data['Name'][i].split(" ") :
            
            data['cartype'][i] = 'Convertible'
            #### Jaguar(재규어)
            #### XF, XE, XJ : Sedan
            ####  F type : Convertible(sport)
            
        if 'Pajero' in data['Name'][i].split(" ") or 'Outlander' in data['Name'][i].split(" ") or 'Montero' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
        if 'Lancer' in data['Name'][i].split(" ") or 'Cedia' in data['Name'][i].split(" ")  :
            data['cartype'][i] = 'Sedan'
            
            #### Mitsubishi(미쓰비시)
            #### Pajero, Outlander : SUV , Montero : 풀사이즈 SUV
            #### Lancer : Sedan(추정), Cedia : 세단

        if 'GO' in data['Name'][i].split(" ") or 'redi-GO' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback '
            
            #### Datsun(일본) 
            #### GO, redi-GO : hatchback
            
        if 'Linea' in data['Name'][i].split(" ") or 'Petra' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan' 
        if 'Punto' in data['Name'][i].split(" ") or 'Avventura' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV' 
            
            #### Fiat(이태리, 피아트)
            #### Linea , Petra: Sedan
            #### Punto, Avventura(추정) : SUV
            
        if 'V40' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Hatchback'
        if 'S60' in data['Name'][i].split(" ") or 'S80' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Sedan'
        if 'XC60' in data['Name'][i].split(" ") or 'XC90' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Volvo(볼보)
            #### V40 : Hatchback 
            #### S60, S80 : Sedan 
            #### XC60 : SUV(중형), XC90 : SUV 준대형
            
        if 'Cooper' in data['Name'][i].split(" ") or 'Countryman' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
            
            #### Mini(미니쿠퍼/BMW)
            ##### Cooper 5, Cooper 3, Countryman : Coupe
            
        if 'Force' in data['Name'][i].split(" ") and 'One' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Force(포르쉐)
            #### One : SUV    
            
        if 'D-MAX' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'MUV'
        if 'MUX' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'SUV'
            
            #### Isuzu
            #### D-MAX : 픽업트럭 , MUX : SUV(7seat)
            
        if 'Continental' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
            
            #### Bentley(벤틀리)
            #### continental : coupe
            
        if 'Gallardo' in data['Name'][i].split(" ") :
            data['cartype'][i] = 'Coupe'
            
            #### Lamborghini(람보르기니, 이태리)
            ### Gallardo : coupe(sport)
        

In [17]:
data['cartype'].count()

6018

In [18]:
data['cartype'].value_counts()

Hatchback      2039
Sedan          1752
SUV            1230
MUV             400
Wagon           387
hatchback        87
null             59
Coupe            44
Hatchback        12
Convertible       8
Name: cartype, dtype: int64

In [41]:
for i in range(len(data)) :
    if data['cartype'][i]== 'null' :
        print(data['Name'][i])

### 1. Engine

In [20]:
data['Engine'].isnull().sum()

122

In [21]:
data['Engine'].value_counts()

1197 CC    542
1248 CC    470
0CC        348
1498 CC    285
998 CC     254
          ... 
2773 CC      1
72 CC        1
2147 CC      1
2997 CC      1
1948 CC      1
Name: Engine, Length: 142, dtype: int64

In [22]:
data['Engine'][1].split("CC")[0]

'1497 '

In [23]:
## 엔진 칼럼에서 CC(단위) 제거
from tqdm import tqdm_notebook

data["CC"]=0
for cnt in tqdm_notebook(range(len(data))):
    if(pd.isnull(data["Engine"][cnt])):
        data["CC"][cnt] = np.nan
    else:
        data["CC"][cnt] = data["Engine"][cnt].split(" CC")[0]
del data["Engine"]

data["CC"].replace("0CC",0,inplace=True)

HBox(children=(IntProgress(value=0, max=6018), HTML(value='')))




In [24]:
data['CC'].isnull().sum()
data["CC"].replace("",0,inplace=True)

In [25]:
data['CC'].value_counts()

1197    540
1248    464
0       348
1498    285
998     253
       ... 
2982      1
2993      1
2360      1
2773      1
2694      1
Name: CC, Length: 157, dtype: int64

In [26]:
data['CC'].fillna(0, inplace = True)
data['CC'] = data['CC'].astype("int64")

In [27]:
cc_bin = [1, 1000, 1300, 1600, 2000, 7000]
cc_size = ['SS', 'S', 'SM', 'M', 'L']

In [28]:
cc_cats = pd.cut(data["CC"], cc_bin ,labels= cc_size)
cc_cats.value_counts()

#### 엔진, 사이즈로 분류
### 1. 배기량 1000cc 미만 경형(길이 3.6m, 너비 1.6m, 높이 2.0m 이하)
### 2. 배기량 1,600cc 미만 중형 (길이 4.7m, 너비 1.7m, 높이 2.0m 이하)
### - 1300에서 1600 cc 준중형
### 3. 배기량이 1,600cc 이상 2,000cc 미만 중형 (or 길이와 너비, 높이 중 어느 하나라도 소형 기준을 초과)
### 4. 배기량 2000cc 이상 대형 (or 길이와 너비, 그리고 높이 모두가 소형 기준을 초과)

S     1676
SM    1535
L     1123
M      665
SS     549
Name: CC, dtype: int64

### 2.  Fuel Type

In [45]:
data['Fuel_Type'].isnull().sum()

171

In [30]:
data['Fuel_Type'].value_counts()

Diesel      3071
Petrol      2592
CNG           52
LPG            9
Electric       2
Name: Fuel_Type, dtype: int64

In [44]:
for i in range(len(data)) :
    if(pd.isnull(data['Fuel_Type'][i])) :
        print(data['Name'][i])

Toyota Fortuner 4x2 Manual
Chevrolet Optra 1.6 LT Royale
Honda City Corporate Edition
Renault Captur 1.5 Diesel Platine Mono
Hyundai i10 Era
Hyundai Creta 1.6 SX
Hyundai EON D Lite Plus
Ford Fiesta 1.4 Duratorq ZXI
Honda City V AT
Honda Brio 1.2 S MT
Honda Civic 2010-2013 1.8 S MT
Hyundai Accent GLE
Chevrolet Aveo 1.4 LS
Toyota Innova 2.5 VX 7 STR BSIV
Maruti Ritz ZXI
Volvo XC60 D5
Hyundai i20 Asta Option 1.2
Tata Safari Storme 2012-2015 VX
Tata Nexon 1.2 Revotron XZ Plus
Nissan Teana XV
Volvo XC60 D5
Mahindra Scorpio VLX 4WD AIRBAG BSIV
Maruti Ciaz ZXi Option
Mahindra Scorpio S6 7 Seater
BMW 5 Series 2010-2013 525i
Hyundai Grand i10 AT Asta
Hyundai i20 Asta Option 1.2
Honda City V AT
Hyundai Elite i20 Asta Option
Chevrolet Beat LS
Isuzu MUX 4WD
Volkswagen Vento Diesel Comfortline
BMW 5 Series 2013-2017 520d Luxury Line
Maruti Ciaz AT ZXi
Ford Figo 2015-2019 1.5P Titanium AT
Maruti Baleno Zeta Automatic
Hyundai Xcent 1.2 Kappa AT SX Option
Honda City 1.5 V MT
Chevrolet Optra Magnum 2.0

In [43]:
for i in range(len(data)) :
    if(pd.isnull(data["Fuel_Type"][i])):
        ###디젤 엔진
            if 'TDI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'CRDi' in data['Name'][i].split(" ") or 'CRDI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel' 
            if 'CDI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'VGT' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'GD' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'HSE' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'Quadrajet' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'
            if 'Quattro' in data['Name'][i].split(" ") or 'quattro' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel' 
            if 'ZDI' in data['Name'][i].split(" ") or 'ZDi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel' 
            if 'VDI' in data['Name'][i].split(" ") or 'VDi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'  
            if 'LDI' in data['Name'][i].split(" ") or 'LDi' in data['Name'][i].split(" ") or 'Ldi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Disel'  
                
        ###가솔린(petrol) 
            if 'Petrol' in data['Name'][i].split(" ") or '(Petrol)' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'TFSI' in data['Name'][i].split(" ") or 'FSI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'GDI' in data['Name'][i].split(" ") or 'GDi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'LXI' in data['Name'][i].split(" ") or 'LXi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'D-4D' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'CGI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'TSI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'MPI' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'i-Vtech' in data['Name'][i].split(" ") or 'VTEC' in data['Name'][i].split(" ") or 'i-VTEC' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            if 'VXI' in data['Name'][i].split(" ") or 'VXi' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'Petrol'
            
        ###천연(CNG)
            if 'CNG' in data['Name'][i].split(" ") or '(CNG)' in data['Name'][i].split(" ") :
                data['Fuel_Type'][i] = 'CNG'
        ###LPG
            if 'LPG' in data['Name'][i].split(" ") or '(LPG)' in data['Name'][i].split(" "):
                data['Fuel_Type'][i] = 'LPG'
    else : pass

### 3. Kilometer Driven

In [32]:
data['Kilometers_Driven'].isnull().sum()

846

### 4. Location

In [33]:
data['Location'].isnull().sum()

94

### 5. Mileage

In [46]:
변환 = lambda x: chr(ord(x)-32)if ord(x)>=97 else x
def 병합(s):
    answer=""
    for word in s:
        answer+=word
    return answer

In [47]:
data["LARGE_NAME"] = ""
for cnt in tqdm_notebook(range(len(data))): 
    data["LARGE_NAME"][cnt] = 병합(list(map(변환,data["Name"][cnt])))

HBox(children=(IntProgress(value=0, max=6018), HTML(value='')))




In [49]:
차종 = data["LARGE_NAME"].unique()
def cc_preprocessing(car_name):
    정상값 = data.loc[data["LARGE_NAME"]==car_name,"CC"].value_counts().index[0]
    차종별index = data.loc[data["LARGE_NAME"]==car_name,"CC"].index
    for cnt in 차종별index:
        data.loc[cnt,"CC"] = 정상값

for car in tqdm_notebook(차종):
    cc_preprocessing(car)

HBox(children=(IntProgress(value=0, max=1723), HTML(value='')))




In [52]:
data['CC'].value_counts().head(20)

1197    593
1248    507
1498    300
998     267
2179    235
1198    230
1497    229
1968    209
1995    177
1461    154
2143    146
1396    140
1582    140
1598    135
1199    133
2494    127
796     119
0       112
1086    101
2993     95
Name: CC, dtype: int64

In [56]:
data['Mileage'].isnull().sum()

12

In [54]:
차종 = data["LARGE_NAME"].unique()


def mileage_preprocessing(car_name):
    try:
        정상값 = data.loc[data["LARGE_NAME"]==car_name,"Mileage"].value_counts().index[0]
        차종별index = data.loc[data["LARGE_NAME"]==car_name,"Mileage"].index
        for cnt in 차종별index:
            data.loc[cnt,"Mileage"] = 정상값
    except:
        pass    

for car in tqdm_notebook(차종) :
    mileage_preprocessing(car)
    

HBox(children=(IntProgress(value=0, max=1723), HTML(value='')))




In [68]:
data[data['Mileage'].isnull()]['Name'] ####남은 결측치 확인

4418    Mahindra E Verito D4
Name: Name, dtype: object

In [67]:
#### 변환하고 남은 결측치 직접 채워주기

data['Mileage'][138] = 19.2     ###data['Name'] == 'Mahindra Logan Diesel 1.5 DLS'
data['Mileage'][477] = 20       ###data['Name'] == 'Ford Figo Diesel EXI Option'
data['Mileage'][637] = 12.99    ###data['Name'] == 'Toyota Innova 2.0 E'
data['Mileage'][968] = 16.49    ###data['Name'] == 'Ford Figo Aspire 1.5 Ti-VCT Titanium'
data['Mileage'][2518] = 17.5    ###data['Name'] == 'Nissan Sunny XE P'
data['Mileage'][2725] = 12.99   ###data['Name'] == 'Toyota Innova 2.5 ZX Diesel 7 Seater BSIII'
data['Mileage'][2946] = 18    ###data['Name'] == 'Renault Duster 85PS Diesel RxL Optional with Nav'
data['Mileage'][3022] = 21.2   ###data['Name'] == ' Fiat Grande Punto EVO 1.3 Active'
data['Mileage'][3132] = 23.91  ###data['Name'] == ' Toyota Prius 2009-2016 Z4'
data['Mileage'][3561] = 12   ###data['Name'] == ' Chevrolet Captiva LT'
data['Mileage'][4019] = 9.9  ###data['Name'] == ' Mercedes-Benz CLS-Class 2006-2010 350 CDI'

In [55]:
data['Mileage'].head()

0    13.0 kmpl
1    18.0 kmpl
2    13.4 kmpl
3    23.4 kmpl
4    13.0 kmpl
Name: Mileage, dtype: object

### 6. Owner type

In [36]:
data['Owner_Type'].isnull().sum()

265

### 7. Power

In [37]:
data['Power'].isnull().sum()

864

### 8. Seats

In [38]:
data['Seats'].isnull().sum()

76

### 9. Year

In [39]:
data['Year'].isnull().sum()

357