# In [82]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #For plotting correlation data
import seaborn

# Location of the raw car listings CSV, relative to the working directory.
CARS_CSV_PATH = "../input/carsforsale/cars_raw.csv"

# Module-level frame that the cleanup helpers below read and modify.
data = pd.read_csv(CARS_CSV_PATH)
# Preview of the first rows (only displayed when run as a notebook cell).
data.head()

#Finding Correlation
def find_correlations(df):
    """Return the correlation of every numeric column with ``Price``.

    Only numeric and boolean columns take part in the computation.  Text
    columns are left out up front because ``DataFrame.corr`` raises on them
    in pandas versions where ``numeric_only`` defaults to ``False``.

    Args:
        df: DataFrame containing a numeric ``Price`` column.  It is not
            modified.

    Returns:
        A Series indexed by column name holding each column's correlation
        with ``Price`` (the ``Price`` entry itself is included).

    Raises:
        KeyError: if ``Price`` is missing or is not numeric.
    """
    # select_dtypes returns a new frame, so no defensive copy is needed.
    numeric = df.select_dtypes(include=["number", "bool"])
    return numeric.corr()["Price"]

def visualize_correlations(cor):
    """Draw a bar chart of the values in ``cor``.

    Args:
        cor: Series of correlation values; its index supplies the x-axis
            categories and its values the bar heights.
    """
    canvas_size = (15, 15)
    plt.figure(figsize=canvas_size)

    labels = cor.index
    seaborn.barplot(x=labels, y=cor)

#Format/Cleanup
def format_price(df=None):
    """Convert the ``Price`` column from display text to integers, in place.

    Values such as ``"$12,345"`` become ``12345``.  Rows whose price cannot
    be parsed as a number (for example ``"Not Priced"`` or a missing value)
    are removed from the frame.  A fractional part, if any, is truncated by
    the final integer conversion.

    Args:
        df: DataFrame to clean.  Defaults to the module-level ``data`` frame,
            which keeps the original zero-argument call working.

    Returns:
        None.  The frame is modified in place.
    """
    frame = data if df is None else df

    # Going through ``str`` lets the function run again on an already-cleaned
    # (integer) column instead of failing on the ``.str`` accessor.
    text = frame["Price"].astype(str).str.strip()
    # Remove thousands separators.
    text = text.str.replace(",", "", regex=False)
    # Remove a single leading currency symbol, but only when the first
    # character is not a digit, so an unprefixed number keeps all its digits.
    text = text.str.replace(r"^\D", "", regex=True)

    # Whatever is still not numeric becomes NaN and its row is dropped; this
    # replaces the comparison against one specific mangled placeholder string.
    frame["Price"] = pd.to_numeric(text, errors="coerce")
    frame.dropna(subset=["Price"], inplace=True)
    frame["Price"] = frame["Price"].astype(int)
    
def format_used_new(df):
    """Relabel "Certified" entries of ``Used/New`` as ``"New"``.

    Accepts either a whole DataFrame or a single row (for example the Series
    passed by ``DataFrame.apply(..., axis=1)``).  The input is modified in
    place and also returned.

    Args:
        df: DataFrame with a ``Used/New`` column, or a row-like mapping with
            a ``Used/New`` entry.

    Returns:
        The same object, with every ``Used/New`` value containing the text
        ``"Certified"`` replaced by ``"New"``.
    """
    column = "Used/New"

    if isinstance(df, pd.DataFrame):
        # ``"Certified" in series`` would test the index labels rather than
        # the values, so build an explicit per-row mask instead.
        certified = df[column].astype(str).str.contains("Certified", regex=False)
        df.loc[certified, column] = "New"
        return df

    status = df[column]
    # Non-string values (e.g. NaN for a missing entry) are left untouched.
    if isinstance(status, str) and "Certified" in status:
        df[column] = "New"
    return df

def format_drivetrain(df=None):
    """Encode the ``Drivetrain`` column as an integer from 0 to 3, in place.

    Mapping:
        3 -- "4WD" / "Four-wheel Drive"
        2 -- "AWD" / "All-wheel Drive"
        1 -- "RWD" / "Rear-wheel Drive"
        0 -- every other value, including missing ones

    Because unrecognised values map to 0, calling this again on an
    already-encoded column resets every row to 0.

    Args:
        df: DataFrame to modify.  Defaults to the module-level ``data``
            frame, which keeps the original zero-argument call working.

    Returns:
        None.  The frame is modified in place.
    """
    frame = data if df is None else df

    codes = {
        "4WD": 3,
        "Four-wheel Drive": 3,
        "AWD": 2,
        "All-wheel Drive": 2,
        "RWD": 1,
        "Rear-wheel Drive": 1,
    }
    # ``map`` yields NaN for labels not in the table; those become 0.  The
    # converted Series is assigned back so the column really ends up integer
    # (the result of ``astype`` must not be discarded).
    frame["Drivetrain"] = frame["Drivetrain"].map(codes).fillna(0).astype(int)

def clean_data():
    """Run every cleanup step on the module-level ``data`` frame.

    Steps, in order: convert ``Price`` to integers, fold "Certified" labels
    in ``Used/New`` into ``"New"``, and encode ``Drivetrain`` numerically.
    """

    def collapse_certified(status):
        # Any label mentioning "Certified" counts as "New"; others pass through.
        if "Certified" in status:
            return "New"
        return status

    # Price: text -> integer.
    format_price()

    # Used/New: reduce to the plain Used / New labels.
    data["Used/New"] = data["Used/New"].apply(collapse_certified)

    # Drivetrain: label -> numeric code.
    format_drivetrain()
    
if __name__ == "__main__":
    # Clean the loaded frame first, then plot how each column relates to Price.
    clean_data()
    price_correlations = find_correlations(data)
    visualize_correlations(price_correlations)