In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Never truncate the column list when a DataFrame is rendered
pd.options.display.max_columns = None

In [None]:
def lowercase_data(df):
    """Normalise the column labels of ``df`` so they are easier to type.

    Every column label is lower-cased and each space in it is replaced with an
    underscore (``'Used Market Price'`` -> ``'used_market_price'``). Other
    characters, such as slashes, are left as they are. Only column labels are
    touched; the index and its name are not changed.

    Args:
        df (DataFrame): frame whose column labels should be normalised. It is
            modified in place.

    Returns:
        DataFrame: the same ``df`` object, with the renamed columns.
    """
    # Nothing to rename; this also avoids the .str accessor on an empty,
    # non-string column index.
    if len(df.columns) == 0:
        return df

    # One vectorised pass over the whole Index; no per-column loop is needed.
    # regex=False because the space is a literal character, not a pattern.
    df.columns = df.columns.str.lower().str.replace(' ', '_', regex=False)
    return df

In [None]:
# Load the raw car list from the CSV that sits next to the notebook
cars = pd.read_csv(filepath_or_buffer="10_cars.csv")

In [None]:
# Number of cars per value of the 'Type' column, most frequent first
car_types = cars['Type']
type_counts = car_types.value_counts()

In [None]:
sns.set_style("dark")

# Plot how many cars fall into each car type
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=type_counts.index, y=type_counts.values, ax=ax)
ax.set_xlabel('Car Type')
ax.set_ylabel('Frequency')
ax.set_title('Car Type Distribution')
# Tilt the category labels so longer type names do not overlap
ax.tick_params(axis='x', labelrotation=45)
# Render the figure explicitly so the cell does not echo a tick/label repr
plt.show()

In [None]:
# Hand-entered prices, one per row, assigned positionally in the frame's current row order
used_market_prices = [
    75000, 158500, 130000, 70000, 62500,
    65000, 14500, 80000, 425000, 18500,
]
cars["Used Market Price"] = used_market_prices

In [None]:
# Rows to drop: every truck, plus the 'S5 Coupe' model
is_truck = cars['Type'].eq('Truck')
is_s5_coupe = cars['Model'].eq('S5 Coupe')
removals = is_truck | is_s5_coupe

# Keep only the rows that are not flagged for removal
cars = cars.loc[~removals]

In [None]:
# Reorder the rows by ascending 'Rank'
cars = cars.sort_values(by='Rank', ascending=True)

In [None]:
# Hand-entered horsepower figures, one per remaining row, assigned positionally
# in the frame's current row order
horsepower_values = [550, 414, 252, 197, 720, 150, 616, 475]
cars["Horsepower"] = horsepower_values

In [None]:
# Two extra entries written as plain records. Each key names the column of
# `cars` it fills; 'Rank' is still a regular column here and becomes the index below.
audi_rsq8 = {
    'Brand': 'Audi', 'Model': 'RS Q8', 'Year': 2022, 'Rank': 2, 'Type': 'SUV',
    'Engine Type': 'Gas', 'Used Market Price': 126000, 'Horsepower': 590,
}
lexus_is350_sport = {
    'Brand': 'Lexus', 'Model': 'IS 350 F Sport', 'Year': 2022, 'Rank': 8, 'Type': 'Car',
    'Engine Type': 'Gas', 'Used Market Price': 42000, 'Horsepower': 472,
}

# Build one frame from both records rather than rebinding each dict name to a
# separate one-row DataFrame.
new_cars = pd.DataFrame([audi_rsq8, lexus_is350_sport])

# Append the new rows, then index by rank and order the frame by that index.
cars = (
    pd.concat([cars, new_cars], ignore_index=True)
    .set_index('Rank')
    .sort_index()
)

In [None]:
# Display only (result is not stored): cars from most to least expensive
cars.sort_values(by='Used Market Price', ascending=False)

In [None]:
# Round each model year down to the start of its decade (e.g. 2022 -> 2020)
model_years = cars['Year'].astype(int)
cars['Decade'] = model_years // 10 * 10

In [None]:
# Regex alternations matched (as substrings) against the 'Brand' column
german_brand = 'Mercedes|Audi|Porsche'
japanese_brand = 'Acura|Honda|Lexus|Mazda'
american_brand = 'Jeep'
british_brand = 'Jaguar'

# Each country label is kept next to the pattern that selects it, so the two
# lists handed to np.select cannot drift out of order. Order matters:
# np.select uses the first condition that is True for a row.
country_patterns = [
    ('Germany', german_brand),
    ('Japan', japanese_brand),
    ('United States', american_brand),
    ('United Kingdom', british_brand),
]
brand_countries = [country for country, pattern in country_patterns]
car_countries = [
    # na=False: a missing Brand counts as "no match", so the mask stays strictly
    # boolean and the row falls through to the 'Other' default.
    cars['Brand'].str.contains(pattern, na=False)
    for country, pattern in country_patterns
]
cars["Country"] = np.select(car_countries, brand_countries, default='Other')

In [None]:
# Low-cardinality text columns that are stored as pandas categoricals
categorical_columns = ['Brand', 'Type', 'Engine Type', 'Country']
cars_category = dict.fromkeys(categorical_columns, 'category')
cars = cars.astype(cars_category)

In [None]:
# Display the dtype of every column to confirm the category conversion above took effect
cars.dtypes

In [None]:
sns.set_style("dark")

# Bar height is the used market price; bars are grouped by country and coloured per model
price_ax = sns.barplot(
    x='Country',
    y='Used Market Price',
    hue='Model',
    data=cars,
    palette='muted',
)
price_ax.set_title('Used Market Price by Model and Country')

In [None]:
cars = lowercase_data(cars)

# lowercase_data only rewrites column labels. 'Rank' was moved into the index by
# set_index earlier, so its label is normalised here the same way; otherwise the
# exported CSV header would mix 'Rank' with lower-case column names.
if isinstance(cars.index.name, str):
    cars.index.name = cars.index.name.lower().replace(' ', '_')

In [None]:
# Display the whole frame to check the column labels after lowercase_data
cars

In [None]:
# One point per car: horsepower on the y-axis, grouped by brand on the x-axis
sns.swarmplot(data=cars, x='brand', y='horsepower')

In [None]:
# Write the cleaned frame to disk; the index is written as the first CSV column
cars.to_csv(path_or_buf='NEW_10_cars.csv', index=True)