
# WebWorks Dreams's Application Notebook

## Initialization

In [None]:
# Import pandas as the container for all data, and seaborn and matplotlib for graphical data analysis.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Set the theme for Seaborn graphics.
sns.set_theme()

# Read the CSV file into the cars DataFrame and preview only the first 5 rows
# to see what we are working with.
cars = pd.read_csv('carPricing.csv')
cars.head()

## Data Types

In [None]:
# Split the column names by dtype: text (object) columns versus numeric columns.
carObjects = cars.select_dtypes(include='object').columns.tolist()
carNum = cars.select_dtypes(include='number').columns.tolist()
# 'price' is what every feature is plotted against below, so it is not a feature itself.
carNum.remove('price')

# Draw one regression scatter plot per numeric feature against price,
# stacked vertically in a single tall figure.
fig, ax = plt.subplots(nrows=len(carNum), ncols=1, figsize=(12, 40))
for axis, feature in zip(ax, carNum):
    sns.regplot(x=cars[feature], y=cars["price"], ax=axis)
    axis.set_title("Scatter plot of " + str(feature) + " vs price column")
plt.tight_layout()

# Draw one strip plot per text (categorical) feature against price,
# stacked vertically in a single tall figure.
fig, ax = plt.subplots(nrows=len(carObjects), ncols=1, figsize=(12, 40))
for axis, feature in zip(ax, carObjects):
    sns.stripplot(x=cars[feature], y=cars["price"], ax=axis)
    axis.set_title("Scatter plot of " + str(feature) + " vs price")
plt.tight_layout()

# Display a heatmap of the pairwise correlations between the numeric columns
# (price included). Only the numeric columns are handed to corr(): the frame still
# holds text columns here, and pandas >= 2.0 raises on them instead of silently
# dropping them as older versions did.
plt.figure(figsize=(16,8))
sns.heatmap(cars.select_dtypes(include='number').corr(), annot=True, cmap='BuPu')

## Data Modification

In [None]:
# Drop the car_ID column (presumably just a row identifier - confirm against the
# data) and the car's name so neither is used as a feature, and take them out of
# the feature-name lists as well. errors='ignore' and the filtered lists keep this
# cell safe to re-run after the columns are already gone.
cars = cars.drop(columns=['car_ID', 'CarName'], errors='ignore')
carNum = [col for col in carNum if col != 'car_ID']
carObjects = [col for col in carObjects if col != 'CarName']

# Show the data type of each remaining column. display() is needed because a bare
# expression only renders when it is the last line of a cell.
display(cars.dtypes)

# One-hot encode every text column. drop_first=True omits the first level of each
# feature, so that level is represented by all of the feature's dummy columns being 0.
sampleVars = pd.get_dummies(
    data=cars,
    columns=carObjects,
    drop_first=True,
)
sampleVars.head()

## Model With Imports

In [None]:
# Import train_test_split to create the training and testing data sets, and the LinearRegression model from scikit-learn.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Separate the target (price) from the feature columns, then hold out 5% of the
# rows for testing. The fixed random_state makes the split repeatable.
Y = sampleVars['price']
X = sampleVars.drop(columns='price')
xTrain, xTest, yTrain, yTest = train_test_split(
    X,
    Y,
    test_size=0.05,
    random_state=42,
)

# Fit a linear regression model on the training rows (fit() returns the model itself).
linearRegressionModel = LinearRegression().fit(xTrain, yTrain)

# R^2 of the model on the held-out rows: 1 is a perfect fit and higher is better.
# It is not bounded below by 0 - a model that does worse than always predicting
# the mean price scores negative.
linearRegressionModel.score(xTest, yTest)

## Model Assessment

In [None]:
# Predict prices for the held-out test rows and line them up with the actual
# prices. "Percentage" is the absolute error as a percentage of the actual price.
yPredicted = linearRegressionModel.predict(xTest)
df = pd.DataFrame({'Predicted': list(yPredicted), 'Actual': list(yTest)})
df["Percentage"] = (df['Predicted'] - df['Actual']).abs() / df['Actual'] * 100
df

## UI for Users

In [None]:
from ipywidgets import widgets
from IPython.display import clear_output

# Each car feature gets its own input widget. The helpers below build, display and
# return a widget, so every feature is declared once instead of repeating the same
# constructor boilerplate. Widgets are displayed in the order they are created.

def _radio_buttons(description, options, **extra):
    """Create, display and return a RadioButtons widget.

    description -- label shown next to the buttons.
    options     -- the selectable values.
    extra       -- further RadioButtons keyword arguments (e.g. layout).
    """
    widget = widgets.RadioButtons(
        options=options,
        description=description,
        disabled=False,
        **extra
    )
    display(widget)
    return widget


def _range_slider(slider_class, column, description, step, readout_format):
    """Create, display and return a slider covering the observed range of a column.

    slider_class   -- widgets.FloatSlider or widgets.IntSlider.
    column         -- name of the cars column whose min/max bound the slider.
    description    -- label shown next to the slider.
    step           -- slider increment.
    readout_format -- format spec used for the value shown beside the slider.

    The slider starts at the column minimum.
    """
    lowest = cars[column].min()
    widget = slider_class(
        value=lowest,
        min=lowest,
        max=cars[column].max(),
        step=step,
        description=description,
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format=readout_format,
    )
    display(widget)
    return widget


def _float_slider(column, description):
    """Slider for a fractional feature: 0.1 steps, one decimal in the readout."""
    return _range_slider(widgets.FloatSlider, column, description, 0.1, '.1f')


def _int_slider(column, description):
    """Slider for a whole-number feature: unit steps, integer readout."""
    return _range_slider(widgets.IntSlider, column, description, 1, 'd')


# Symboling has a fixed list of choices; every other radio group offers the values
# that actually occur in the data.
symboling = _radio_buttons('Symboling:', ['-2', '-1', '0', '1', '2', '3'])
fueltype = _radio_buttons('Fuel Type:', cars["fueltype"].unique())
aspiration = _radio_buttons('Aspiration:', cars["aspiration"].unique())
doornumber = _radio_buttons('Doornumber:', cars["doornumber"].unique())
carbody = _radio_buttons('Car body:', cars["carbody"].unique())
drivewheel = _radio_buttons('Drive wheel:', cars["drivewheel"].unique())
enginelocation = _radio_buttons(
    'Engine location:',
    cars["enginelocation"].unique(),
    layout={'width': 'max-content'},
)

wheelbase = _float_slider('wheelbase', 'Wheelbase:')
carlength = _float_slider('carlength', 'Car length:')
carwidth = _float_slider('carwidth', 'Car Width:')
carheight = _float_slider('carheight', 'Car Height:')
curbweight = _int_slider('curbweight', 'Curb Weight:')

enginetype = _radio_buttons('Engine type:', cars["enginetype"].unique())
cylindernumber = _radio_buttons('Cylinder number:', cars["cylindernumber"].unique())
enginesize = _int_slider('enginesize', 'Engine size:')
fuelsystem = _radio_buttons('Fuel system:', cars["fuelsystem"].unique())

boreratio = _float_slider('boreratio', 'Bore ratio:')
stroke = _float_slider('stroke', 'Stroke:')
compressionratio = _float_slider('compressionratio', 'Compression ratio:')
horsepower = _int_slider('horsepower', 'Horse power:')
peakrpm = _int_slider('peakrpm', 'Peak rpm:')
citympg = _int_slider('citympg', 'City mpg:')
highwaympg = _int_slider('highwaympg', 'Highway mpg:')

# Quick check for dummy variables: 1 when the two values match, otherwise 0.
def dummycheck(src, check):
    """Return 1 if src equals check, else 0.

    src   -- the value to test (e.g. the current selection of a widget).
    check -- the value to compare it against.
    """
    return 1 if src == check else 0

#When the submit button is pressed this function will be run to get the market value.
def SubmitForm(b):
    """Predict and print a price estimate from the current widget values.

    b -- the Button that was clicked (passed in by on_click; not used).

    Builds a single-row DataFrame from the widget values, runs it through
    linearRegressionModel and prints the rounded estimate.
    """
    print("Generating Estimate...")
    d = {
        # The symboling radio options are strings, so convert the selection to a
        # number instead of handing the model a text column.
        'symboling': int(symboling.value),
        'wheelbase': wheelbase.value,
        'carlength': carlength.value,
        'carwidth': carwidth.value,
        'carheight': carheight.value,
        'curbweight': curbweight.value,
        'enginesize': enginesize.value,
        'boreratio': boreratio.value,
        'stroke': stroke.value,
        'compressionratio': compressionratio.value,
        'horsepower': horsepower.value,
        'peakrpm': peakrpm.value,
        'citympg': citympg.value,
        'highwaympg': highwaympg.value,
        # Dummy (one-hot) columns: 1 for the selected level, 0 otherwise. A level
        # with no column of its own is encoded as all of its feature's dummies
        # being 0. NOTE(review): these keys are expected to match the columns the
        # model was trained on - verify if the data set changes.
        'fueltype_gas': dummycheck(fueltype.value, "gas"),
        'aspiration_turbo': dummycheck(aspiration.value, "turbo"),
        'doornumber_two': dummycheck(doornumber.value, "two"),
        'carbody_hardtop': dummycheck(carbody.value, "hardtop"),
        'carbody_hatchback': dummycheck(carbody.value, "hatchback"),
        'carbody_sedan': dummycheck(carbody.value, "sedan"),
        'carbody_wagon': dummycheck(carbody.value, "wagon"),
        'drivewheel_fwd': dummycheck(drivewheel.value, "fwd"),
        'drivewheel_rwd': dummycheck(drivewheel.value, "rwd"),
        'enginelocation_rear': dummycheck(enginelocation.value, "rear"),
        'enginetype_dohcv': dummycheck(enginetype.value, "dohcv"),
        'enginetype_l': dummycheck(enginetype.value, "l"),
        'enginetype_ohc': dummycheck(enginetype.value, "ohc"),
        'enginetype_ohcf': dummycheck(enginetype.value, "ohcf"),
        'enginetype_ohcv': dummycheck(enginetype.value, "ohcv"),
        'enginetype_rotor': dummycheck(enginetype.value, "rotor"),
        'cylindernumber_five': dummycheck(cylindernumber.value, "five"),
        'cylindernumber_four': dummycheck(cylindernumber.value, "four"),
        'cylindernumber_six': dummycheck(cylindernumber.value, "six"),
        'cylindernumber_three': dummycheck(cylindernumber.value, "three"),
        'cylindernumber_twelve': dummycheck(cylindernumber.value, "twelve"),
        'cylindernumber_two': dummycheck(cylindernumber.value, "two"),
        'fuelsystem_2bbl': dummycheck(fuelsystem.value, "2bbl"),
        'fuelsystem_4bbl': dummycheck(fuelsystem.value, "4bbl"),
        'fuelsystem_idi': dummycheck(fuelsystem.value, "idi"),
        'fuelsystem_mfi': dummycheck(fuelsystem.value, "mfi"),
        'fuelsystem_mpfi': dummycheck(fuelsystem.value, "mpfi"),
        'fuelsystem_spdi': dummycheck(fuelsystem.value, "spdi"),
        'fuelsystem_spfi': dummycheck(fuelsystem.value, "spfi"),
    }
    # index=[0] is required because every value is a scalar: it makes a one-row frame.
    custom = pd.DataFrame(data=d, index=[0])
    customPrice = linearRegressionModel.predict(custom)
    print("Car market value estimate: $" + str(round(customPrice.item(0))))

# Create the submit button, have it run SubmitForm on every click, and show it.
submitButton = widgets.Button(description="Submit")
submitButton.on_click(callback=SubmitForm)
display(submitButton)