In [1]:
from IPython.display import HTML

# Emit an HTML/JavaScript snippet as this cell's output. Once the document is
# ready, code_toggle() runs and hides every input cell ('div.input'); the form
# button below calls code_toggle() again to flip the raw code on and off.
# NOTE(review): the script relies on `$` being defined by the hosting page
# (presumably jQuery from the classic Notebook front end) -- confirm.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
# import all the things
import pandas  as pd 
import numpy  as np 
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import r2_score
from sklearn import model_selection
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
import scipy.stats as stats
import math
from IPython.display import Markdown, Javascript, display
from ipywidgets import widgets

%matplotlib inline

In [6]:
# read in data
# mpg_df: frame used for modelling; later cells read its 'year', 'vehicle_class',
# 'cylinders', 'displacement', 'fuel_type' and 'combMPG' columns.
mpg_df = pd.read_csv('../datasets/mpg_fin.csv')
# mpg_not_scaled: later cells read its 'year', 'vehicle_class' and 'fuel_type'
# columns and show those values under a 'number' heading next to mpg_df's labels.
# NOTE(review): presumably the category-coded version of the same rows -- confirm.
mpg_not_scaled = pd.read_csv('../datasets/mpg_cats.csv')

In [7]:
# Convert object-typed features to categories and then to their numeric codes
def cat_code(df):
    """Encode the categorical columns of ``df`` as numeric codes, in place.

    Every column whose dtype compares equal to ``'object'`` is cast to the
    pandas ``category`` dtype. All ``category`` columns (including any that
    were already categorical on entry) are then replaced by their
    ``.cat.codes``. The frame is modified in place; nothing is returned.
    """
    # Snapshot the (column, dtype) pairs before any column gets converted.
    for column, dtype in list(zip(df.columns, df.dtypes)):
        if dtype == 'object':
            df[column] = df[column].astype('category')

    categorical = df.select_dtypes(['category']).columns
    df[categorical] = df[categorical].apply(lambda col: col.cat.codes)

In [8]:
# Build and fit the models that predict MPG from the user's 'year' and feature values
def user_predict(year, vals):
    """Fit ridge and linear regressions on one model year and display the predicted MPG.

    Parameters
    ----------
    year : int
        Model year; only the rows of ``mpg_df`` whose ``year`` equals this
        value are used for fitting.
    vals : sequence of float
        One value per feature, in the order ``vehicle_class``, ``cylinders``,
        ``displacement``, ``fuel_type``.

    Raises
    ------
    ValueError
        If ``vals`` does not hold exactly one value per feature, or if
        ``mpg_df`` has no rows for ``year``.
    """
    feature_cols = ['vehicle_class', 'cylinders', 'displacement', 'fuel_type']

    # Fail fast with a readable message instead of an error from inside the model.
    vals = list(vals)
    if len(vals) != len(feature_cols):
        raise ValueError('Expected {} values ({}), got {}'.format(
            len(feature_cols), ', '.join(feature_cols), len(vals)))

    # Filter once and reuse the result for both the features and the target.
    year_rows = mpg_df[mpg_df['year'] == year]
    if year_rows.empty:
        raise ValueError('No data available for year {}'.format(year))

    # Copy so that cat_code's in-place encoding works on an independent frame
    # rather than on a slice of mpg_df (avoids SettingWithCopyWarning).
    X = year_rows[feature_cols].copy()
    y = year_rows.combMPG

    # send the data frame to the cat_code function to change strings to numbers
    # NOTE(review): the codes are derived from this year's rows only -- confirm
    # they match the numbers shown to the user by get_vars.
    cat_code(X)

    # instantiate and fit the ridge/linear models
    ridgereg = Ridge(alpha=.1)
    linreg = LinearRegression()
    ridgereg.fit(X, y)
    linreg.fit(X, y)

    # Predict from a one-row frame carrying the same column names the models
    # were fitted with, so the input is matched to the features by name.
    user_row = pd.DataFrame([vals], columns=feature_cols)
    pred = ridgereg.predict(user_row)
    pred2 = linreg.predict(user_row)

    # print results
    display(Markdown('<br/><strong>The predicted MPG is approximately:</strong>'))
    display(Markdown('**{}** or **{}**'.format(pred, pred2)))

In [9]:
# Helper: build the code/label lookup table for one categorical column and year
def _code_table(column, year, label):
    """Pair the unique values of ``column`` in ``mpg_not_scaled`` with those in ``mpg_df``.

    Both frames are filtered to ``year``. The result is a DataFrame with the
    columns ``'number'`` (values from ``mpg_not_scaled``) and ``label`` (values
    from ``mpg_df``), sorted by ``'number'``.
    """
    numbers = mpg_not_scaled.loc[mpg_not_scaled['year'] == year, column].unique()
    labels = mpg_df.loc[mpg_df['year'] == year, column].unique()
    # NOTE(review): pairing by position assumes both files list their rows in
    # the same order, so first appearances line up -- confirm.
    pairs = sorted(zip(numbers, labels), key=lambda pair: pair[0])
    return pd.DataFrame(pairs, columns=['number', label])


# Takes the user input 'year', collects the valid values of each variable
# for that year, and prints the results
def get_vars(year):
    """Display the valid vehicle classes, cylinders, displacements and fuel types for ``year``.

    Vehicle classes and fuel types are shown as tables that map the number
    the user must type to its label. Cylinders and displacements are printed
    as sorted lists of the values present in ``mpg_df`` for that year
    (missing values are left out). Nothing is returned.
    """
    classes_df = _code_table('vehicle_class', year, 'class')
    fuels_df = _code_table('fuel_type', year, 'fuel type')

    # Restrict the numeric features to the chosen year as well, so all four
    # lists describe the same subset of the data. dropna() keeps NaN from
    # breaking the sort; tolist() yields plain Python numbers for printing.
    year_rows = mpg_df[mpg_df['year'] == year]
    cylinders = sorted(year_rows.cylinders.dropna().unique().tolist())
    displacements = sorted(year_rows.displacement.dropna().unique().tolist())

    # display the results
    display(Markdown('<br/><strong>The available vehicle classes are:</strong>'))
    display(classes_df)
    display(Markdown('<br/><strong>The available cylinders are:</strong>'))
    print(cylinders)
    display(Markdown('<br/><strong>The available displacements are:</strong>'))
    print(displacements)
    display(Markdown('<br/><strong>The available fuel types are:</strong>'))
    display(fuels_df)

<img src="https://dannysengineportal.com/common-causes-of-poor-fuel-economy/fuel-efficiency-road-sign-illustration-design/" style="float: left; margin: 40px; height: 300px">

# User MPG Prediction

Use this form to predict a vehicle's miles per gallon (MPG) from a few variables.

My research on the available EPA data indicates that the primary variables affecting fuel efficiency are:
* vehicle type (or class)
* cylinders
* displacement
* fuel type

If you'd like to view my research, you can find relevant programming notebooks here:

If you'd like to get the data for yourself, you can find it here:

## To use this program, enter a year. You'll be given a list of valid vehicle classes, cylinders, displacements, and fuel types.

#### Enter the four values in this order, separated by spaces: vehicle class code, cylinders, displacement, fuel type code
#### Don't use commas or quotes.

For example:
`0 6 3.6 8` (vehicle class code 0, 6 cylinders, displacement 3.6, fuel type code 8)

In [4]:
# Button callback: execute every cell below this one
def run_all(ev):
    """Run all notebook cells located below the current cell.

    ``ev`` is the argument passed in by the button's click handler; it is
    accepted so the function can be used as a callback but is not used.
    """
    run_below_js = Javascript('IPython.notebook.execute_cells_below()')
    display(run_below_js)

In [5]:
# Create a "GO" button that runs the code below when it is clicked
button = widgets.Button(description="GO")
button.on_click(run_all)  # run_all is registered as the click callback
display(button)

Button(description='GO', style=ButtonStyle())

In [None]:
# Names of the values the user must enter, in the order user_predict expects them
EXPECTED_FEATURES = ('vehicle class code', 'cylinders', 'displacement', 'fuel type code')

# Read the year and reject anything that is not a whole number with data behind it,
# so the user gets a clear message instead of a failure deep inside the model code.
year_text = input("Enter a year between 1985 and 2018")
try:
    year_input = int(year_text)
except ValueError:
    raise ValueError("'{}' is not a whole-number year".format(year_text.strip())) from None
if not (mpg_df['year'] == year_input).any():
    raise ValueError('No data available for year {}'.format(year_input))

# Show the valid values for the chosen year before asking for them
get_vars(year_input)

features = input("Input chosen features from list above separated by a space. No commas or quotes: ")
featured_cols = features.split()   # required step to get input into a usable list

# Exactly one value per feature is required
if len(featured_cols) != len(EXPECTED_FEATURES):
    raise ValueError('Expected {} values ({}), got {}'.format(
        len(EXPECTED_FEATURES), ', '.join(EXPECTED_FEATURES), len(featured_cols)))

try:
    featured_cols = [float(i) for i in featured_cols]
except ValueError:
    raise ValueError('Every feature must be a number; got: {}'.format(features.strip())) from None

user_predict(year_input, featured_cols)