In [1]:
import pandas as pd
import pickle

from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

init_notebook_mode(connected=True)

In [2]:
file_name = "data_encoded.csv"
encoded_data = pd.read_csv(file_name)

In [3]:
encoded_data.columns

Index(['apple-iphone-6s', 'apple-iphone-6s-plus', 'apple-iphone-7-a1660',
       'apple-iphone-7-plus-a1661', 'apple-iphone-8-a1863',
       'apple-iphone-8-plus-a1864', 'apple-iphone-se', 'apple-iphone-x-a1865',
       'att', 'sprint', 'tmobile', 'verizon', 'Fair', 'Good', 'Mint',
       'New (Resale)', 'Black', 'Gold', 'Gray', 'Grey', 'Red', 'Rose Gold',
       'Silver', 'Size', 'Recently Sold Price'],
      dtype='object')

In [4]:
y = encoded_data.pop('Recently Sold Price')
X = encoded_data

In [5]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=444)

regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(X_train, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(y_test, y_pred))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(y_test, y_pred))

Coefficients: 
 [-157.2972649   -99.95847545 -103.36730526  -13.12241974   88.09605832
  179.08530693 -199.2456744   305.80977449   14.77437741   -5.35375979
   -2.76370439   -6.65691322  -46.94445563  -14.62013034   10.49687109
   51.06771487   10.26484433   -1.23542126    5.84169778   -9.76722358
   14.14108847   -3.41412665  -15.83085908    0.34452261]
Mean squared error: 1179.64
Variance score: 0.97



internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.



In [6]:
max(y_pred)

738.8822464465013

In [7]:
# Plot outputs
print("X:",X_test.shape,"y:",y_test.shape)

trace1 = go.Scatter(
        x=(y_pred),
        y=(y_test),
        mode='markers'
        )
trace2 = go.Scatter(
        x=list(range(0,int(max(y_pred)))),
        y=list(range(0,int(max(y_pred)))),
        mode='markers'
        )
data = [trace1,trace2]

iplot(data,filename='basic')

X: (25, 24) y: (25,)


## Testing With An Example

In [8]:
mlb_model = pickle.load(open('mlb_model.sav', 'rb'))
mlb_color = pickle.load(open('mlb_color.sav', 'rb'))
mlb_carrier = pickle.load(open('mlb_carrier.sav', 'rb'))
mlb_condition = pickle.load(open('mlb_condition.sav', 'rb'))

In [9]:
print(mlb_model.classes_)

['apple-iphone-6s' 'apple-iphone-6s-plus' 'apple-iphone-7-a1660'
 'apple-iphone-7-plus-a1661' 'apple-iphone-8-a1863'
 'apple-iphone-8-plus-a1864' 'apple-iphone-se' 'apple-iphone-x-a1865']


In [10]:
def predict(model,color,carrier,condition,size,coeff):
    test_vect = []
    
    test_vect.extend(mlb_model.transform([[model]])[0])
    test_vect.extend(mlb_color.transform([[color]])[0])
    test_vect.extend(mlb_carrier.transform([[carrier]])[0])
    test_vect.extend(mlb_condition.transform([[condition]])[0]) 
    test_vect.extend([size])
    print(test_vect)
    
    pred_price = np.dot(regr.coef_, np.transpose(test_vect))
    
    return '$'+ str(round(pred_price,2))

In [11]:
predict('apple-iphone-x-a1865','Gray','att','Good',256,regr.coef_)

[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 256]


'$456.45'