In [43]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy.stats import zscore
import traceback

# Z-score cutoff, in standard deviations. Presumably the M1 counterpart of
# M2_STD_THRESHOLD; it is not referenced by any cell visible here - TODO confirm.
M1_STD_THRESHOLD = 2
# Z-score cutoff, in standard deviations, applied by removeOutliers to
# days_to_sell within each make/model/trim/year group.
M2_STD_THRESHOLD = 2

# CSV read by getData. The cells below use its make, model, trim, year,
# mileage and days_to_sell columns.
# NOTE(review): absolute, user-specific path - the notebook will not run on
# another machine without editing this line.
M2_DATA_FILE_PATH = "/home/ahsan/Downloads/days_to_sell_m2.csv"

In [77]:
def getData(year, make, model, trim, mileage):
    """Load the listings CSV and return the rows for one make/model/trim.

    Parameters
    ----------
    year, mileage
        Accepted so the signature matches the other helpers; they are not
        used for filtering here.
    make, model, trim
        Values compared for exact equality against the "make", "model" and
        "trim" columns.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with every column and the original row index of
        the CSV. Empty when nothing matches.
    """
    listings = pd.read_csv(M2_DATA_FILE_PATH)

    # One combined mask instead of three successive filters; same rows.
    is_match = (
        (listings["make"] == make)
        & (listings["model"] == model)
        & (listings["trim"] == trim)
    )

    return listings[is_match]

In [82]:
def removeOutliers(data, threshold=None):
    """Drop rows whose ``days_to_sell`` is an outlier within its group.

    Rows are grouped by make/model/trim/year and ``days_to_sell`` is
    z-scored (sample standard deviation, ``ddof=1``) inside each group.
    A row is kept when its z-score lies in ``[-threshold, threshold]``.
    Only ``days_to_sell`` is screened; ``mileage`` and ``year`` are not.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns make, model, trim, year, mileage and
        days_to_sell. It is NOT modified; the function works on a copy.
    threshold : float, optional
        Z-score cutoff in standard deviations. Defaults to the module-level
        ``M2_STD_THRESHOLD`` when omitted.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with year, mileage and days_to_sell cast to int
        and the make, model and trim columns removed. The original row
        index is preserved.

    Raises
    ------
    ValueError / TypeError
        If year, mileage or days_to_sell cannot be cast to int.
    """
    if threshold is None:
        threshold = M2_STD_THRESHOLD

    # Callers usually pass a filtered slice of a larger frame. Writing to it
    # directly triggers SettingWithCopyWarning and leaks helper columns back
    # to the caller, so operate on an explicit copy.
    cleaned = data.copy()
    for column in ('days_to_sell', 'year', 'mileage'):
        cleaned[column] = cleaned[column].astype(int)

    scores = cleaned.groupby(['make', 'model', 'trim', 'year'])['days_to_sell'].transform(
        lambda values: zscore(values.to_numpy(dtype=float), ddof=1)
    )

    # A group with a single listing (or with identical values) has no defined
    # z-score. Filling with the threshold keeps those rows. The result is
    # assigned back rather than using fillna(inplace=True) on a column
    # selection, which does nothing under pandas Copy-on-Write.
    scores = scores.fillna(threshold)

    # between() is inclusive on both ends: -threshold <= z <= threshold.
    within_band = scores.between(-threshold, threshold)

    return cleaned[within_band].drop(columns=['trim', 'make', 'model'])


In [83]:
# Example query used to exercise the helpers above.
trim = "TFSI quattro"
make = "Audi"
model = "Q7"
year = 2013
mileage = 150000

# getData filters on make/model/trim only, so every model year of this trim is
# loaded. removeOutliers screens days_to_sell only, so unusual mileage values
# are not removed by this step.
data = getData(year, make, model, trim, mileage)
print( removeOutliers(data) )

      year  mileage  days_to_sell
18    2012   145000            90
19    2012   145469            44
1133  2013    74912            22
1134  2013    94900             9
1135  2013    95000             8
1136  2013   110000            33
1137  2013   116500            31
1138  2013   156000            17
1139  2013   157500            19
1140  2013  1250000            43
2796  2014    81000            58
2797  2014    81000            79
2798  2014    96800            38
2799  2014   119000            25
2800  2014   119400            28
2801  2014   167523            44


In [84]:
# The R2 score is no longer used for M1.
# M1_ACCEPTABLE_R2_SCORE = 0
# R2 score that the M2 regression must strictly exceed; otherwise getPrice
# returns 0 instead of the predicted days.
M2_ACCEPTABLE_R2_SCORE = 0.45
# NOTE(review): not referenced by any cell visible here - confirm it is still needed.
DEPRECIATION_PERCENTAGE = 1.5


def sanitize(str):
    """Return ``str`` lower-cased with spaces, '-', '.' and '+' turned into '_'."""
    # The parameter shadows the built-in ``str`` type, so the translation
    # table is built from a string literal instead of ``str.maketrans``.
    to_underscore = "".maketrans(" -.+", "____")
    return str.translate(to_underscore).lower()



def predict_m2(year, make, model, trim, mileage):
    """Predict days-to-sell for one vehicle with a per-trim linear regression.

    The listings for the given make/model/trim are loaded with ``getData``,
    cleaned with ``removeOutliers``, and a ``LinearRegression`` is fitted on
    every remaining column except ``days_to_sell``. The model is then
    evaluated at the requested ``year`` and ``mileage``.

    Parameters
    ----------
    year, mileage
        Values for the query row; each is converted with ``int()``.
    make, model, trim
        Identify which listings to train on.

    Returns
    -------
    dict
        ``{"score": ..., "days_to_sell": ..., "error": ...}``.
        ``score`` is the R2 of the fit on its own training rows.
        ``days_to_sell`` is the raw prediction and may be negative.
        ``error`` holds the exception message if prediction failed, and is
        otherwise ``""``. All three keep their defaults (0, 0, "") when
        there are no matching listings or at most two rows survive outlier
        removal.

    Raises
    ------
    ValueError / TypeError
        If ``year`` or ``mileage`` cannot be converted to ``int``.
    """
    response = {"score": 0, "days_to_sell": 0, "error": ""}

    trimData = getData(year, make, model, trim, mileage)
    if len(trimData) == 0:
        return response

    trimDataOutliersRemoved = removeOutliers(trimData)
    # More than two rows are required before a fit is attempted.
    if len(trimDataOutliersRemoved) <= 2:
        return response

    features = trimDataOutliersRemoved.drop('days_to_sell', axis='columns')
    target = trimDataOutliersRemoved.days_to_sell

    # Named `regressor` so the `model` argument (the vehicle model) is not shadowed.
    regressor = LinearRegression()
    regressor.fit(features, target)
    response['score'] = regressor.score(features, target)

    # Build the query row by column NAME rather than by position, so the
    # result does not depend on the column order of the CSV. Any feature
    # other than year/mileage stays at 0, as before.
    query = pd.DataFrame(0.0, index=[0], columns=features.columns)
    query['year'] = int(year)
    query['mileage'] = int(mileage)

    try:
        response['days_to_sell'] = regressor.predict(query)[0]
    except Exception as e:
        # Best-effort: keep returning a response, but report why it failed
        # instead of discarding the exception.
        response['days_to_sell'] = 0
        response['error'] = str(e)

    return response


def getPrice(year, make, model, trim, mileage):
    """Return the M2 days-to-sell estimate, or 0 when it is not trustworthy.

    Despite the name, the value returned is a number of days, not a price.
    The ``predict_m2`` response is printed, then:

    * if its score does not exceed ``M2_ACCEPTABLE_R2_SCORE``, 0 is returned;
    * if the predicted value is negative, 0 is returned;
    * otherwise the predicted value is returned.

    Any exception is printed together with its traceback, and 0 is returned.
    """
    try:
        m2_response = predict_m2(year, make, model, trim, mileage)

        print( m2_response )

        score_is_acceptable = m2_response['score'] > M2_ACCEPTABLE_R2_SCORE
        final_days = m2_response['days_to_sell'] if score_is_acceptable else 0

        # Negative predictions are clamped to zero.
        return 0 if final_days < 0 else final_days
    except Exception as ex:
        print(ex)
        print(traceback.format_exc())
        return 0

In [85]:
# Same example query as above, this time run through the full pipeline.
trim = "TFSI quattro"
make = "Audi"
model = "Q7"
year = 2013
mileage = 150000

# getPrice prints the raw predict_m2 response and returns 0 unless the R2
# score exceeds M2_ACCEPTABLE_R2_SCORE.
getPrice( year, make, model, trim, mileage )

{'score': 0.006154399086188534, 'days_to_sell': 36.78162591170735, 'error': ''}


0