In [105]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from scipy.stats import zscore
import traceback

# NOTE(review): M1_STD_THRESHOLD is not referenced by any code visible in this
# notebook — confirm it is still needed.
M1_STD_THRESHOLD = 2
# removeOutliers() keeps only rows whose per-(make, model, trim) z-score of
# days_to_sell lies within +/- this many standard deviations.
M2_STD_THRESHOLD = 2

# CSV file that getData() reads on every call.
# NOTE(review): absolute, user-specific path — the notebook cannot run on
# another machine without editing this line.
M2_DATA_FILE_PATH = "/home/ahsan/Downloads/days_to_sell_m1.csv"

In [109]:
def getData(year, make, model, trim):
    """Load the days-to-sell CSV and return the rows for one make/model/trim.

    The file at M2_DATA_FILE_PATH is re-read on every call. ``year`` is
    accepted but is not used for filtering, so rows for every year of the
    requested trim are returned.
    """
    listings = pd.read_csv(M2_DATA_FILE_PATH)

    # One combined mask instead of three successive filters; same rows, same index.
    is_requested_trim = (
        (listings["make"] == make)
        & (listings["model"] == model)
        & (listings["trim"] == trim)
    )

    return listings[is_requested_trim]

In [132]:
def removeOutliers(data, threshold=None):
    """Drop rows whose days_to_sell is an outlier within its make/model/trim group.

    A sample z-score (ddof=1) of ``days_to_sell`` is computed per
    (make, model, trim) group, and only rows with a z-score inside
    ``[-threshold, +threshold]`` are kept. Rows whose z-score is undefined
    (NaN, e.g. a group with a single row) are always kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'make', 'model', 'trim', 'year' and
        'days_to_sell'. The frame is NOT modified.
    threshold : number, optional
        Z-score cutoff in standard deviations. Defaults to M2_STD_THRESHOLD.

    Returns
    -------
    pandas.DataFrame
        A new frame without the outlier rows and without the 'make', 'model',
        'trim' and helper z-score columns; 'year' and 'days_to_sell' are cast
        to int.
    """
    if threshold is None:
        threshold = M2_STD_THRESHOLD

    # Work on a copy: the input is usually a filtered slice of a larger frame,
    # and writing columns into it mutates the caller's data and triggers
    # SettingWithCopyWarning.
    cleaned = data.copy()
    cleaned['days_to_sell'] = cleaned['days_to_sell'].astype(int)
    cleaned['year'] = cleaned['year'].astype(int)

    z_scores = cleaned.groupby(['make', 'model', 'trim'])['days_to_sell'].transform(
        lambda x: zscore(x, ddof=1)
    )

    # Where no z-score can be computed, substitute the threshold itself so the
    # row passes the range check below. Assign the result rather than calling
    # fillna(inplace=True) on a column selection: that chained form is
    # deprecated and has no effect under pandas Copy-on-Write.
    cleaned['zs_days_to_sell'] = z_scores.fillna(threshold)

    within_range = cleaned['zs_days_to_sell'].between(-threshold, threshold)
    cleaned = cleaned[within_range]

    return cleaned.drop(['zs_days_to_sell', 'trim', 'make', 'model'], axis=1)


In [133]:
# Smoke test: load one trim and show the rows that survive outlier removal.
data = getData(year=2012, make="Hyundai", model="Accent", trim="GL")
print(removeOutliers(data))

      year  days_to_sell
157   2012            26
158   2012            27
159   2012            45
160   2012            70
161   2012            74
162   2012            94
163   2012            95
164   2012            98
165   2012            99
166   2012           102
167   2012           103
168   2012           107
169   2012           108
170   2012           109
171   2012           110
172   2012           112
173   2012           115
174   2012           116
1285  2013            18
1286  2013            32
1287  2013            35
1288  2013            95
2609  2014            30
2610  2014            41
4105  2015            11
4106  2015            17
4107  2015            30
4108  2015            37
4109  2015            49
4110  2015            55
4111  2015            69
6279  2016            11
6280  2016            22
6281  2016            95
8084  2017            14
8085  2017            15
8086  2017            16
8087  2017            22
8088  2017            24


In [130]:
# Minimum score a prediction must exceed before getPrice() returns its
# days_to_sell value; at or below this score getPrice() returns 0.
# NOTE(review): the original note here said the R2 score is no longer used for
# M1 (threshold 0, kept commented out below), yet the active value is 0.45 and
# getPrice() still compares against it — confirm which is intended.
# M1_ACCEPTABLE_R2_SCORE = 0
M1_ACCEPTABLE_R2_SCORE = 0.45
# NOTE(review): not referenced by any code visible in this notebook.
DEPRECIATION_PERCENTAGE = 1.5


def sanitize(str):
    """Return ``str`` lower-cased with spaces, '-', '.' and '+' replaced by '_'.

    NOTE: the parameter name shadows the built-in ``str`` inside this
    function; it is kept as-is so existing callers are unaffected.
    """
    # Translation table mapping each separator character to an underscore.
    to_underscore = {ord(symbol): "_" for symbol in " -.+"}
    return str.translate(to_underscore).lower()



def predict_m1(year, make, model, trim):
    """Predict days-to-sell for a vehicle with a per-trim linear regression.

    The rows for the given make/model/trim are loaded with getData(), cleaned
    with removeOutliers(), and a LinearRegression is fitted on the remaining
    feature columns against ``days_to_sell``. The fitted model is then
    evaluated at ``year``.

    Parameters
    ----------
    year : value convertible with int()
        Model year to predict for.
    make, model, trim : str
        Vehicle identifiers used to select the training rows.

    Returns
    -------
    dict
        ``{"score", "days_to_sell", "error"}``. ``score`` is the regressor's
        score on its own training data. All values stay at their defaults
        (0, 0, "") when there are no rows, or when 2 or fewer rows remain
        after outlier removal. If the prediction call fails, ``days_to_sell``
        is 0 and ``error`` holds the exception message.
    """
    response = {"score": 0, "days_to_sell": 0, "error": ""}

    trimData = getData(year, make, model, trim)
    if len(trimData) == 0:
        return response

    trimDataOutliersRemoved = removeOutliers(trimData)
    if len(trimDataOutliersRemoved) <= 2:
        return response

    X = trimDataOutliersRemoved.drop('days_to_sell', axis='columns')
    y = trimDataOutliersRemoved.days_to_sell

    # Named `regressor` so it does not shadow the `model` (vehicle model) parameter.
    regressor = LinearRegression()
    regressor.fit(X, y)
    response['score'] = regressor.score(X, y)

    # Single-row query with the same columns the regressor was fitted on, so
    # scikit-learn does not warn about missing feature names. As before, the
    # first feature column receives the year and any others stay 0.
    # NOTE(review): this relies on 'year' being the first remaining column.
    query = pd.DataFrame(np.zeros((1, len(X.columns))), columns=X.columns)
    query.iloc[0, 0] = int(year)

    try:
        response['days_to_sell'] = regressor.predict(query)[0]
    except Exception as e:
        # Best effort: keep returning a response, but record why it is empty
        # instead of discarding the failure.
        response['days_to_sell'] = 0
        response['error'] = str(e)

    return response


def getPrice(year, make, model, trim):
    """Return the predicted days-to-sell for a vehicle, or 0 if unreliable.

    Calls predict_m1() and prints its response. The predicted
    ``days_to_sell`` is returned only when the response's ``score`` is
    strictly greater than M1_ACCEPTABLE_R2_SCORE; otherwise 0 is returned.
    Negative predictions are clamped to 0.

    Any exception is printed together with its traceback and 0 is returned,
    so this function never raises.

    NOTE: despite its name, this function returns a number of days, not a price.
    """
    try:
        # Previously this called predict_m2, which is not defined anywhere in
        # this notebook: on a fresh kernel that raised NameError, which the
        # handler below swallowed, so the function always returned 0.
        m1_response = predict_m1(year, make, model, trim)

        print( m1_response )

        if m1_response['score'] > M1_ACCEPTABLE_R2_SCORE:
            final_days = m1_response['days_to_sell']
        else:
            final_days = 0

        # A linear fit can extrapolate below zero; days to sell cannot be negative.
        if final_days < 0:
            final_days = 0

        return final_days
    except Exception as ex:
        print(ex)
        print(traceback.format_exc())
        return 0

In [134]:
# Example query. getPrice() returns 0 unless the fitted score exceeds
# M1_ACCEPTABLE_R2_SCORE.
year = 2012
make = "Hyundai"
model = "Accent"
trim = "GL"

getPrice(year, make, model, trim)

{'score': 0.24752109793623922, 'days_to_sell': 80.24236252545961, 'error': ''}


0