The goal is to find the regression of price on model_year for every car model in the list, using Support Vector Regression as the regression algorithm. I am using GridSearchCV to find the best SVR parameters.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
pd.set_option('display.max_rows', 500)

In [None]:
# Build a single dataframe holding the cleaned listings of every car.
# The list of car names comes from the 'Car' column of both.csv; each name
# maps to its own cleaned CSV file.

both = pd.read_csv('/Users/avacheevers/Documents/Cars/both.csv')
sources = []
for car in both['Car']:
    listings = pd.read_csv('/Users/avacheevers/Documents/Cars/3_cleaned_df_' + car + '.csv')
    if car == 'Volkswagen Beetle':
        # The Beetle has a second file (new_beetle) that is appended to the
        # rows read from its regular file.
        new_beetle = pd.read_csv('/Users/avacheevers/Documents/Cars/3_cleaned_df_new_beetle.csv')
        listings = pd.concat([listings, new_beetle], ignore_index=True)
    # Tag every row with the car name it was loaded for.
    listings['car'] = car
    sources.append(listings)

# Stack all per-car frames and keep only the columns used downstream.
keep_columns = ['car', 'make_model', 'model_year', 'trim', 'price', 'mileage']
df = pd.concat(sources, ignore_index=True)[keep_columns]

In [None]:
# Grid search over polynomial-kernel SVR hyper-parameters.
# Every combination of the values below is scored with 5-fold
# cross-validation using negative mean squared error, on all available cores.
svr_param_grid = {
    'degree': [1, 2, 3, 4],
    'C': [0.01, 0.1, 1, 100, 1000, 10000],
    'epsilon': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10],
    'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5],
}

clf = GridSearchCV(
    estimator=SVR(kernel='poly'),
    param_grid=svr_param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=0,
    n_jobs=-1,
)

In [None]:
# Loop through all car models: tune and fit an SVR of price on model_year,
# plot the fitted curve against the data, save the figure, and record the
# model year with the lowest predicted price.

# Result containers keyed by car name. They must exist before the loop
# assigns into them, otherwise the first iteration raises NameError.
fit = {}        # car -> fitted SVR estimator
min_price = {}  # car -> model year (1-element array) with the lowest predicted price

# Candidate model years 1960..2020 as a column vector. This does not depend on
# the car, so it is built once instead of on every iteration.
arr = np.arange(1960, 2021).reshape(-1, 1)

for car in df.car.unique():
    df_sub = df[df['car'] == car]
    df_sub = df_sub.dropna()
    # Sorted by year so the fitted line is drawn left to right.
    df_sub = df_sub.sort_values(by=['model_year'])

    # scikit-learn expects a 2-D feature matrix: one column, one row per listing.
    X = np.array(df_sub['model_year']).reshape(-1, 1)
    y = df_sub['price']

    grid_result = clf.fit(X, y)

    # Use the estimator the grid search actually tuned. With the default
    # refit=True, GridSearchCV refits the best parameter combination on all of
    # X, y and exposes it as best_estimator_. Rebuilding an SVR by hand with a
    # different kernel would apply parameters tuned for one kernel to another
    # and silently drop the tuned 'degree'.
    best_svr = grid_result.best_estimator_

    fit[car] = best_svr

    yfit = best_svr.predict(X)

    plt.scatter(X, y, s=5, color="blue", label="original")
    plt.plot(X, yfit, lw=2, color="red", label="fitted")
    plt.legend()
    plt.title(car)
    plt.savefig('/Users/avacheevers/Documents/Cars/5_'+car+'.png')
    plt.show()

    # Predict over the whole year range and keep the year where the
    # predicted price is lowest.
    yfit = best_svr.predict(arr)
    min_index_row = np.argmin(yfit, axis=0)
    min_price[car] = arr[min_index_row]

    print(car, 'completed')