In [1]:
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import pickle

In [2]:
# Parse flask_jobs.csv into two parallel lists: one runtime per data row and
# one [numeric feature, job-type string] pair per data row.
flask_jobs_features = []
flask_jobs_runtimes = []
with open('flask_jobs.csv', 'r') as f:
    header = f.readline()  # first line is read as a header and not parsed
    for line in f:
        # Rows are tokenised on whitespace, not on commas: the first token
        # carries the leading comma-separated columns, the following tokens
        # carry the feature values.
        tokens = line.split()
        if not tokens:
            # Blank line (e.g. a trailing newline at end of file); indexing
            # tokens[0] here would raise IndexError, so skip it.
            continue
        # Comma-field 5 of the first token is taken as the runtime.
        flask_jobs_runtimes.append(float(tokens[0].split(',')[5]))
        # Token 1 is a number with a trailing comma; token 2 is the job type
        # with surrounding quote / bracket characters stripped.
        flask_jobs_features.append([float(tokens[1].strip(',')), tokens[2].strip("\']")])

In [3]:
# Regression targets: the parsed flask job runtimes as a 1-D array.
Y_flask = np.array(flask_jobs_runtimes)

# Design matrix: column 0 is the numeric feature, columns 1-5 are a one-hot
# encoding of the job-type string (column index given by the table below).
job_type_column = {
    'word_count': 1,
    'tim': 2,
    'insertion': 3,
    'bubble': 4,
    'bogo': 5,
}
X_flask = np.zeros((len(flask_jobs_features), 6))
for row, (value, job_type) in enumerate(flask_jobs_features):
    X_flask[row, 0] = value
    column = job_type_column.get(job_type)
    if column is None:
        # Unknown job type: the row keeps an all-zero one-hot part.
        print('you missed one...')
    else:
        X_flask[row, column] = 1

In [10]:
# Hyper-parameter grid for an RBF-kernel SVR: 5 values of C x 5 values of
# epsilon = 25 candidates.
# NOTE(review): X_flask is passed to SVR unscaled; scikit-learn recommends
# scaling features for SVMs -- consider whether that matters here.
parameters = {
    'kernel': ['rbf'],
    'C': [1, 10, 100, 1000, 10000],
    'epsilon': [.1, .2, .3, .4, .5],
}
# 5-fold cross-validated search scored by negated MSE (closer to 0 is better);
# refit=True retrains the best candidate so best_estimator_ is available.
clf_flask = GridSearchCV(
    estimator=SVR(),
    param_grid=parameters,
    cv=5,
    refit=True,
    scoring='neg_mean_squared_error',
)
clf_flask.fit(X_flask, Y_flask)
# One mean cross-validation score per grid candidate.
print(clf_flask.cv_results_['mean_test_score'])

# Persist the refitted best model to disk.
clf_flask_final = clf_flask.best_estimator_
with open('flask_predictor.pkl', 'wb') as f:
    pickle.dump(clf_flask_final, f)

[-2.32477123 -2.2854607  -2.28511375 -2.33912551 -2.38020751 -2.14443677
 -2.12278506 -2.13551899 -2.1565824  -2.22570582 -2.13054669 -2.12027949
 -2.13256653 -2.15057739 -2.21836097 -2.1304281  -2.12031597 -2.13251719
 -2.15062115 -2.21835725 -2.13037816 -2.12032143 -2.13254785 -2.15060779
 -2.21843102]


In [11]:
# Parse ml_jobs.csv into a runtime list and a list of 3-value feature rows,
# then convert both to NumPy arrays for model fitting.
ml_jobs_features = []
ml_jobs_runtimes = []
with open('ml_jobs.csv', 'r') as f:
    header = f.readline()  # first line is read as a header and not parsed
    for line in f:
        # Rows are tokenised on whitespace, not on commas.
        tokens = line.split()
        if not tokens:
            # Blank line (e.g. a trailing newline at end of file); indexing
            # tokens[0] here would raise IndexError, so skip it.
            continue
        # Third-from-last comma field of the first token is taken as the runtime.
        ml_jobs_runtimes.append(float(tokens[0].split(',')[-3]))
        # Tokens 1-3 are the numeric features, with separator / closing-bracket
        # characters stripped before conversion.
        ml_jobs_features.append([
            float(tokens[1].strip(' ,')),
            float(tokens[2].strip(',')),
            float(tokens[3].strip(']')),
        ])
Y_ML = np.array(ml_jobs_runtimes)
X_ML = np.array(ml_jobs_features)

In [12]:
# Hyper-parameter grid for an RBF-kernel SVR: 5 values of C x 5 values of
# epsilon = 25 candidates.
# NOTE(review): X_ML is passed to SVR unscaled; scikit-learn recommends
# scaling features for SVMs -- consider whether that matters here.
parameters = {
    'kernel': ['rbf'],
    'C': [1, 10, 100, 1000, 10000],
    'epsilon': [.1, .2, .3, .4, .5],
}
# 5-fold cross-validated search scored by negated MSE (closer to 0 is better);
# refit=True retrains the best candidate so best_estimator_ is available.
clf_ML = GridSearchCV(
    estimator=SVR(),
    param_grid=parameters,
    cv=5,
    refit=True,
    scoring='neg_mean_squared_error',
)
clf_ML.fit(X_ML, Y_ML)
# One mean cross-validation score per grid candidate.
print(clf_ML.cv_results_['mean_test_score'])

# Persist the refitted best model to disk.
clf_ML_final = clf_ML.best_estimator_
with open('ML_predictor.pkl', 'wb') as f:
    pickle.dump(clf_ML_final, f)

[-0.01118333 -0.02577966 -0.05461229 -0.09783581 -0.16036938 -0.01118333
 -0.02577966 -0.05461229 -0.09783581 -0.16036938 -0.01118333 -0.02577966
 -0.05461229 -0.09783581 -0.16036938 -0.01118333 -0.02577966 -0.05461229
 -0.09783581 -0.16036938 -0.01118333 -0.02577966 -0.05461229 -0.09783581
 -0.16036938]


In [18]:
# Parse Eli_MR_output.csv into a runtime list and a list of 3-value feature
# rows, then convert both to NumPy arrays for model fitting.
mr_jobs_features = []
mr_jobs_runtimes = []
with open('Eli_MR_output.csv', 'r') as f:
    header = f.readline()  # first line is read as a header and not parsed
    for line in f:
        # Rows are tokenised on whitespace, not on commas.
        tokens = line.split()
        if not tokens:
            # Blank line (e.g. a trailing newline at end of file); indexing
            # tokens[0] here would raise IndexError, so skip it.
            continue
        # Third-from-last comma field of the first token is taken as the runtime.
        mr_jobs_runtimes.append(float(tokens[0].split(',')[-3]))
        # Tokens 1-3 are the numeric features, with separator / closing-bracket
        # characters stripped before conversion.
        mr_jobs_features.append([
            float(tokens[1].strip(' ,')),
            float(tokens[2].strip(',')),
            float(tokens[3].strip(']')),
        ])
Y_MR = np.array(mr_jobs_runtimes)
X_MR = np.array(mr_jobs_features)

In [19]:
# Hyper-parameter grid for an RBF-kernel SVR: 5 values of C x 5 values of
# epsilon = 25 candidates.
# NOTE(review): X_MR is passed to SVR unscaled; scikit-learn recommends
# scaling features for SVMs -- consider whether that matters here.
parameters = {
    'kernel': ['rbf'],
    'C': [1, 10, 100, 1000, 10000],
    'epsilon': [.1, .2, .3, .4, .5],
}
# 5-fold cross-validated search scored by negated MSE (closer to 0 is better);
# refit=True retrains the best candidate so best_estimator_ is available.
clf_MR = GridSearchCV(
    estimator=SVR(),
    param_grid=parameters,
    cv=5,
    refit=True,
    scoring='neg_mean_squared_error',
)
clf_MR.fit(X_MR, Y_MR)
# One mean cross-validation score per grid candidate.
print(clf_MR.cv_results_['mean_test_score'])

# Persist the refitted best model to disk.
clf_MR_final = clf_MR.best_estimator_
with open('MR_predictor.pkl', 'wb') as f:
    pickle.dump(clf_MR_final, f)

[-64230.62910487 -64231.95617197 -64235.02826803 -64239.48070025
 -64243.49441689 -63452.01802209 -63450.94459184 -63450.16727687
 -63450.58790801 -63450.89720146 -64405.66594819 -64405.41466519
 -64405.15837351 -64404.84794574 -64404.44965143 -85086.76466567
 -85077.451547   -85068.18555197 -85058.90871825 -85049.77126099
 -91117.50041442 -91103.55038086 -91089.58422604 -91075.65058872
 -91061.6928144 ]
