# In[None]:
import numpy as np
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import CaidaClassicLR as CCLR
import os
import csv

# Fit regression model

'''Model_NAME holds an SVR model object together with the name of the model used.'''
# Production/test switch. In the code visible in this file no function reads
# this module-level value; the __main__ block assigns is_production again
# before branching on it.
is_production = True

class Model_NAME:
    """Pair an estimator object with a human-readable label.

    Attributes are plain and public:
      svr_model -- the estimator object handed to the constructor
      name      -- the label used when reporting that estimator's scores
    """

    def __init__(self, svr_obj, name):
        # Store both values untouched; no validation or copying is done.
        self.svr_model, self.name = svr_obj, name

'''Fits every SVR model in list_models on the first n_samples rows of x and y, scores each fitted
model on those same rows, and returns the model names together with a list of R^2 scores.
The coefficient R^2 is defined as (1 - u/v), where u is the residual sum of
squares ((y_true - y_pred) ** 2).sum() and v is the total
sum of squares ((y_true - y_true.mean()) ** 2).sum().
Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse).
A constant model that always predicts the expected value of y,
disregarding the input features, would get an R^2 score of 0.0.'''

def benchmark_SVR(list_models, x, y, n_samples=100):
    """Fit and score each wrapped estimator on the leading rows of the data.

    Parameters
    ----------
    list_models : list
        Wrapper objects exposing ``svr_model`` (with ``fit``/``score``) and
        ``name`` attributes.
    x, y : sliceable sequences
        Regressors and target; only the first ``n_samples`` entries are used.
    n_samples : int, optional
        Number of leading rows used for both fitting and scoring.

    Returns
    -------
    tuple
        ``(names, scores)`` where ``names`` lists each wrapper's ``name`` and
        ``scores`` holds one single-element list ``[score]`` per wrapper, in
        the same order as ``list_models``.
    """
    head_x, head_y = x[:n_samples], y[:n_samples]

    # Each estimator is scored on the very rows it was fitted on, so the
    # reported value is a training score rather than a held-out score.
    per_model_scores = [
        [entry.svr_model.fit(head_x, head_y).score(head_x, head_y)]
        for entry in list_models
    ]
    print("Completed benchmark_svr \n")

    labels = [entry.name for entry in list_models]
    return labels, per_model_scores


# In[9]:

#Initialize the parameters for SVR models. We iteratively go on finding optimal epsilon and gamma values.

def recordBenchSVR(out_put_file_name, score_rec, feature_list, out_dir=None):
    """Append benchmark score rows to a CSV file, adding a header for a new file.

    Parameters
    ----------
    out_put_file_name : str
        Name of the CSV file inside the output directory.
    score_rec : iterable of sequences
        One sequence per CSV row.
    feature_list : list
        Column names placed after ``'svr-kernel'`` and ``'score'`` in the
        header row.
    out_dir : str, optional
        Directory to write into. When omitted, the module-level
        ``result_path`` is used, which is what this function always did
        before the parameter existed.

    Raises
    ------
    NameError
        If ``out_dir`` is omitted and no module-level ``result_path`` exists.
    """
    if out_dir is None:
        out_dir = result_path
    # os.path.join gives the same path as plain concatenation when the
    # directory ends with a separator, and a correct one when it does not.
    filename = os.path.join(out_dir, out_put_file_name)

    # Checked before opening: mode 'a' creates the file, which would hide
    # the fact that the header is still missing.
    needs_header = not os.path.isfile(filename)

    with open(filename, 'a') as csvfile:
        # The header and the data rows go through the same csv writer so that
        # fields containing the delimiter are quoted and no stray trailing
        # space ends up in the last column.
        writer = csv.writer(csvfile, delimiter=',', lineterminator='\n')
        if needs_header:
            writer.writerow(['svr-kernel', 'score'] + list(feature_list))
        writer.writerows(score_rec)

    print("record-benchamar-lr completed")
def initialize_SVR_estimators():
    """Create the SVR estimators to benchmark, each wrapped with a label.

    Every estimator shares C=1e3 and epsilon=0.2. The RBF model additionally
    gets gamma=0.1 and max_iter=1. Degree-2 and degree-3 polynomial models
    are constructed and wrapped too, but are left out of the returned list.

    Returns
    -------
    list of Model_NAME
        Wrappers labelled "rbf", "linear" and "sigmoid-3", in that order.
    """
    shared = {'C': 1e3, 'epsilon': 0.2}
    poly_degrees = (2, 3)

    # NOTE(review): max_iter=1 is applied to the RBF model only, which caps
    # its solver at a single iteration -- confirm this is intentional and not
    # a leftover from a quick test run.
    rbf = Model_NAME(
        SVR(kernel='rbf', gamma=0.1, max_iter=1, **shared), "rbf")
    linear = Model_NAME(SVR(kernel='linear', **shared), "linear")
    poly2 = Model_NAME(
        SVR(kernel='poly', degree=poly_degrees[0], **shared), "poly-deg-2")
    poly3 = Model_NAME(
        SVR(kernel='poly', degree=poly_degrees[1], **shared), "poly-deg-3")
    sigmoid = Model_NAME(SVR(kernel='sigmoid', **shared), "sigmoid-3")

    # The polynomial wrappers stay available for the full line-up:
    #   [rbf, linear, poly2, poly3, sigmoid]
    return [rbf, linear, sigmoid]

def run_SVR(result_path, dirst, correlation_matrix_file, theta_file):
    """Benchmark each configured SVR estimator per target column and record the scores.

    Loads ``../data/big-data-csv.csv``, takes CSV column 2 as the single
    regressor and column 6 as the target (both marked "for example" in the
    original code), fits and scores every estimator returned by
    ``initialize_SVR_estimators`` on each target column, appends the rows to
    ``SVR-score-g0d1-ep-0d2.csv`` through ``recordBenchSVR`` and returns them.

    Parameters
    ----------
    result_path, dirst, correlation_matrix_file, theta_file : str
        Accepted for interface compatibility; none of them is read here.
        NOTE(review): ``recordBenchSVR`` takes its output directory from the
        module-level ``result_path``, not from this argument -- confirm that
        callers set that global before calling.

    Returns
    -------
    list of list
        One row per estimator: ``[estimator name, score for target 0, ...]``.
    """
    # Imported locally because the file has no module-level pandas import.
    import pandas as pd

    # Planned regressor/target set-ups (only a single example is wired in):
    #   1. Generate X with all the features available. Generate a target as Y.
    #   2. Divide the training features into different divisions:
    #      a) X contains ALL the features
    #      b) X contains HIGHLY UNCORRELATED FEATURES for target variable Y
    #      c) X contains some features without any ff.
    #      d) X contains some features without any ff.
    #      e) X contains f2 only
    #      f) X contains f1 only
    app_file = '../data/big-data-csv.csv'
    appdf = pd.read_csv(app_file, sep=',')
    X = appdf.iloc[:, [2]].values  # for example
    Y = appdf.iloc[:, 6].values    # for example

    # Synthetic test data can be substituted instead, for example:
    #   np.random.seed(0)
    #   Y = np.random.randn(n_samples)
    #   X = np.random.randn(n_samples, n_features)

    # A single selected column comes back 1-D; give it an explicit column
    # axis so the shape unpacking and the per-column slicing below work for
    # one target as well as for several.
    if Y.ndim == 1:
        Y = Y.reshape(-1, 1)
    n_samples, n_targets = Y.shape

    try:
        # Dict views cannot be sliced on Python 3, so build the list first.
        feature_list = list(column_dict.values())[3:]
    except NameError:
        # column_dict is not defined anywhere in this file. Presumably it maps
        # CSV columns to their names, so fall back to the CSV header from the
        # fourth column on -- TODO confirm against the original notebook.
        feature_list = list(appdf.columns[3:])

    list_record = []
    for estimator in initialize_SVR_estimators():  # one output row per estimator
        m_row = [estimator.name]
        for target_id in range(n_targets):  # one score per target column
            # benchmark_SVR needs a one-dimensional target.
            _, scores = benchmark_SVR([estimator], X, Y[:, target_id], n_samples)
            m_row += scores[0]  # scores[0] is the single-element list [score]
        list_record.append(m_row)

    recordBenchSVR('SVR-score-g0d1-ep-0d2.csv', list_record, feature_list)
    return list_record


# In[8]:

if __name__ == '__main__':
    # Script entry point: choose the data locations, then run the benchmark.
    is_production = True

    # Scores go to the same directory in both modes. recordBenchSVR reads
    # this name as a module-level global, so it must be assigned here.
    result_path = '../data/'

    if is_production:
        dirst, correlation_matrix_file, theta_file = (
            '../data', '../data/corr_all.csv', '../data/scoretheta.csv')
    else:
        dirst, correlation_matrix_file, theta_file = (
            '../data/google-apps', 'corr_all.csv', 'theta.csv')

    list_record = run_SVR(result_path, dirst, correlation_matrix_file, theta_file)


