#  **FINAL PROJECT**
## PREDICT ICO PRICE USING MACHINE LEARNING
![alt text](https://i.imgur.com/mFQhIcT.jpg)


### **AGENDA** 
- Import the necessary libraries and write the necessary functions.
- Loading dataset.
- Print out a scatter plot showing the correlation between the inputs and the outputs.
- Encode and split data.
- Using multivariable regression to calculate the outcome prediction => prove the influence of overfitting.
- Ridge Regression
  - Find the regression model with the best rMSE value.
  - Find the regression model with the best R2 value.
  - Load the above 2 models and calculate the performance.
  - ICO price prediction.
- Neural Network
  - Find out the NN model with the best rMSE value.
  - Find the model NN with the best R2 value.
  - Load the above 2 models and calculate the performance.
  - ICO price prediction.

### **NEEDED LIB AND FUNCTION**

### **LIBs**

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('text', usetex=False)
import pandas as pd
from math import sqrt
import seaborn as sns
sns.set(style="ticks", color_codes=True)
from enum import Enum
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
import os
from sklearn.preprocessing import OneHotEncoder
import joblib
from sklearn import decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import wget

### **FUNCTION**

In [None]:
class LineaReggressionMetrics(Enum):
    """Metrics by which the best regression model can be selected.

    The member *names* are embedded in the file names of the saved models,
    so renaming a member changes which ``.sav`` file is read or written.
    """
    # Coefficient of determination (higher is better).
    R_SQUARED = 1
    # Root mean squared error (lower is better).
    ROOT_MEAN_SQUARED_ERROR = 2
    # Mean absolute error (lower is better).
    MEAN_ABSOLUTE_ERROR = 3

In [None]:
class Solvers(Enum):
    """Names of the neural-network optimizers referred to in this notebook.

    Only the member name is used in the visible code (appended to plot
    titles in ``plotResult2``).
    NOTE(review): the names presumably mirror the ``solver`` argument of
    ``MLPRegressor`` -- confirm against the NN training cells.
    """
    lbfgs = 1
    sgd = 2
    adam = 3

In [None]:
def load_DatasetUseNumpy(dataset = "F:/PYTHON/DATA_ANALYSIS_PYTHON/00_PROJECT_ICO/DATA/ico_data_final.csv"):
    """Load the ICO dataset from a CSV file using NumPy only.

    The file layout is: first column = ICO name, last column = output,
    every column in between = one input feature.

    Args:
        dataset: Path to the comma-separated file (with a header row).

    Returns:
        A tuple ``(featureNames, x, y, ico_names)`` where ``featureNames`` is
        a tuple with the header names of the input columns, ``x`` is the 2-D
        array of inputs, ``y`` the 1-D array of outputs and ``ico_names`` the
        1-D array of ICO names (strings).
    """
    # Header names: one data row is enough to let genfromtxt build the dtype.
    header = np.genfromtxt(fname=dataset, names=True, delimiter=',', max_rows=1)
    featureNames = header.dtype.names[1:-1]

    # ICO names live in the first column and must be read as text.
    ico_names = np.atleast_1d(
        np.genfromtxt(fname=dataset, delimiter=',', skip_header=1, usecols=0, dtype=str)
    )

    # Parse the numeric table once; atleast_2d keeps a single-row file 2-D.
    table = np.atleast_2d(np.genfromtxt(fname=dataset, delimiter=',', skip_header=1))
    x = table[:, 1:-1]
    y = table[:, -1]
    return (featureNames, x, y, ico_names)


In [None]:
def load_DatasetUsePanDas(path = "F:/PYTHON/DATA_ANALYSIS_PYTHON/00_PROJECT_ICO/DATA/ico_data_final.csv"):    
    """Load the ICO dataset from a CSV file using pandas.

    The file layout is: first column = ICO name, last column = output,
    every column in between = one input feature. The output column is
    addressed relative to the end of the table, so the loader agrees with
    how ``x`` and ``featureNames`` are sliced.

    Args:
        path: Path to the CSV file (with a header row).

    Returns:
        A tuple ``(featureNames, x, y, icoNames, data_com)``:
        ``featureNames`` is a list of input column names, ``x`` the input
        values, ``y`` the values of the last column, ``icoNames`` the first
        column as a pandas Series and ``data_com`` the inputs together with
        the output (every column except the ICO name).
    """
    dataset = pd.read_csv(path)
    icoNames = dataset.iloc[:, 0]
    featureNames = list(dataset.columns[1:-1])
    x = dataset.iloc[:, 1:-1].values
    # Last column is the output.
    y = dataset.iloc[:, -1].values
    # Inputs + output, used for the correlation analysis.
    data_com = dataset.iloc[:, 1:].values
    return (featureNames, x, y, icoNames, data_com)

In [None]:
def createFolderIfDoesntExist(folderName):
    """Make sure ``folderName`` exists, creating it (and parents) if needed.

    Returns:
        True when the folder was already present before the call,
        False when it had to be created.
    """
    alreadyThere = os.path.isdir(folderName)
    if alreadyThere:
        return alreadyThere

    os.makedirs(folderName)
    return alreadyThere

In [None]:
# Download the dataset from GitHub.
def loadDataFromGithub(fileName= "F:/PYTHON/DATA_ANALYSIS_PYTHON/00_PROJECT_ICO/DATA/ico_data_final.csv"):
    """Fetch the ICO dataset from GitHub when the local copy is missing.

    Nothing happens if ``fileName`` already exists. Otherwise the CSV is
    downloaded into ``./DATASET/`` (created if needed). The download uses
    the standard library, so the function also works outside IPython and
    without an external ``wget`` binary.

    Args:
        fileName: Path of the local dataset copy to look for.

    NOTE(review): the file is saved under ``./DATASET/`` and not at
    ``fileName``; callers must point the loader at the downloaded copy.
    """
    if os.path.isfile(fileName):
        return

    # Function-scope import keeps this cell self-contained.
    from urllib.request import urlretrieve

    url = "https://raw.githubusercontent.com/daominhthuan42/ICO_FINAL/master/CODE/ico_data_final.csv"
    targetFolder = "./DATASET/"
    os.makedirs(targetFolder, exist_ok=True)
    try:
        urlretrieve(url, os.path.join(targetFolder, "ico_data_final.csv"))
    except OSError as err:
        # Stay best-effort (a failed shell download did not abort the cell).
        print("Could not download the dataset:", err)

In [None]:
def loadModel(fileName):
    """Return the model stored in ``fileName``, or None if there is no such file.

    The file is deserialized with ``joblib.load``.
    NOTE(review): joblib files are pickle-based; only load files you trust.
    """
    if not os.path.isfile(fileName):
        return None
    return joblib.load(fileName)

In [None]:
def createResultsFolderIfDoesntExist(folder):
    """Create the results folder and, on first creation, fetch the saved models.

    When ``folder`` already exists nothing is downloaded. When it had to be
    created, the pre-trained best models are downloaded from GitHub into
    ``./RESULT/`` using the standard library (no IPython shell escape and
    no external ``wget`` binary required).

    Args:
        folder: Folder whose absence triggers the download.

    NOTE(review): the models are always stored under ``./RESULT/``, whatever
    ``folder`` is; the rest of the notebook reads them from ``RESULT/``.
    """
    if createFolderIfDoesntExist(folder):
        return

    # Function-scope import keeps this cell self-contained.
    from urllib.request import urlretrieve

    baseUrl = "https://raw.githubusercontent.com/daominhthuan42/ICO_FINAL/master/BEST_MODEL/"
    modelFiles = (
        "bestRegressionModel_Ridge_ROOT_MEAN_SQUARED_ERROR.sav",
        "bestRegressionModel_Ridge_R_SQUARED.sav",
        "bestRegressionModel_Ridge_MEAN_ABSOLUTE_ERROR.sav",
        "bestNNModel_MEAN_ABSOLUTE_ERRORsgd.sav",
        "bestNNModel_ROOT_MEAN_SQUARED_ERRORsgd.sav",
        "bestNNModel_R_SQUAREDsgd.sav",
    )
    targetFolder = "./RESULT/"
    os.makedirs(targetFolder, exist_ok=True)
    for modelFile in modelFiles:
        try:
            urlretrieve(baseUrl + modelFile, os.path.join(targetFolder, modelFile))
        except OSError as err:
            # Stay best-effort: a missing model is simply trained again later.
            print("Could not download", modelFile, ":", err)

In [None]:
def plotExpectedVsPredictedOutput(y_test, y_pred, fileName='', saveToFile=False):
    """Scatter the predicted values against the measured ones.

    A dashed diagonal spanning the range of ``y_pred`` is drawn as a
    reference and both axes are clipped to [-10, 10].

    Args:
        y_test: Measured (actual) values, plotted on the x axis.
        y_pred: Predicted values (must offer ``min()``/``max()``), on the y axis.
        fileName: Plot title; also the base name of the saved image.
        saveToFile: When True the figure is written to
            ``IMAGE_RESULT/<fileName>.png``; otherwise it is shown.
    """
    figure, axes = plt.subplots(figsize=(10, 10))
    axes.scatter(y_test, y_pred, c="blue", label="y_pred")

    # Reference diagonal across the predicted range.
    lowest, highest = y_pred.min(), y_pred.max()
    axes.plot([lowest, highest], [lowest, highest], 'k--', lw=2)

    axes.set_xlabel('Measured')
    axes.set_ylabel('Predicted')
    axes.legend(loc="upper left")
    axes.set_xlim(-10, 10)
    axes.set_ylim(-10, 10)
    axes.set_title(fileName)

    if saveToFile == True:
        createFolderIfDoesntExist("IMAGE_RESULT/")
        figure.savefig('IMAGE_RESULT/' + fileName + ".png", dpi=300)
        return
    plt.show()

In [None]:
def plotExpectedVsPredictedOutput2(y_test, y_pred, fileName='', saveToFile = True):
    """Draw a seaborn regression plot of actual vs predicted values.

    Args:
        y_test: Actual values (y axis).
        y_pred: Predicted values (x axis).
        fileName: Plot title; also the base name of the saved image.
        saveToFile: When true the figure is also written to
            ``IMAGE_RESULT/<fileName>.png``. The figure is shown either way.
    """
    plt.subplots(figsize=(6, 6))
    plt.xlabel('y_pred')
    plt.ylabel('y_test_holdout')
    # x and y are passed by keyword: recent seaborn releases no longer
    # accept them positionally.
    sns.regplot(x=y_pred, y=y_test)
    plt.title(fileName)
    if saveToFile:
        createFolderIfDoesntExist("IMAGE_RESULT/")
        plt.savefig('IMAGE_RESULT/' + fileName + ".png", dpi=300)
    plt.show()

In [None]:
def line_plot(line1, line2, label1= 'actual', label2= 'prediction', title='', lw=2, saveToFile = True):
    """Plot two series on one axes (actual in orange, prediction in navy).

    Args:
        line1: First series, drawn with round markers.
        line2: Second series, drawn with square markers.
        label1: Legend entry for ``line1``.
        label2: Legend entry for ``line2``.
        title: Plot title; also the base name of the saved image.
        lw: Line width for both series.
        saveToFile: When True the figure is written to
            ``IMAGE_LINE_PLOT/<title>.png`` before being shown.
    """
    figure, axes = plt.subplots(1, figsize=(13, 7))

    curves = (
        (line1, '-o', 'darkorange', label1),
        (line2, '-s', 'navy', label2),
    )
    for values, marker, colour, legendText in curves:
        axes.plot(values, marker, color=colour, label=legendText, linewidth=lw)

    axes.set_title(title, fontsize=16)
    axes.legend(loc='best', fontsize=16)

    if saveToFile == True:
        createFolderIfDoesntExist("IMAGE_LINE_PLOT/")
        figure.savefig('IMAGE_LINE_PLOT/' + title + ".png", dpi=500)
    plt.show()

In [None]:
def line_plot2(line, lw = 2, saveToFile= True, title='', label1=''):
    """Plot a single series in orange and optionally save the figure.

    Args:
        line: Series to plot.
        lw: Line width.
        saveToFile: When True the figure is written to
            ``OPTIMIZE/<title>.png`` before being shown.
        title: Plot title; also the base name of the saved image.
        label1: Legend entry for the series.
    """
    figure, axes = plt.subplots(1, figsize=(12, 8))
    axes.plot(line, color='darkorange', label=label1, linewidth=lw)
    axes.set_title(title, fontsize=16)
    axes.legend(loc='best', fontsize=16)

    if saveToFile == True:
        createFolderIfDoesntExist("OPTIMIZE/")
        figure.savefig('OPTIMIZE/' + title + ".png", dpi=300)
    plt.show()

In [None]:
def plotResult(y_test_hodlout, y_pred_rMSE, y_pred_R2, y_pred_MAE, saveToFile = True, rMSE = '', R2= '', MAE = ''):
    """Stack three "predicted vs actual" line plots, one per best model.

    Args:
        y_test_hodlout: Actual values, drawn in every panel.
        y_pred_rMSE: Predictions of the best-rMSE model (top panel).
        y_pred_R2: Predictions of the best-R2 model (middle panel).
        y_pred_MAE: Predictions of the best-MAE model (bottom panel).
        saveToFile: When True the figure is written to
            ``LINE_PLOT_3/PLOT_RESULT.png``.
        rMSE: Text appended to the top panel title.
        R2: Text appended to the middle panel title.
        MAE: Text appended to the bottom panel title.
    """
    figure, panels = plt.subplots(figsize=(10, 7), nrows=3)
    figure.tight_layout()

    # (predictions, title prefix, title suffix) for each panel, top to bottom.
    rows = (
        (y_pred_rMSE, "Predicted vs Actuals Using Best model rMSE: ", rMSE),
        (y_pred_R2, "Predicted vs Actuals Using Best model R2: ", R2),
        (y_pred_MAE, "Predicted vs Actuals Using Best model MAE: ", MAE),
    )
    for panel, (predicted, prefix, suffix) in zip(panels, rows):
        panel.plot(predicted, '-s', color='navy', label='Predicted Values', linewidth=1)
        panel.plot(y_test_hodlout, '-o', color='darkorange', label='Actual Values', linewidth=1)
        panel.set_title(prefix + suffix)
        panel.legend(loc='best')

    if saveToFile == True:
        createFolderIfDoesntExist("LINE_PLOT_3/")
        plt.savefig('LINE_PLOT_3/' + "PLOT_RESULT" + ".png", dpi=300)

In [None]:
def plotResult2(y_test_hodlout, y_pred_Ridge_rMSE, y_pred_Ridge_R2, y_pred_Ridge_MAE, y_pred_NN_rMSE, 
                y_pred_NN_R2, y_pred_NN_MAE, saveToFile = True, solver = Solvers.lbfgs):
    """Stack six "predicted vs actual" line plots (three Ridge, three NN).

    Every panel plots one set of predictions and the actual values against
    the index range of the predictions.

    Args:
        y_test_hodlout: Actual values, drawn in every panel.
        y_pred_Ridge_rMSE, y_pred_Ridge_R2, y_pred_Ridge_MAE: Predictions of
            the best Ridge models (panels 1-3).
        y_pred_NN_rMSE, y_pred_NN_R2, y_pred_NN_MAE: Predictions of the best
            neural-network models (panels 4-6).
        saveToFile: When True the figure is written to
            ``LINE_PLOT_6/PLOT_RESULT.png``.
        solver: Its ``name`` is appended to the titles of the NN panels.
    """
    figure, panels = plt.subplots(figsize=(15, 10), nrows=6)
    figure.tight_layout()

    # (predictions, title, whether the solver name is appended to the title)
    rows = (
        (y_pred_Ridge_rMSE, "Predicted vs Actuals Using Best model rMSE Ridge", False),
        (y_pred_Ridge_R2, "Predicted vs Actuals Using Best model R2 Ridge", False),
        (y_pred_Ridge_MAE, "Predicted vs Actuals Using Best model MAE Ridge", False),
        (y_pred_NN_rMSE, "Predicted vs Actuals Using Best model rMSE NN ", True),
        (y_pred_NN_R2, "Predicted vs Actuals Using Best model R2 NN ", True),
        (y_pred_NN_MAE, "Predicted vs Actuals Using Best model MAE NN ", True),
    )
    for panel, (predicted, heading, addSolver) in zip(panels, rows):
        panel.plot(range(len(predicted)), predicted, '-s', color='navy', label='Predicted Values', linewidth=1)
        panel.plot(range(len(predicted)), y_test_hodlout, '-o', color='darkorange', label='Actual Values', linewidth=1)
        if addSolver:
            heading = heading + str(solver.name)
        panel.set_title(heading)
        panel.legend(loc='best')

    if saveToFile == True:
        createFolderIfDoesntExist("LINE_PLOT_6/")
        plt.savefig('LINE_PLOT_6/' + "PLOT_RESULT" + ".png", dpi=300)

In [None]:
def plotPredicted(rMSE, R2, MAE):
    """Draw three stacked histograms of predicted ICO prices.

    The lower two panels share their x axis with the top panel.

    Args:
        rMSE: Prices predicted by the best-rMSE model (top panel).
        R2: Prices predicted by the best-R2 model (middle panel).
        MAE: Prices predicted by the best-MAE model (bottom panel).
    """
    plt.figure(figsize=(10, 5))

    top = plt.subplot(311, title='Price ICO 0x, modum and crypto20 using best model rMSE')
    top.hist(rMSE)

    lowerPanels = (
        (312, 'Price ICO 0x, modum and crypto20 using best model R2', R2),
        (313, 'Price ICO 0x, modum and crypto20 using best model MAE', MAE),
    )
    for position, heading, prices in lowerPanels:
        panel = plt.subplot(position, sharex=top, title=heading)
        panel.hist(prices)

    plt.tight_layout()

In [None]:
def printPlotCoef(feauteNames, model):
    """Print and bar-plot the first 12 coefficients of ``model``.

    Args:
        feauteNames: Sequence with at least 12 names, used as bar labels.
        model: Fitted estimator exposing ``coef_`` with at least 12 entries.

    NOTE(review): coefficient ``i`` is paired with ``feauteNames[i]``; confirm
    that the model's input columns are in the same order as the names.
    """
    weights = model.coef_[0:12]
    print("Coefficient: ", weights)
    print("Input: ", feauteNames) 

    # Pair each of the first 12 names with its coefficient.
    byFeature = {feauteNames[k]: weights[k] for k in range(12)}
    series = pd.Series(byFeature)

    plt.figure(figsize=(10, 5))
    plt.xlim(-1, 1)
    plt.ylim(-1, 1)
    series.head(12).plot(kind='bar')
    plt.title('Feature Significance')
    plt.tight_layout()

In [None]:
def makePrediction(model,example_to_predict):
    """Predict the output for one raw (not yet one-hot encoded) sample.

    The sample is encoded with ``encodeSingleElement`` against the
    module-level dataset ``x`` and passed to ``model.predict`` as a single row.

    Args:
        model: Fitted estimator with a ``predict`` method.
        example_to_predict: 1-D array holding one sample in raw feature order.

    Returns:
        Whatever ``model.predict`` returns for that single row.
    """
    encodedSample, _fittedEncoder = encodeSingleElement(x, example_to_predict)
    singleRow = encodedSample.reshape(1, -1)
    return model.predict(singleRow)

In [None]:
def getCovarianceMatrixAndPrintScatterPlot(x, y, saveToFile=True):
    """Scatter-plot every input column against the output.

    For each column of ``x`` the Pearson correlation coefficient with ``y``
    is computed (``np.corrcoef``) and shown in the plot title. Axis labels
    come from the module-level ``featureNames``.

    Args:
        x: 2-D array of inputs, one column per feature.
        y: 1-D array of outputs.
        saveToFile: When true each plot is also written to
            ``IMAGE_SCATTER/<feature>_vs_Price_scatter_plot.png``. The plot is
            shown either way.
    """
    for i in range(x.shape[1]):
        column = x[:, i]
        corCoef = np.corrcoef(column, y)

        plt.xlabel(featureNames[i])
        plt.ylabel("Price after 6 months ($) ")
        plt.suptitle('Scatter Plot of feature {:s} vs Price after 6 months'.format(featureNames[i]))
        # Off-diagonal entry of the 2x2 matrix is the x/y correlation.
        plt.title("Correlation Coefficient: " + str(corCoef[0, 1]))
        plt.scatter(column, y)

        if saveToFile:
            createFolderIfDoesntExist("IMAGE_SCATTER/")
            plt.savefig('IMAGE_SCATTER/{:s}_vs_Price_scatter_plot.png'.format(featureNames[i]), dpi = 300)

        # Show the plot even when it is not saved, then start a clean figure.
        plt.show()
        plt.clf()

### **CREATE THE NEEDED FOLDERS**

CREATE THE RESULT FOLDER, WHICH WILL CONTAIN THE SAVED MODELS

In [None]:
# Create RESULT/; if it did not exist yet, the pre-trained models are downloaded.
createResultsFolderIfDoesntExist("RESULT/")

DOWNLOAD DATASET FROM GITHUB

In [None]:
# Download the dataset when the default local CSV file is missing.
loadDataFromGithub()

### **LOADING ICO DATASET INTO VARIABLE**

In [None]:
# Load inputs (x), output (y), ICO names and the combined input+output table
# from the default CSV path; these globals are used by the cells below.
(featureNames, x, y, icoNames, data_com) = load_DatasetUsePanDas()

### **CORRELATION COEFFICIENT BETWEEN THE INPUTS AND THE OUTPUT**
The correlation coefficient expresses the strength of the linear relationship between two variables, regardless of which variable depends on the other.

**Properties:**
 - The correlation coefficient has no unit.
 - It lies in the range [-1, 1].

  ![alt text](https://i.imgur.com/72RWtXW.png)
 - r > 0: Positive correlation.
 - r < 0: Negative correlation.
 - r = 0: No correlation.
 Assessing the correlation between two quantitative variables.

 ![alt text](https://i.imgur.com/JpGBUGC.png)

 

**Calculate the correlation coefficient between the input and output**

In [None]:
# Plot every input against the output with its correlation coefficient
# (images are saved to IMAGE_SCATTER/ by default).
getCovarianceMatrixAndPrintScatterPlot(x, y)

The figure below illustrates multicollinearity: pairs of highly correlated inputs are highlighted.
![alt text](https://i.imgur.com/In7lpb4.png)


### CORRELATION ANALYSIS

In [None]:
# Wrap the combined inputs + output in a DataFrame with readable column names
# (12 input columns followed by the output column).
# NOTE(review): 'county' is presumably meant to be 'country' -- the label is
# shown in the correlation table and heatmap.
data = pd.DataFrame(data_com, columns= ['price_usd', 'price_btc', 'total_supply', 'market_cap_usd', 'available_supply', 'usd_raised', 'eth_price_launch',
                                        'btc_price_launch', 'ico_duration',
                                        'month','day','county', 'output'])
# Pairwise Pearson correlation of all columns, rounded to two decimals.
data.corr(method ='pearson').round(2)

In [None]:
# Annotated heatmap of the correlation matrix (DataFrame.corr default method).
plt.figure(figsize=(10,8))
plot = sns.heatmap(data.corr().round(2), annot=True)
plot.set_title("The graph shows the full correlation")
plt.show()

### **DATA ENCODING AND SPLITTING**
Use one hot encoding to encode the data fields: ICO Duration, ICO Date, ICO month launched and ICO country.

In [None]:
# One-hot encoding of the categorical inputs (columns 8..11 of the raw data:
# ICO duration, launch month, launch day and country).
def _makeDenseOneHotEncoder():
    """Return a OneHotEncoder that outputs dense arrays.

    Newer scikit-learn releases call the option ``sparse_output``; older ones
    call it ``sparse``. Try the new name first and fall back to the old one.
    """
    try:
        return OneHotEncoder(handle_unknown='error', sparse_output=False)
    except TypeError:
        return OneHotEncoder(handle_unknown='error', sparse=False)


def encodeData(x):
    """One-hot encode the four categorical columns of the whole dataset.

    Args:
        x: 2-D array of raw inputs; columns 8..11 are categorical.

    Returns:
        2-D array made of the non-categorical columns followed by the
        one-hot encoded categorical columns.
    """
    enc = _makeDenseOneHotEncoder()
    encodedCategoryArray = enc.fit_transform(x[:, 8:12])
    # Drop the raw categorical columns and append their encoded form.
    allInputsExceptCategorical = np.delete(x, np.s_[8:12], axis=1)
    encodedX = np.concatenate((allInputsExceptCategorical, encodedCategoryArray), axis=1)
    return encodedX


def encodeSingleElement(x, sample):
    """One-hot encode one raw sample using the categories found in ``x``.

    Args:
        x: 2-D array of raw inputs used to fit the encoder.
        sample: 1-D array holding one raw sample (same column order as ``x``).

    Returns:
        A tuple ``(encodedX, encoder)``: the encoded 1-D sample and the
        fitted encoder. Raises (via the encoder) if the sample contains a
        category that does not occur in ``x``.
    """
    enc = _makeDenseOneHotEncoder()
    # fit() returns the encoder itself.
    encodedCategoryArray = enc.fit(x[:, 8:12])
    allInputsExceptCategorical = np.delete(sample, np.s_[8:12], axis=0)
    encodedCategories = enc.transform(sample[8:12].reshape(1, -1))
    encodedX = np.concatenate((allInputsExceptCategorical, encodedCategories.flatten()), axis=0)
    return (encodedX, encodedCategoryArray)

In [None]:
# One-hot encode the categorical columns of the full input matrix.
encodeX = encodeData(x)

### **Split DATASET into TRAIN SET AND TEST SET**
80% of the data is used for the training set and 20% for the hold-out test set. The training set is then split 80/20 again into training and validation subsets.

In [None]:
# Hold out 20% of the encoded data as the final test set; the fixed
# random_state makes this split reproducible.
X_cross, X_test_holdout, y_cross, y_test_holdout = train_test_split(encodeX, y, test_size=0.2, random_state = 42)

# **Ridge Regression**

---


Split the data into 80/20 from the Ridge Regression train set

In [None]:
def ridgeRegression(x, y):       
    """Fit a RidgeCV model on a fresh random 80/20 split and score it.

    The split is not seeded, so every call trains and validates on a
    different partition of ``(x, y)``.

    Args:
        x: 2-D array of (encoded) inputs.
        y: 1-D array of outputs.

    Returns:
        A tuple ``(model, rMSE, R2, MAE, coefficients)`` where the three
        scores are measured on the validation part of the split.

    NOTE(review): the ``normalize`` argument was deprecated in scikit-learn
    1.0 and removed in 1.2 -- confirm the installed version supports it.
    """
    X_fit, X_check, y_fit, y_check = train_test_split(x, y, test_size=0.2)

    candidateAlphas = [0.6, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 10]
    ridgeModel = linear_model.RidgeCV(gcv_mode='svd', alphas=candidateAlphas, normalize=True)
    ridgeModel.fit(X_fit, y_fit)

    predictions = ridgeModel.predict(X_check)
    validationR2 = r2_score(y_check, predictions)
    validationRmse = sqrt(mean_squared_error(y_check, predictions))
    validationMae = mean_absolute_error(y_check, predictions)
    return (ridgeModel, validationRmse, validationR2, validationMae, ridgeModel.coef_)

## **COMPUTE TRAINING ERROR AND TEST ERROR**
Find out if the model is overfitting

In [None]:
def trainingError(x,y):   
    """Fit an ordinary linear regression and measure its error on the same data.

    Args:
        x: 2-D array of training inputs.
        y: 1-D array of training outputs.

    Returns:
        A tuple ``(regressor, mse, predictions)``: the fitted model, the mean
        squared error on the training data and the training predictions.
    """
    model = linear_model.LinearRegression()
    model.fit(x, y)
    fittedValues = model.predict(x)
    return (model, mean_squared_error(y, fittedValues), fittedValues)

In [None]:
def testError(x, y, regressor):
    """Measure the mean squared error of a fitted model on unseen data.

    Args:
        x: 2-D array of test inputs.
        y: 1-D array of test outputs.
        regressor: Fitted estimator with a ``predict`` method.

    Returns:
        A tuple ``(mse, predictions)``.
    """
    predictions = regressor.predict(x)
    return mean_squared_error(y, predictions), predictions

#### **OVERFITTING**
Overfitting occurs when the model fits the training set perfectly (or almost perfectly) but performs very badly on the test set.

In [None]:
# Fit plain linear regression on the training part and compare the error on
# the training data with the error on the hold-out data.
# The training MSE is stored under its own name so that the trainingError()
# function is not overwritten (which would break re-running this cell).
regressor, training_error, y_pred_training = trainingError(X_cross, y_cross)
test_error, y_pred_holdout = testError(X_test_holdout, y_test_holdout, regressor)
print("TRAINING ERROR: {:f}".format(training_error))
heading = "Training Error: " + str(training_error)
plotExpectedVsPredictedOutput2(y_cross, y_pred_training, fileName = heading, saveToFile = True)
line_plot(y_cross, y_pred_training, title= heading, lw=2)
print("TEST ERROR: {:f}".format(test_error))
heading = "Test Error: " + str(test_error)
print("Predicted value: \n ")
print(y_pred_holdout)
print("Actual value: \n ")
print(y_test_holdout)

plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout, fileName = heading, saveToFile = True)
line_plot(y_test_holdout, y_pred_holdout, title=heading, lw=2)
# ICO 0x: THE ACTUAL VALUE AFTER 6 MONTHS IS 1.08
ico_0x= np.array([1.71456, 0.00019931, 1000000000, 905793616, 528295082, 24000000, 297.63, 3420.4, 7, 8, 9, 182])
y_pred1 = makePrediction(regressor, ico_0x)
print("Predicted value ICO 0x (Actual value: 1.08) after 6 months: ", y_pred1)

## **RESULTS OPTIMIZE FUNCTION**

In [None]:
def saveBestModel(model,fileName):
    """Save ``model`` to ``fileName`` unless a better model is already stored.

    If the file does not exist the model is simply dumped. Otherwise the
    stored model and the new one are both scored on the module-level
    hold-out set (``X_test_holdout`` / ``y_test_holdout``) and the file is
    overwritten only when the new model wins on the metric named in
    ``fileName``:

    * ``ROOT_MEAN_SQUARED_ERROR`` in the name: lower rMSE wins;
    * ``MEAN_ABSOLUTE_ERROR`` in the name: lower MAE wins;
    * anything else: higher R2 wins, provided it is below 1.

    Args:
        model: Fitted estimator with a ``predict`` method.
        fileName: Path of the ``.sav`` file.

    NOTE(review): scoring on the hold-out set means that set influences
    which model is kept.
    """
    if not os.path.isfile(fileName):
        joblib.dump(model, fileName)
        return

    # Deserialize the stored model once and score both candidates.
    savedModel = joblib.load(fileName)
    y_pred_saved = savedModel.predict(X_test_holdout)
    y_pred_new = model.predict(X_test_holdout)

    if str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) in fileName:
        newIsBetter = (sqrt(mean_squared_error(y_test_holdout, y_pred_new))
                       < sqrt(mean_squared_error(y_test_holdout, y_pred_saved)))
    elif str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) in fileName:
        newIsBetter = (mean_absolute_error(y_test_holdout, y_pred_new)
                       < mean_absolute_error(y_test_holdout, y_pred_saved))
    else:
        rSquared_new = r2_score(y_test_holdout, y_pred_new)
        rSquared_saved = r2_score(y_test_holdout, y_pred_saved)
        newIsBetter = rSquared_new > rSquared_saved and rSquared_new < 1

    if newIsBetter:
        joblib.dump(model, fileName)

In [None]:
def Optimize(metricToOptimize, numIterations, x, y, showOutput = True):
    """Search for the best Ridge model over many random train/validation splits.

    ``ridgeRegression(x, y)`` is called ``numIterations`` times. The model
    with the best validation score for ``metricToOptimize`` is kept, the
    history of improvements is plotted with ``line_plot2`` and the winner is
    handed to ``saveBestModel`` under
    ``RESULT/bestRegressionModel_Ridge_<METRIC>.sav``.

    Args:
        metricToOptimize: A ``LineaReggressionMetrics`` member. rMSE and MAE
            are minimised; any other value maximises R2 (only positive R2
            values are accepted).
        numIterations: Number of random splits to try.
        x: 2-D array of (encoded) inputs.
        y: 1-D array of outputs.
        showOutput: When true, print progress for every iteration and the
            coefficients of the best models at the end.
    """
    # Best Ridge scores seen so far; the sentinels lie far outside the
    # expected range so the first acceptable run always wins.
    maxRSquared_Ridge = -10000
    rMSEMin_Ridge = 10000
    meanABError_min = 10000
    bestRidgeModel = None

    # Every improvement of the optimised metric, in the order it was found.
    plotrMSE = []
    plotR2 = []
    plotMAE = []

    # Regression coefficients of the best Ridge model for each metric.
    bestRegressionCoef_rMSE_Ridge = np.array([])
    bestRegressionCoef_R2_Ridge = np.array([])
    bestRegressionCoef_MAE_Ridge = np.array([])

    for i in np.arange(numIterations):
        (candidateModel, rootMeanSquaredError_Ridge, rSquared_Ridge,
         meanAbsoluteError, candidateCoef) = ridgeRegression(x, y)

        # Look for the best (lowest) rMSE.
        if metricToOptimize == LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR:
            if rootMeanSquaredError_Ridge < rMSEMin_Ridge:
                rMSEMin_Ridge = rootMeanSquaredError_Ridge
                plotrMSE.append(rootMeanSquaredError_Ridge)
                bestRegressionCoef_rMSE_Ridge = candidateCoef
                bestRidgeModel = candidateModel
            if showOutput:
                print(i," bestrMSE: ", rMSEMin_Ridge)

        # Look for the best (lowest) MAE.
        elif metricToOptimize == LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR:
            if meanAbsoluteError < meanABError_min:
                meanABError_min = meanAbsoluteError
                plotMAE.append(meanAbsoluteError)
                bestRegressionCoef_MAE_Ridge = candidateCoef
                bestRidgeModel = candidateModel
            if showOutput:
                print(i," bestrMAE: ", meanABError_min)

        # Look for the best (highest, strictly positive) R2.
        else:
            if rSquared_Ridge > maxRSquared_Ridge and rSquared_Ridge > 0:
                maxRSquared_Ridge = rSquared_Ridge
                bestRegressionCoef_R2_Ridge = candidateCoef
                plotR2.append(rSquared_Ridge)
                bestRidgeModel = candidateModel
            if showOutput:
                print(i,"bestRSquared:",maxRSquared_Ridge)

    # No iteration produced an acceptable model (e.g. zero iterations or R2
    # never above 0): there is nothing to report, plot or save.
    if bestRidgeModel is None:
        print(" \n No acceptable model found after {:d} runs; nothing was saved".format(numIterations))
        return

    if metricToOptimize == LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR:
        print(" \n Best rMSE: {:f} \n ,after running linear regression {:d} different times with different test and training combinations"
              .format(rMSEMin_Ridge,numIterations))
        print("Value alpha: ", bestRidgeModel.alpha_)
        line_plot2(plotrMSE, title='Optimize value rMSE on validation data',
                   label1='rMSE')

    elif metricToOptimize == LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR:
        print(" \n Best MAE: {:f} \n ,after running linear regression {:d} different times with different test and training combinations"
              .format(meanABError_min,numIterations))
        print("Value alpha: ", bestRidgeModel.alpha_)
        line_plot2(plotMAE, title='Optimize value MAE on validation data',
                   label1='MAE')
    else:
        print(" \n Best RSquared: {:f} \n ,after running linear regression {:d} different times with different test and training combinations"
              .format(maxRSquared_Ridge,numIterations))
        print("Value alpha: ", bestRidgeModel.alpha_)
        line_plot2(plotR2, title='Optimize value R2 on validation data',
                   label1='R2')

    if showOutput:
        print(" \n Best regression coef rMSE: ", bestRegressionCoef_rMSE_Ridge,"\n")
        print(" \n Best regression coef R2: ", bestRegressionCoef_R2_Ridge,"\n")
        print(" \n Best regression coef MAE: ", bestRegressionCoef_MAE_Ridge,"\n")

    fileName_Ridge = "RESULT/" + "bestRegressionModel_Ridge_" + str(metricToOptimize.name) + ".sav"
    saveBestModel(bestRidgeModel, fileName_Ridge)

In [None]:
# Raw feature vectors of three known ICOs, in the order: price_usd, price_btc,
# total_supply, market_cap_usd, available_supply, usd_raised, eth_price_launch,
# btc_price_launch, ico_duration, month, day, country.
ico_0x = np.array([1.71456, 0.00019931, 1000000000, 905793616, 528295082, 24000000, 297.63, 3420.4, 7, 8, 9, 182])
ico_modum = np.array([2.36319, 0.00027471, 27266200, 43166501, 18266200, 13500000, 383.47, 4701.76, 21, 9, 1, 33])
ico_crypto20 = np.array([1.68825, 0.00019625, 40656082, 66992446, 39681591, 38222081, 308.89, 4369.35, 54, 10, 7, 196])
def Optimize2(numIterations, x, y):
    """Train Ridge models repeatedly and print their predictions and scores.

    Each iteration fits a model with ``ridgeRegression(x, y)``, predicts the
    three sample ICOs above and reports rMSE, R2 and MAE on the module-level
    hold-out set. Nothing is saved or returned.

    Args:
        numIterations: Number of models to train.
        x: 2-D array of (encoded) inputs.
        y: 1-D array of outputs.

    NOTE(review): the "REAL" crypto20 price printed here is 0.9876 while
    another cell of this notebook prints 0.9786 -- confirm which is right.
    """
    for run in np.arange(numIterations):
        # Only the fitted model is needed; validation scores are ignored.
        ridgeModel, _rmse, _r2, _mae, _coef = ridgeRegression(x, y)

        price_0x = makePrediction(ridgeModel, ico_0x)
        price_modum = makePrediction(ridgeModel, ico_modum)
        price_crypto20 = makePrediction(ridgeModel, ico_crypto20)

        holdoutPrediction = ridgeModel.predict(X_test_holdout)
        holdoutRmse = sqrt(mean_squared_error(y_test_holdout, holdoutPrediction))
        holdoutR2 = r2_score(y_test_holdout, holdoutPrediction)
        holdoutMae = mean_absolute_error(y_test_holdout, holdoutPrediction)

        print("TIME: ",run)
        print("Price ICO 0x (REAL: 1.08): ", price_0x)
        print("Price ICO modum (REAL: 2.79): ", price_modum)
        print("Price ICO crypto20 (REAL: 0.9876): ", price_crypto20)
        print("Value rMSE: ", holdoutRmse)
        print("Value R2: ", holdoutR2)
        print("Value MAE: ", holdoutMae)
        print("**********************************************************************************")
        print("\n")

## **COMPARISON BETWEEN RIDGE REGRESSION AND MULTIVARIABLE REGRESSION**
After running the combination of 10000 different times with the training set

In [None]:
# Number of random train/validation splits to try.
n = 10000
# Suppress the per-iteration progress output of Optimize().
showOutput = False
saveToFile = False
# Search for the Ridge model with the lowest validation rMSE and save it.
Optimize(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR, n , X_cross, y_cross, showOutput)

In [None]:
n = 10000
# Search for the Ridge model with the highest validation R2 and save it.
Optimize(LineaReggressionMetrics.R_SQUARED, n, X_cross, y_cross, showOutput)
# Why R2 can be negative:
#  https://stats.stackexchange.com/questions/12900/when-is-r-squared-negative

In [None]:
n = 10000
# Search for the Ridge model with the lowest validation MAE and save it.
Optimize(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR, n, X_cross, y_cross, showOutput)

Compare the two approaches:
- Find the optimal model for each parameter rMSE, R2, MAE.
- Use each freshly trained model to predict known ICO prices, see which predictions are closest to reality, choose that model, and then derive the three metric values from it.

In [None]:
# Train 100 Ridge models and print their sample predictions and hold-out scores.
Optimize2(100, X_cross, y_cross)

### **LOAD SAVE MODEL AND DRAW CHART**

In [None]:
# LOAD MODEL USING RIDGE - rMSE
fileName1 = "RESULT/" + "bestRegressionModel_Ridge_" + str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) + ".sav"
bestRegression_rMSE_Ridge = loadModel(fileName1)
saveToFile = True
# Hold-out predictions of the three best models; they stay empty when the
# corresponding model file could not be loaded.
y_pred_holdout_rMSE = []
y_pred_holdout_R2 = []
y_pred_holdout_MAE = []

# Hold-out scores of the three best models (0.0 when a model is missing).
# Despite its name, holdOutMeanSquaredError holds the *root* mean squared error.
holdOutMeanSquaredError = 0.0
holdOutValidationR2Squared = 0.0
holdOutMeanABError = 0.0

# Report the best-rMSE model only if its file was found.
if(bestRegression_rMSE_Ridge):
    y_pred_holdout_rMSE = bestRegression_rMSE_Ridge.predict(X_test_holdout)  
    holdOutMeanSquaredError = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_rMSE))
    print("Predicted value when using best model rMSE: ")
    print(y_pred_holdout_rMSE)   
    print("Actual value: ")
    print(y_test_holdout)
    # The heading is used both as plot title and as image file name.
    heading = "Best_rMSE_Ridge_Regression_" + str(holdOutMeanSquaredError)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_rMSE, heading, saveToFile)
    #plotExpectedVsPredictedOutput(y_test_holdout, y_pred_holdout_rMSE, heading, saveToFile)
    line_plot(y_test_holdout, y_pred_holdout_rMSE, title=heading, lw=2)
    print("Regression coefficient when using best model rMSE: ")  
    print(bestRegression_rMSE_Ridge.coef_)
    print("************************************************************************************************")
    print("\n")

# LOAD MODEL USING RIDGE - R2
fileName3 = "RESULT/" + "bestRegressionModel_Ridge_" + str(LineaReggressionMetrics.R_SQUARED.name) + ".sav"
bestRegression_rSquared_Ridge = loadModel(fileName3)

# Report the best-R2 model only if its file was found.
if(bestRegression_rSquared_Ridge):
    y_pred_holdout_R2 = bestRegression_rSquared_Ridge.predict(X_test_holdout)   
    holdOutValidationR2Squared = r2_score(y_test_holdout, y_pred_holdout_R2)
    print("Predicted value when using best model R2: ")
    print(y_pred_holdout_R2)
    print("Actual value: ")
    print(y_test_holdout)
    # The heading is used both as plot title and as image file name.
    heading = "Best_rRSquared_Ridge_Regression_" + str(holdOutValidationR2Squared)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_R2,heading, saveToFile)
    #plotExpectedVsPredictedOutput(y_test_holdout, y_pred_holdout_R2,heading, saveToFile)
    line_plot(y_test_holdout, y_pred_holdout_R2, title=heading, lw=2)
    print("Regression coefficient when using best model R2: ")
    print(bestRegression_rSquared_Ridge.coef_) 
    print("************************************************************************************************")
    print("\n")

# LOAD MODEL USING RIDGE - MAE
fileName4 = "RESULT/" + "bestRegressionModel_Ridge_" + str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) + ".sav"
bestRegression_MAE_Ridge = loadModel(fileName4)

# Report the best-MAE model only if its file was found.
if(bestRegression_MAE_Ridge):
    y_pred_holdout_MAE = bestRegression_MAE_Ridge.predict(X_test_holdout)
    holdOutMeanABError = mean_absolute_error(y_test_holdout, y_pred_holdout_MAE)
    print("Predicted value when using best model MAE: ")
    print(y_pred_holdout_MAE)
    print("Actual value: ")
    print(y_test_holdout)
    # The heading is used both as plot title and as image file name.
    heading = "Best_MAE_Ridge_Regression_" + str(holdOutMeanABError)
    plotExpectedVsPredictedOutput2(y_test_holdout,y_pred_holdout_MAE,heading,saveToFile)
    #plotExpectedVsPredictedOutput(y_test_holdout,y_pred_holdout_MAE,heading,saveToFile)
    line_plot(y_test_holdout, y_pred_holdout_MAE, title=heading, lw=2)
    print("Regression coefficient when using best model MAE: ")
    # Print the coefficients of the MAE model itself (not of the R2 model).
    print(bestRegression_MAE_Ridge.coef_)

# Compare the hold-out predictions of the three best Ridge models in one
# figure; each panel title carries that model's hold-out score.
plotResult(y_test_holdout, y_pred_holdout_rMSE, y_pred_holdout_R2, y_pred_holdout_MAE, rMSE = str(holdOutMeanSquaredError), 
           R2 = str(holdOutValidationR2Squared), MAE = str(holdOutMeanABError))

## **Using MODEL FOR PROJECT - VALUE OF ICO AFTER 6 MONTHS**

In [None]:
# ---- Ridge model saved as best for rMSE -------------------------------------------------------
fileName1 = "RESULT/bestRegressionModel_Ridge_" + LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name + ".sav"
bestRegression_rMSE_Ridge = joblib.load(fileName1)

# Predicted prices of the three ICOs, one list per model; consumed by plotPredicted at the end.
price_rMSE, price_R2, price_MAE = [], [], []

# Feature vectors of the ICOs to forecast. Column order:
# price_usd, price_btc, total_supply, market_cap_usd, available_supply, usd_raised,
# eth_price_launch, btc_price_launch, ico_duration, month, day, country
ico_0x = np.array([1.71456, 0.00019931, 1000000000, 905793616, 528295082, 24000000, 297.63, 3420.4, 7, 8, 9, 182])
ico_modum = np.array([2.36319, 0.00027471, 27266200, 43166501, 18266200, 13500000, 383.47, 4701.76, 21, 9, 1, 33])
ico_crypto20 = np.array([1.68825, 0.00019625, 40656082, 66992446, 39681591, 38222081, 308.89, 4369.35, 54, 10, 7, 196])
# Defined for completeness; no prediction is made for this ICO in this cell.
ico_wanchin = np.array([6.71639, 0.00078075, 210000000, 712961540, 106152493, 35704520, 297.48, 4408.46, 1, 10, 3, 182])

y_pred1 = makePrediction(bestRegression_rMSE_Ridge, ico_0x)
y_pred2 = makePrediction(bestRegression_rMSE_Ridge, ico_modum)
y_pred3 = makePrediction(bestRegression_rMSE_Ridge, ico_crypto20)
price_rMSE.extend([y_pred1, y_pred2, y_pred3])

# Hold-out metrics of the best-rMSE model.
y_pred_holdout_rMSE = bestRegression_rMSE_Ridge.predict(X_test_holdout)
rMSE_holdout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_rMSE))
r2_holdout = r2_score(y_test_holdout, y_pred_holdout_rMSE)
holdout_MAE = mean_absolute_error(y_test_holdout, y_pred_holdout_rMSE)

print("Price of 3 ICO when using best mode rMSE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months: ", y_pred1)
print("Predicted value of ICO modum (real price: 2.79) after 6 months: ", y_pred2)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months: ", y_pred3)
print("Value rMSE, R2, MAE of best model rMSE")
print("Value rMSE: ", rMSE_holdout)
print("Value R2: ", r2_holdout)
print("Value MAE: ", holdout_MAE)
print("*******************************************************************************************")
print("\n")

# ---- Ridge model saved as best for R2 ---------------------------------------------------------
fileName2 = "RESULT/bestRegressionModel_Ridge_" + LineaReggressionMetrics.R_SQUARED.name + ".sav"
bestRegression_rSquared_Ridge = joblib.load(fileName2)

y_pred4 = makePrediction(bestRegression_rSquared_Ridge, ico_0x)
y_pred5 = makePrediction(bestRegression_rSquared_Ridge, ico_modum)
y_pred6 = makePrediction(bestRegression_rSquared_Ridge, ico_crypto20)
price_R2.extend([y_pred4, y_pred5, y_pred6])

# Hold-out metrics of the best-R2 model (the metric variables are reused for each model).
y_pred_holdout_R2 = bestRegression_rSquared_Ridge.predict(X_test_holdout)
rMSE_holdout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_R2))
r2_holdout = r2_score(y_test_holdout, y_pred_holdout_R2)
holdout_MAE = mean_absolute_error(y_test_holdout, y_pred_holdout_R2)

print("Price of 3 ICO when using best mode R2")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months: ", y_pred4)
print("Predicted value of ICO modum (real price: 2.79) after 6 months: ", y_pred5)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months: ", y_pred6)
print("Value rMSE, R2, MAE of best model R2")
print("Value rMSE: ", rMSE_holdout)
print("Value R2: ", r2_holdout)
print("Value MAE: ", holdout_MAE)
print("*******************************************************************************************")
print("\n")

# ---- Ridge model saved as best for MAE --------------------------------------------------------
fileName3 = "RESULT/bestRegressionModel_Ridge_" + LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name + ".sav"
bestRegression_MAE_Ridge = joblib.load(fileName3)

y_pred7 = makePrediction(bestRegression_MAE_Ridge, ico_0x)
y_pred8 = makePrediction(bestRegression_MAE_Ridge, ico_modum)
y_pred9 = makePrediction(bestRegression_MAE_Ridge, ico_crypto20)
price_MAE.extend([y_pred7, y_pred8, y_pred9])

# Hold-out metrics of the best-MAE model.
y_pred_holdout_MAE = bestRegression_MAE_Ridge.predict(X_test_holdout)
rMSE_holdout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_MAE))
r2_holdout = r2_score(y_test_holdout, y_pred_holdout_MAE)
holdout_MAE = mean_absolute_error(y_test_holdout, y_pred_holdout_MAE)

print("Price of 3 ICO when using best mode MAE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months: ", y_pred7)
print("Predicted value of ICO modum (real price: 2.79) after 6 months: ", y_pred8)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months: ", y_pred9)
print("Value rMSE, R2, MAE of best model MAE")
print("Value rMSE: ", rMSE_holdout)
print("Value R2: ", r2_holdout)
print("Value MAE: ", holdout_MAE)
print("*******************************************************************************************")
print("\n")

# Side-by-side view of the three models' ICO price predictions.
plotPredicted(price_rMSE, price_R2, price_MAE)

# **Neural Network**

Functions used to run NN with many different configurations.

In [None]:
class Activations(Enum):
    """Hidden-layer activation functions available to the neural-network search.

    The member *name* (e.g. ``Activations.tanh.name``) is what gets forwarded
    as the ``activation`` argument of ``MLPRegressor``; the integer values are
    only identifiers.
    """

    identity = 1
    logistic = 2
    tanh = 3
    relu = 4


In [None]:
# Persist a model only when it beats the model already stored under the same file name.
def saveBestModelNN(model, fileName):
    """Save ``model`` to ``fileName`` if it outperforms the model stored there.

    When ``fileName`` does not exist yet, ``model`` is written unconditionally.
    Otherwise both the stored model and ``model`` are scored on the module-level
    hold-out set (``X_test_holdout`` / ``y_test_holdout``) and ``model`` replaces
    the stored one only if it is strictly better on the metric encoded in the
    file name:

    * name contains ``ROOT_MEAN_SQUARED_ERROR`` -> lower rMSE wins
    * name contains ``MEAN_ABSOLUTE_ERROR``     -> lower MAE wins
    * anything else                             -> higher R2 wins, and the new
      R2 must also be positive

    Args:
        model: fitted estimator exposing ``predict``.
        fileName: path of the joblib file holding the current best model.

    Returns:
        None.
    """
    if not os.path.isfile(fileName):
        # Nothing to compare against yet: the candidate becomes the best model.
        joblib.dump(model, fileName)
        return

    def _holdout_scores(estimator):
        # (R2, rMSE, MAE) of ``estimator`` on the hold-out set.
        predicted = estimator.predict(X_test_holdout)
        return (
            r2_score(y_test_holdout, predicted),
            sqrt(mean_squared_error(y_test_holdout, predicted)),
            mean_absolute_error(y_test_holdout, predicted),
        )

    saved_r2, saved_rmse, saved_mae = _holdout_scores(joblib.load(fileName))
    new_r2, new_rmse, new_mae = _holdout_scores(model)

    # The metric being optimised is recovered from the file name.
    if LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name in fileName:
        candidate_wins = new_rmse < saved_rmse
    elif LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name in fileName:
        candidate_wins = new_mae < saved_mae
    else:
        candidate_wins = new_r2 > saved_r2 and new_r2 > 0

    if candidate_wins:
        joblib.dump(model, fileName)


In [None]:
# Train one MLP configuration and score it on a validation split.
def runNN(x_input, y_input, hidden_layer_size, chosen_solver, chosen_activation):
    """Fit an MLP regressor with three equal-width hidden layers and validate it.

    The inputs are split 80/20 into training and validation parts with a fixed
    ``random_state`` so every configuration is scored on the same validation rows.

    Args:
        x_input: feature matrix.
        y_input: target values aligned with ``x_input``.
        hidden_layer_size: number of neurons in each of the three hidden layers.
        chosen_solver: value for the ``solver`` argument of ``MLPRegressor``.
        chosen_activation: value for the ``activation`` argument of ``MLPRegressor``.

    Returns:
        tuple: ``(R2, rMSE, MAE, fitted_model)`` where the three metrics are
        computed on the validation split.
    """
    X_train, X_val, y_train, y_val = train_test_split(x_input, y_input, test_size=0.2, random_state=0)

    mlp = MLPRegressor(hidden_layer_sizes=(hidden_layer_size, hidden_layer_size, hidden_layer_size), max_iter=60000,
                       activation=chosen_activation, solver=chosen_solver, learning_rate='adaptive', tol=0.001)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_val)

    R2coefficient = r2_score(y_val, y_pred)
    rMeanSquaredError = sqrt(mean_squared_error(y_val, y_pred))
    # Fix: this used mean_squared_error, so callers optimising "MAE" were really optimising MSE.
    meanAbsoluteError = mean_absolute_error(y_val, y_pred)

    return R2coefficient, rMeanSquaredError, meanAbsoluteError, mlp

In [None]:
def runNeuralNetOptimizer(metricToOptimize, x_input, y_input, maxIterations, maxHiddenLayerSize, activation, solver, hiddenLayerStepSize=100, showOutput=False):
    """Sweep hidden-layer sizes and keep the network that is best on one metric.

    For each of ``maxIterations`` repetitions, every hidden-layer size in
    ``np.arange(1, maxHiddenLayerSize, hiddenLayerStepSize)`` is trained through
    ``runNN``. The model with the best validation value of ``metricToOptimize``
    is kept, reported on the module-level hold-out set, its improvement history
    is plotted, and it is handed to ``saveBestModelNN``.

    Args:
        metricToOptimize: a ``LineaReggressionMetrics`` member. rMSE and MAE are
            minimised; any other value is treated as R2 during selection
            (maximised, and only positive R2 values are accepted).
        x_input: feature matrix used for training/validation.
        y_input: targets aligned with ``x_input``.
        maxIterations: how many times the whole size sweep is repeated.
        maxHiddenLayerSize: exclusive upper bound of the hidden-layer sizes tried.
        activation: activation name forwarded to ``runNN``.
        solver: solver name forwarded to ``runNN``; also part of the saved file name.
        hiddenLayerStepSize: increment between hidden-layer sizes.
        showOutput: accepted for interface compatibility; not used by this function.

    Returns:
        None. Nothing is printed, plotted or saved when no candidate qualifies.
    """
    bestNeuralNetworkModel = None
    # Infinite sentinels: the finite 10000 / -10000 used before silently rejected
    # every candidate whenever the validation error was larger than 10000.
    minrMSE = float("inf")
    maxR2coefficient = float("-inf")
    minMAE = float("inf")
    # Successive best values of the optimised metric, kept for the progress plot.
    plotrMSE = []
    plotR2 = []
    plotMAE = []

    # Run the NN with different hidden-layer sizes.
    for _ in np.arange(maxIterations):
        for hidden_layer_size in np.arange(1, maxHiddenLayerSize, hiddenLayerStepSize):
            currentR2coefficient, currentrMSE, currentMAE, neuralNetworkModel = runNN(
                x_input, y_input, hidden_layer_size, solver, activation)

            if metricToOptimize == LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR:
                improved = minrMSE > currentrMSE
                history, trackedValue = plotrMSE, currentrMSE
            elif metricToOptimize == LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR:
                improved = minMAE > currentMAE
                history, trackedValue = plotMAE, currentMAE
            else:
                improved = maxR2coefficient < currentR2coefficient and currentR2coefficient > 0
                history, trackedValue = plotR2, currentR2coefficient

            if improved:
                # All three metrics are taken from the same (new best) model.
                minrMSE = currentrMSE
                minMAE = currentMAE
                maxR2coefficient = currentR2coefficient
                bestNeuralNetworkModel = neuralNetworkModel
                history.append(trackedValue)

    if bestNeuralNetworkModel:
        print("Best rMSE for NN, using validation data: " + str(minrMSE))
        print("Best rSquared for NN, using validation data: " + str(maxR2coefficient))
        print("Best MAE for NN, using validation data: " + str(minMAE))

        # Score the selected model on the hold-out data for the optimised metric.
        y_pred_holdout = bestNeuralNetworkModel.predict(X_test_holdout)
        if metricToOptimize == LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR:
            rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
            print("Best rMSE for NN, using holdout data: " + str(rMeanSquaredErrorHoldout))
            line_plot2(plotrMSE, title='Optimize value rMSE validation data when using solver ' + solver,
                       label1='rMSE')
        elif metricToOptimize == LineaReggressionMetrics.R_SQUARED:
            R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
            print("Best rSquared for NN, using holdout data: " + str(R2coefficientHoldout))
            line_plot2(plotR2, title='Optimize value R2 validation data when using solver ' + solver,
                       label1='R2')
        else:
            meanAbsoluteErrorHoldout = mean_absolute_error(y_test_holdout, y_pred_holdout)
            print("Best MAE for NN, using holdout data: " + str(meanAbsoluteErrorHoldout))
            line_plot2(plotMAE, title='Optimize value MAE validation data when using solver ' + solver,
                       label1='MAE')

        # Save the model; saveBestModelNN only overwrites a stored model that is worse.
        fileName = "RESULT/" + "bestNNModel_" + str(metricToOptimize.name) + solver + ".sav"
        saveBestModelNN(bestNeuralNetworkModel, fileName)

In [None]:
def runNeuralNetOptimizations2(x_input, y_input, maxIterations, maxHiddenLayerSize, activation, solver, metricToOptimize, hiddenLayerStepSize=100,showOutput=False):
    """Forward to ``runNeuralNetOptimizer`` with the arguments re-ordered.

    This wrapper takes the data first and ``metricToOptimize`` after ``solver``,
    whereas ``runNeuralNetOptimizer`` expects the metric as its first argument.
    Every value is passed through unchanged; nothing is returned.
    """
    runNeuralNetOptimizer(
        metricToOptimize=metricToOptimize,
        x_input=x_input,
        y_input=y_input,
        maxIterations=maxIterations,
        maxHiddenLayerSize=maxHiddenLayerSize,
        activation=activation,
        solver=solver,
        hiddenLayerStepSize=hiddenLayerStepSize,
        showOutput=showOutput,
    )

In [None]:
# Search budget read as globals by findBestParametersNN.
# Number of times the whole hidden-layer-size sweep is repeated.
maxIterations = 5
# Exclusive upper bound of the hidden-layer sizes tried (stop value of np.arange).
maxHiddenLayerSize = 1000
# Increment between consecutive hidden-layer sizes (sizes start at 1).
hiddenLayerStepSize = 100

In [None]:
def findBestParametersNN(metricToOptimize):
    """Run the NN hidden-layer search for ``metricToOptimize`` with each solver.

    The search uses the tanh activation and is executed twice, first with the
    adam solver and then with the sgd solver, on the module-level ``X_cross`` /
    ``y_cross`` data and the module-level search budget (``maxIterations``,
    ``maxHiddenLayerSize``, ``hiddenLayerStepSize``).

    Args:
        metricToOptimize: ``LineaReggressionMetrics`` member to optimise.

    Returns:
        None.
    """
    # (banner printed before the run, solver used for the run)
    searches = (
        ("Using tanh activation function, with Adam Solver", Solvers.adam),
        ("Using tanh activation function, with Gradient Descent Solver", Solvers.sgd),
    )
    for banner, chosenSolver in searches:
        print(banner)
        runNeuralNetOptimizations2(X_cross, y_cross, maxIterations, maxHiddenLayerSize,
                                   Activations.tanh.name, chosenSolver.name, metricToOptimize, hiddenLayerStepSize)
        print("\n")


## FIND OUT THE BEST rMSE VALUES

In [None]:
# Search for the NN with the lowest validation rMSE (runs once with adam, once with sgd).
findBestParametersNN(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR)

### FIND THE BEST R2 VALUE

In [None]:
# Search for the NN with the highest validation R2 (runs once with adam, once with sgd).
findBestParametersNN(LineaReggressionMetrics.R_SQUARED)

### FIND THE BEST MAE VALUE

In [None]:
# Search for the NN with the lowest validation MAE (runs once with adam, once with sgd).
findBestParametersNN(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR)

## LOAD THE SAVED OPTIMIZED MODELS AND CALCULATE THEIR PERFORMANCE

### PERFORMANCE OF THE MODELS TRAINED WITH THE ADAM SOLVER

In [None]:
# Evaluate the three adam-trained NN models (best rMSE / R2 / MAE) on the hold-out set.
# NOTE(review): loadModel presumably returns a falsy value when the file is missing - confirm.

# Predictions start as None so a model that fails to load is detectable afterwards.
y_pred_holdout_rMSE_NN = None
y_pred_holdout_R2_NN = None
y_pred_holdout_MAE_NN = None

## LOAD MODEL OPTIMIZED FOR rMSE
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) + str(Solvers.adam.name) + ".sav"
bestNNModel_rMse = loadModel(fileName)
if bestNNModel_rMse:
    y_pred_holdout_rMSE_NN = bestNNModel_rMse.predict(X_test_holdout)
    holdOutMeanSquaredError = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_rMSE_NN))
    heading = "Best_rMSE_Neural_Networks_" + str(holdOutMeanSquaredError) + "_adam"
    print("Predicted value when using best model rMSE (NN with Solver is adam): ")
    print(y_pred_holdout_rMSE_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_rMSE_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_rMSE_NN, heading, saveToFile)

## LOAD MODEL OPTIMIZED FOR R2
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.R_SQUARED.name) + str(Solvers.adam.name)+ ".sav"
bestNNModel_rSquared = loadModel(fileName)
if bestNNModel_rSquared:
    y_pred_holdout_R2_NN = bestNNModel_rSquared.predict(X_test_holdout)
    holdOutValidationR2Squared = r2_score(y_test_holdout, y_pred_holdout_R2_NN)
    heading = "Best_rRSquared_Neural_Networks_" + str(holdOutValidationR2Squared) + "_adam"
    print("Predicted value when using best model R2 (NN with Solver is adam): ")
    print(y_pred_holdout_R2_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_R2_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_R2_NN, heading, saveToFile)

## LOAD MODEL OPTIMIZED FOR MAE
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) + str(Solvers.adam.name) +".sav"
bestNNModel_MAE = loadModel(fileName)
if bestNNModel_MAE:
    y_pred_holdout_MAE_NN = bestNNModel_MAE.predict(X_test_holdout)
    holdOutMeanABError = mean_absolute_error(y_test_holdout, y_pred_holdout_MAE_NN)
    heading = "Best_MAE_Neural_Networks_" + str(holdOutMeanABError) + "_adam"
    print("Predicted value when using best model MAE (NN with Solver is adam): ")
    print(y_pred_holdout_MAE_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_MAE_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_MAE_NN, heading, saveToFile)

# Draw the combined comparison only when all three models were loaded. Otherwise a
# prediction would be None and the metric labels would be stale values from an earlier cell.
if all(pred is not None for pred in (y_pred_holdout_rMSE_NN, y_pred_holdout_R2_NN, y_pred_holdout_MAE_NN)):
    plotResult(y_test_holdout, y_pred_holdout_rMSE_NN, y_pred_holdout_R2_NN, y_pred_holdout_MAE_NN,
               rMSE = str(holdOutMeanSquaredError),R2 = str(holdOutValidationR2Squared), MAE = str(holdOutMeanABError))
else:
    print("Skipping combined plot: at least one adam NN model could not be loaded.")

### PERFORMANCE OF THE MODELS TRAINED WITH THE SGD SOLVER

In [None]:
# Evaluate the three sgd-trained NN models (best rMSE / R2 / MAE) on the hold-out set.
# NOTE(review): loadModel presumably returns a falsy value when the file is missing - confirm.

# Fix: reset the predictions first. Without this, a missing sgd model left the
# predictions of a previously evaluated solver in place and they were plotted as sgd.
y_pred_holdout_rMSE_NN = None
y_pred_holdout_R2_NN = None
y_pred_holdout_MAE_NN = None

## LOAD MODEL OPTIMIZED FOR rMSE
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) + str(Solvers.sgd.name) + ".sav"
bestNNModel_rMse = loadModel(fileName)
if bestNNModel_rMse:
    y_pred_holdout_rMSE_NN = bestNNModel_rMse.predict(X_test_holdout)
    holdOutMeanSquaredError = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout_rMSE_NN))
    heading = "Best_rMSE_Neural_Networks_" + str(holdOutMeanSquaredError) + "_sgd"
    print("Predicted value when using best model rMSE (NN with Solver is sgd): ")
    print(y_pred_holdout_rMSE_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_rMSE_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_rMSE_NN, heading, saveToFile)

## LOAD MODEL OPTIMIZED FOR R2
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.R_SQUARED.name) + str(Solvers.sgd.name)+ ".sav"
bestNNModel_rSquared = loadModel(fileName)
if bestNNModel_rSquared:
    y_pred_holdout_R2_NN = bestNNModel_rSquared.predict(X_test_holdout)
    holdOutValidationR2Squared = r2_score(y_test_holdout, y_pred_holdout_R2_NN)
    heading = "Best_rRSquared_Neural_Networks_" + str(holdOutValidationR2Squared) + "_sgd"
    print("Predicted value when using best model R2 (NN with Solver is sgd): ")
    print(y_pred_holdout_R2_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_R2_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_R2_NN, heading, saveToFile)

## LOAD MODEL OPTIMIZED FOR MAE
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) + str(Solvers.sgd.name) +".sav"
bestNNModel_MAE = loadModel(fileName)
if bestNNModel_MAE:
    y_pred_holdout_MAE_NN = bestNNModel_MAE.predict(X_test_holdout)
    holdOutMeanABError = mean_absolute_error(y_test_holdout, y_pred_holdout_MAE_NN)
    heading = "Best_MAE_Neural_Networks_" + str(holdOutMeanABError) + "_sgd"
    print("Predicted value when using best model MAE (NN with Solver is sgd): ")
    print(y_pred_holdout_MAE_NN)
    print("Actual value: ")
    print(y_test_holdout)
    line_plot(y_test_holdout, y_pred_holdout_MAE_NN, title = heading)
    plotExpectedVsPredictedOutput2(y_test_holdout, y_pred_holdout_MAE_NN, heading, saveToFile)

# Draw the combined comparison only when all three models were loaded. Otherwise a
# prediction would be None and the metric labels would be stale values from an earlier cell.
if all(pred is not None for pred in (y_pred_holdout_rMSE_NN, y_pred_holdout_R2_NN, y_pred_holdout_MAE_NN)):
    plotResult(y_test_holdout, y_pred_holdout_rMSE_NN, y_pred_holdout_R2_NN, y_pred_holdout_MAE_NN,
               rMSE = str(holdOutMeanSquaredError),R2 = str(holdOutValidationR2Squared), MAE = str(holdOutMeanABError))
else:
    print("Skipping combined plot: at least one sgd NN model could not be loaded.")

## USE MODEL TO FIND THE PRICE AFTER 6 MONTHS

### FORECAST WITH MODEL USING SOLVER ADAM

In [None]:
# Forecast the three ICO prices with the adam-trained NN models and report hold-out metrics.
# Relies on ico_0x / ico_modum / ico_crypto20 and the Ridge hold-out predictions
# (y_pred_holdout_rMSE / _R2 / _MAE) defined by the Ridge forecast cell.
plotNNrMSE = []
plotNNR2 = []
plotNNMAE = []

# Load the three adam models (best rMSE, best R2, best MAE).
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) + str(Solvers.adam.name) + ".sav"
bestNN = joblib.load(fileName)

fileName1 = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.R_SQUARED.name) + str(Solvers.adam.name)+ ".sav"
bestNNModel_rS = joblib.load(fileName1)

fileName2 = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) + str(Solvers.adam.name) +".sav"
bestNNModel_MAE = joblib.load(fileName2)

# ---- best-rMSE model ----
y_pred =  makePrediction(bestNN, ico_0x)
y_pred1 = makePrediction(bestNN, ico_modum)
y_pred2 = makePrediction(bestNN, ico_crypto20)
plotNNrMSE.extend([y_pred, y_pred1, y_pred2])
y_pred_holdout = bestNN.predict(X_test_holdout)
# Kept under its own name for the final comparison plot (y_pred_holdout is reused below).
y_pred_holdout_rMSE_adam = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model rMSE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model rMSE: ",y_pred)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model rMSE: ",y_pred1)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model rMSE: ",y_pred2)
print("Value rMSE, R2, MAE of best model rMSE")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")

# ---- best-R2 model ----
y_pred3 = makePrediction(bestNNModel_rS, ico_0x)
y_pred4 = makePrediction(bestNNModel_rS, ico_modum)
y_pred5 = makePrediction(bestNNModel_rS, ico_crypto20)
plotNNR2.extend([y_pred3, y_pred4, y_pred5])
y_pred_holdout = bestNNModel_rS.predict(X_test_holdout)
y_pred_holdout_R2_adam = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model R2")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model R2: ",y_pred3)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model R2: ",y_pred4)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model R2: ",y_pred5)
print("Value rMSE, R2, MAE of best model R2")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")

# ---- best-MAE model (y_pred3..y_pred5 are reused for its predictions) ----
y_pred3 = makePrediction(bestNNModel_MAE, ico_0x)
y_pred4 = makePrediction(bestNNModel_MAE, ico_modum)
y_pred5 = makePrediction(bestNNModel_MAE, ico_crypto20)
plotNNMAE.extend([y_pred3, y_pred4, y_pred5])
y_pred_holdout = bestNNModel_MAE.predict(X_test_holdout)
y_pred_holdout_MAE_adam = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model MAE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model MAE: ",y_pred3)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model MAE: ",y_pred4)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model MAE: ",y_pred5)
print("Value rMSE, R2, MAE of best model MAE")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")
plotPredicted(plotNNrMSE, plotNNR2, plotNNMAE)
# Fix: compare Ridge against the adam predictions computed in this cell. The shared
# y_pred_holdout_*_NN globals hold whichever solver's performance cell ran last
# (sgd when the notebook is run top to bottom), so sgd results were plotted as adam.
plotResult2(y_test_holdout, y_pred_holdout_rMSE, y_pred_holdout_R2, y_pred_holdout_MAE, y_pred_holdout_rMSE_adam,
                y_pred_holdout_R2_adam, y_pred_holdout_MAE_adam, solver = Solvers.adam)

### FORECAST WITH MODEL USING SOLVER SGD

In [None]:
# Forecast the three ICO prices with the sgd-trained NN models and report hold-out metrics.
# Relies on ico_0x / ico_modum / ico_crypto20 and the Ridge hold-out predictions
# (y_pred_holdout_rMSE / _R2 / _MAE) defined by the Ridge forecast cell.
plotNNrMSE = []
plotNNR2 = []
plotNNMAE = []

# Load the three sgd models (best rMSE, best R2, best MAE).
fileName = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.ROOT_MEAN_SQUARED_ERROR.name) + str(Solvers.sgd.name) + ".sav"
bestNN = joblib.load(fileName)

fileName1 = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.R_SQUARED.name) + str(Solvers.sgd.name)+ ".sav"
bestNNModel_rS = joblib.load(fileName1)

fileName2 = "RESULT/" + "bestNNModel_" + str(LineaReggressionMetrics.MEAN_ABSOLUTE_ERROR.name) + str(Solvers.sgd.name) +".sav"
bestNNModel_MAE = joblib.load(fileName2)

# ---- best-rMSE model ----
y_pred =  makePrediction(bestNN, ico_0x)
y_pred1 = makePrediction(bestNN, ico_modum)
y_pred2 = makePrediction(bestNN, ico_crypto20)
plotNNrMSE.extend([y_pred, y_pred1, y_pred2])
y_pred_holdout = bestNN.predict(X_test_holdout)
# Kept under its own name for the final comparison plot (y_pred_holdout is reused below).
y_pred_holdout_rMSE_sgd = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model rMSE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model rMSE: ",y_pred)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model rMSE: ",y_pred1)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model rMSE: ",y_pred2)
print("Value rMSE, R2, MAE of best model rMSE")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")

# ---- best-R2 model ----
y_pred3 = makePrediction(bestNNModel_rS, ico_0x)
y_pred4 = makePrediction(bestNNModel_rS, ico_modum)
y_pred5 = makePrediction(bestNNModel_rS, ico_crypto20)
plotNNR2.extend([y_pred3, y_pred4, y_pred5])
y_pred_holdout = bestNNModel_rS.predict(X_test_holdout)
y_pred_holdout_R2_sgd = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model R2")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model R2: ",y_pred3)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model R2: ",y_pred4)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model R2: ",y_pred5)
print("Value rMSE, R2, MAE of best model R2")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")

# ---- best-MAE model (y_pred3..y_pred5 are reused for its predictions) ----
y_pred3 = makePrediction(bestNNModel_MAE, ico_0x)
y_pred4 = makePrediction(bestNNModel_MAE, ico_modum)
y_pred5 = makePrediction(bestNNModel_MAE, ico_crypto20)
plotNNMAE.extend([y_pred3, y_pred4, y_pred5])
y_pred_holdout = bestNNModel_MAE.predict(X_test_holdout)
y_pred_holdout_MAE_sgd = y_pred_holdout
R2coefficientHoldout = r2_score(y_test_holdout, y_pred_holdout)
rMeanSquaredErrorHoldout = sqrt(mean_squared_error(y_test_holdout, y_pred_holdout))
meanAbsoluteErrorHoldout= mean_absolute_error(y_test_holdout, y_pred_holdout)
print("Price of 3 ICO using best model MAE")
print("Predicted value of ICO 0x (real price: 1.08) after 6 months using best model MAE: ",y_pred3)
print("Predicted value of ICO modum (real price: 2.79) after 6 months using best model MAE: ",y_pred4)
print("Predicted value of ICO crypto20 (real price: 0.9786) after 6 months using best model MAE: ",y_pred5)
print("Value rMSE, R2, MAE of best model MAE")
print("Value rMSE: ", rMeanSquaredErrorHoldout)
print("Value R2: ", R2coefficientHoldout)
print("Value MAE: ", meanAbsoluteErrorHoldout)
print("\n")
print("*************************************************************************************")
plotPredicted(plotNNrMSE, plotNNR2, plotNNMAE)
# Fix: compare Ridge against the sgd predictions computed in this cell rather than the
# shared y_pred_holdout_*_NN globals, which belong to whichever performance cell ran
# last and may be None or come from a different solver.
plotResult2(y_test_holdout, y_pred_holdout_rMSE, y_pred_holdout_R2, y_pred_holdout_MAE, y_pred_holdout_rMSE_sgd,
                y_pred_holdout_R2_sgd, y_pred_holdout_MAE_sgd, solver = Solvers.sgd)