# Prédiction de la valeur d'une action

## Installation de Quandl pour récupérer les données du marché

In [57]:
!pip install -q quandl

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


## Importation des dépendances

In [58]:
%matplotlib inline
import os

import matplotlib.pyplot as plt
import numpy as np
import quandl
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

## Récupération des données relatives à une action

In [59]:
# Read the Quandl API key from the environment so that no credential is
# stored in (or leaked through) the notebook itself.
quandl_api_key = os.environ.get("QUANDL_API_KEY")
if not quandl_api_key:
    raise RuntimeError(
        "Set the QUANDL_API_KEY environment variable before running this cell."
    )
quandl.ApiConfig.api_key = quandl_api_key

# Download the EURONEXT/ALMIL dataset
df = quandl.get("EURONEXT/ALMIL")
# Take a look at the data
print(df.tail())

            Open  High   Low  Last   Volume  Turnover
Date                                                 
2019-12-05  22.5  22.5  22.5  22.5     41.0     923.0
2019-12-06  22.5  22.6  22.3  22.6  13766.0  307018.0
2019-12-09  22.4  22.4  22.2  22.2   1057.0   23513.0
2019-12-10  22.2  22.3  22.2  22.2    596.0   13233.0
2019-12-11  22.3  22.3  21.9  22.0   2053.0   45435.0


## Prétraitement des données

In [60]:
# Keep only the 'Last' price column. Copy it so that adding a column below
# modifies an independent frame instead of a slice of the downloaded data
# (this avoids pandas' SettingWithCopyWarning).
df = df[['Last']].copy()

# A variable for predicting 'n' days out into the future
forecast_out = 10
# Create the target (dependent variable): the 'Last' price shifted 'n' rows up,
# so each row is paired with the price 'n' rows later. The final 'n' rows have
# no known target and therefore hold NaN.
df['Prediction'] = df['Last'].shift(-forecast_out)

# All feature columns (everything except the target) as a 2-D numpy array.
# `columns=` is used because passing the axis positionally, as in
# drop([...], 1), is rejected by pandas >= 2.0.
features = np.array(df.drop(columns=['Prediction']))

# Independent data set (X): every row except the last 'n', whose target is NaN
X = features[:-forecast_out]

# Dependent data set (y): the target values, again without the last 'n' NaN rows
y = np.array(df['Prediction'])[:-forecast_out]

# Split the data into 80% training and 20% testing.
# random_state is fixed so the split (and the scores computed from it) is
# reproducible across kernel restarts.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows -- confirm this is intended for
# time-ordered price data.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Set x_forecast equal to the last 'forecast_out' rows of the 'Last' column:
# these are the rows whose future price is not known yet
x_forecast = features[-forecast_out:]
print(x_forecast)

[[ 23.3]
 [ 23.3]
 [ 23. ]
 [ 22.7]
 [ 22.5]
 [ 22.5]
 [ 22.6]
 [ 22.2]
 [ 22.2]
 [ 22. ]]


## Création et entrainement du modèle avec une Support Vector Machine (Regressor)

In [61]:
# Build an RBF-kernel support vector regressor and train it on the training
# split in one step (fit() returns the fitted estimator itself).
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1).fit(x_train, y_train)

# Evaluate on the held-out split: score() returns the coefficient of
# determination R^2 of the prediction, whose best possible value is 1.0.
svm_confidence = svr_rbf.score(x_test, y_test)
print(
    "SVM confidence (based on the test dataset):",
    "%.2f%%" % (100 * svm_confidence),
)

# Predictions for the test split, then for the last 'n' rows of the data set
# (the model's forecast for the next 'n' days), which are printed.
y_pred = svr_rbf.predict(x_test)
svm_prediction = svr_rbf.predict(x_forecast)
print(svm_prediction)

SVM confidence (based on the test dataset): 95.18%
[ 23.59079734  23.59079734  23.25586797  22.89974135  22.66481168
  22.66481168  22.78116188  22.34410963  22.34410963  22.16968389]


## Création et entrainement du modèle avec une Linear Regression

In [62]:
# Build an ordinary linear regression and train it on the training split in
# one step (fit() returns the fitted estimator itself).
lr = LinearRegression().fit(x_train, y_train)

# Evaluate on the held-out split: score() returns the coefficient of
# determination R^2 of the prediction, whose best possible value is 1.0.
lr_confidence = lr.score(x_test, y_test)
print(
    "LR confidence (based on the test dataset):",
    "%.2f%%" % (100 * lr_confidence),
)

# Linear regression forecast for the next 'n' days, from the last 'n' rows
lr_prediction = lr.predict(x_forecast)
print(lr_prediction)

LR confidence (based on the test dataset): 94.94%
[ 23.42341053  23.42341053  23.13062132  22.83783211  22.64263931
  22.64263931  22.74023571  22.3498501   22.3498501   22.15465729]
