### Possiamo elaborare i nostri dati in due ulteriori modalità alternative, legate al mondo della Data Science e particolarmente utilizzate in ambito AI e Machine Learning:

Importiamo, per esempio, dei dati daily:

In [1]:
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Plotly / cufflinks imports and initialization.
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

print(__version__) # version >= 1.9.0 required

import cufflinks as cf

# For use inside notebooks
init_notebook_mode(connected=True)

# For offline use
cf.go_offline()


filename = "HD_Daily.txt"
startDate = "20130810"
endDate = "20180810"

# Convert the YYYYMMDD strings into date objects used to slice the index below.
startDateParsed = datetime.datetime.strptime(startDate, "%Y%m%d").date()
endDateParsed = datetime.datetime.strptime(endDate, "%Y%m%d").date()

# Read the price/volume columns, merging 'Date' and 'Time' into a single
# parsed 'Date_Time' column.
# NOTE(review): recent pandas releases deprecate combining columns through a
# nested parse_dates list - confirm against the installed pandas version.
instrument = pd.read_csv(
    filename,
    usecols=['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Vol'],
    parse_dates=[['Date', 'Time']],
)
print("Il file contiene", len(instrument), "record")

# Use the combined timestamp as the index and remove it from the columns.
instrument = instrument.set_index('Date_Time')

# Row-wise mean of the first four columns.
# NOTE(review): presumably Open/High/Low/Close - depends on the file's column order.
instrument['AvgPrice'] = instrument.iloc[:, 0:4].mean(axis=1).round(2)
instrument['Range'] = instrument['High'].sub(instrument['Low']).round(2)
instrument['Body'] = instrument['Open'].sub(instrument['Close']).abs()
instrument['CloseOpenPerc'] = (
    instrument['Close'].sub(instrument['Open']).div(instrument['Open']).mul(100).round(2)
)
instrument['BodyRangePerc'] = instrument['Body'].div(instrument['Range']).mul(100).round(2)
# Position of the close inside the bar's range, as a percentage
# (0 = close at the low, 100 = close at the high).
instrument['CloseBarPerc'] = (
    instrument['Close'].sub(instrument['Low']).mul(100).div(instrument['Range']).round(2)
)

# Percentage change of each series with respect to the previous row.
for source_col, target_col in (('Close', 'DeltaClosePerc'),
                               ('AvgPrice', 'DeltaAvgPricePerc')):
    previous = instrument[source_col].shift(1)
    instrument[target_col] = ((instrument[source_col] - previous) / previous * 100).round(2)

# Keep only the rows between the two parsed dates.
# NOTE(review): the bounds are plain dates while the index carries a time of
# day - verify that the bars dated endDate are included as intended.
instrument = instrument.loc[startDateParsed:endDateParsed]
instrument.head(10)

4.7.1


Il file contiene 4682 record


Unnamed: 0_level_0,Open,High,Low,Close,Vol,AvgPrice,Range,Body,CloseOpenPerc,BodyRangePerc,CloseBarPerc,DeltaClosePerc,DeltaAvgPricePerc
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2013-08-12 16:00:00,78.62,79.54,78.58,79.2,3844497,78.98,0.96,0.58,0.74,60.42,64.58,0.29,-0.29
2013-08-13 16:00:00,79.14,79.64,78.08,79.44,6956433,79.08,1.56,0.3,0.38,19.23,87.18,0.3,0.13
2013-08-14 16:00:00,79.15,79.23,77.18,77.44,8653443,78.25,2.05,1.71,-2.16,83.41,12.68,-2.52,-1.05
2013-08-15 16:00:00,76.41,76.6,74.86,75.14,13705959,75.75,1.74,1.27,-1.66,72.99,16.09,-2.97,-3.19
2013-08-16 16:00:00,75.19,76.35,74.93,75.38,9138717,75.46,1.42,0.19,0.25,13.38,31.69,0.32,-0.38
2013-08-19 16:00:00,75.49,76.44,75.18,75.21,9184197,75.58,1.26,0.28,-0.37,22.22,2.38,-0.23,0.16
2013-08-20 16:00:00,77.04,77.5,73.65,74.29,17038035,75.62,3.85,2.75,-3.57,71.43,16.62,-1.22,0.05
2013-08-21 16:00:00,74.6,75.33,73.43,73.73,10790547,74.27,1.9,0.87,-1.17,45.79,15.79,-0.75,-1.79
2013-08-22 16:00:00,73.91,74.2,73.35,74.0,6668848,73.87,0.85,0.09,0.12,10.59,76.47,0.37,-0.54
2013-08-23 16:00:00,74.15,74.2,73.11,73.89,7743049,73.84,1.09,0.26,-0.35,23.85,71.56,-0.15,-0.04


# Normalizzazione

Definiamo due funzioni che ci serviranno per riscalare i nostri dati originari nell'intervallo [0, 1] compreso tra il valore minimo ed il valore massimo della serie (normalizeMinMax), oppure soltanto rispetto al valore massimo (normalizeMax):

In [2]:
def normalizeMinMax(array):
    """
    Rescale the values of ``array`` linearly so that the minimum maps to 0
    and the maximum maps to 1:

        (a[i] - min(a)) / (max(a) - min(a))

    Args:
        array: Iterable of numbers (list, tuple, pandas Series, generator...).

    Returns:
        A new list with one normalized value per input element, in the same
        order. An empty input yields an empty list. When all the elements
        share the same value the range is zero and the formula is undefined;
        in that case every element is mapped to 0.0.
    """
    # Materialize once: one-shot iterables (generators) keep all their
    # elements, and the extremes are computed a single time instead of once
    # per element.
    values = list(array)
    if not values:
        return []

    lowest = min(values)
    span = max(values) - lowest
    if span == 0:
        # Constant series: avoid dividing by zero.
        return [0.0] * len(values)

    return [(value - lowest) / span for value in values]

def normalizeMax(array):
    """
    Scale the values of ``array`` by its maximum, so that the maximum maps
    to 1:

        a[i] / max(a)

    Args:
        array: Iterable of numbers (list, tuple, pandas Series, generator...).

    Returns:
        A new list with one scaled value per input element, in the same
        order. An empty input yields an empty list.

    Raises:
        ZeroDivisionError: with plain Python numbers, when the maximum is 0.
    """
    # Materialize once: one-shot iterables (generators) keep all their
    # elements, and the maximum is computed a single time instead of once
    # per element.
    values = list(array)
    if not values:
        return []

    peak = max(values)
    return [value / peak for value in values]

# Append the min-max normalized version of DeltaAvgPricePerc as a new column.
instrument["DeltaAvgPricePercNormalized"] = normalizeMinMax(instrument.DeltaAvgPricePerc)

# Display the first rows of the last two columns.
instrument.iloc[:, -2:].head(20)

Unnamed: 0_level_0,DeltaAvgPricePerc,DeltaAvgPricePercNormalized
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2013-08-12 16:00:00,-0.29,0.580887
2013-08-13 16:00:00,0.13,0.609556
2013-08-14 16:00:00,-1.05,0.52901
2013-08-15 16:00:00,-3.19,0.382935
2013-08-16 16:00:00,-0.38,0.574744
2013-08-19 16:00:00,0.16,0.611604
2013-08-20 16:00:00,0.05,0.604096
2013-08-21 16:00:00,-1.79,0.478498
2013-08-22 16:00:00,-0.54,0.563823
2013-08-23 16:00:00,-0.04,0.597952


# Fuzzy Logic

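Questa tecnica consente di definire degli intervalli (classi) da associare a ciascun valore della serie. L'effetto è quello di accorpare valori molto simili tra loro nella medesima classe. Gli intervalli sono chiusi a destra (comportamento predefinito di pd.cut): nell'esempio che segue un valore pari esattamente a -0.5 ricade nella classe -1, mentre uno pari a 0.5 ricade nella classe 0: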

In [3]:
# Numeric label of each class and the edges of the three intervals.
values = [-1, 0, 1]
bins = [-100, -0.5, 0.5, 100]

# Assign every DeltaAvgPricePerc value to the class of the interval it falls in.
instrument["FuzzyDeltaAvgPricePerc"] = pd.cut(
    instrument["DeltaAvgPricePerc"], bins=bins, labels=values
)

# Display the first rows of the last three columns.
instrument.iloc[:, -3:].head(20)

Unnamed: 0_level_0,DeltaAvgPricePerc,DeltaAvgPricePercNormalized,FuzzyDeltaAvgPricePerc
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-08-12 16:00:00,-0.29,0.580887,0
2013-08-13 16:00:00,0.13,0.609556,0
2013-08-14 16:00:00,-1.05,0.52901,-1
2013-08-15 16:00:00,-3.19,0.382935,-1
2013-08-16 16:00:00,-0.38,0.574744,0
2013-08-19 16:00:00,0.16,0.611604,0
2013-08-20 16:00:00,0.05,0.604096,0
2013-08-21 16:00:00,-1.79,0.478498,-1
2013-08-22 16:00:00,-0.54,0.563823,-1
2013-08-23 16:00:00,-0.04,0.597952,0


In alternativa possiamo associare alle classi anche delle etichette testuali (stringhe):

In [4]:
# Text label of each class and the edges of the three intervals.
names = ["ribasso", "invariato", "rialzo"]
bins = [-100, -0.5, 0.5, 100]

# Overwrite the class column, this time using the string labels.
instrument["FuzzyDeltaAvgPricePerc"] = pd.cut(
    instrument["DeltaAvgPricePerc"], bins=bins, labels=names
)

# Display the first rows of the last three columns.
instrument.iloc[:, -3:].head(20)

Unnamed: 0_level_0,DeltaAvgPricePerc,DeltaAvgPricePercNormalized,FuzzyDeltaAvgPricePerc
Date_Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-08-12 16:00:00,-0.29,0.580887,invariato
2013-08-13 16:00:00,0.13,0.609556,invariato
2013-08-14 16:00:00,-1.05,0.52901,ribasso
2013-08-15 16:00:00,-3.19,0.382935,ribasso
2013-08-16 16:00:00,-0.38,0.574744,invariato
2013-08-19 16:00:00,0.16,0.611604,invariato
2013-08-20 16:00:00,0.05,0.604096,invariato
2013-08-21 16:00:00,-1.79,0.478498,ribasso
2013-08-22 16:00:00,-0.54,0.563823,ribasso
2013-08-23 16:00:00,-0.04,0.597952,invariato
