In [1]:
%matplotlib inline
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import time
import pandas as pd
import plotly.graph_objects as goa
import warnings
from tbats import TBATS
import os
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt

from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [2]:
# Load the raw data from a ';'-separated file using the Python parsing engine.
# Later cells of this notebook read the Date, Jour and Last columns of this frame.
dataFrame = pd.read_table(r"./Data/DailyData_VolumeProfile.csv", sep = ";", engine = 'python')

## De-normaliser - Moyenne Mobile

#### Liste de DataFrames par semaine

In [3]:
week_days = ["Lundi", "Mardi", "Mercredi", "Jeudi", "Vendredi", "Samedi", "Dimanche"]

# Split `dataFrame` into one slice per week. A week starts on a row whose Jour is
# 'Lundi' and runs through the following days in `week_days` order; rows that come
# before the first Monday (or between weeks) are skipped.
#
# The two columns are read once instead of building a row Series via `.iloc[i]`
# for every single test.
_jours = dataFrame["Jour"].to_numpy()
_dates = dataFrame["Date"].to_numpy()
_nb_rows = len(dataFrame)

df_all_weeks = []
i = 0
while i < _nb_rows:
    if _jours[i] != 'Lundi':
        i += 1
        continue

    # First take every row belonging to this very Monday (same Date).
    today = _dates[i]
    j = i
    while j < _nb_rows and _dates[j] == today:
        j += 1

    # Then take the remaining days in order. 'Lundi' is deliberately excluded here:
    # scanning it again would swallow the next Monday into this week whenever the
    # data jumps straight from one Monday to the following one.
    for week_day in week_days[1:]:
        while j < _nb_rows and _jours[j] == week_day:
            j += 1

    this_week = dataFrame.iloc[i:j]
    df_all_weeks.append(this_week)
    i = j

In [10]:
def moyenneSemaine(week):
    """Return the arithmetic mean of the ``Last`` values of one weekly frame."""
    last_values = [value for value in week.Last]
    return np.mean(last_values)
    
def moyenneMobileSemaine(num_semaine, profondeur):
    """Moving average of the weekly ``Last`` means, looking backwards in time.

    Starting at week ``num_semaine`` of ``df_all_weeks`` and walking towards
    week 0, the weekly means are collected until ``profondeur`` of them have
    been gathered or the first week is reached; the mean of the collected
    values is returned.
    """
    weekly_means = []
    for week_index in range(num_semaine, -1, -1):
        if len(weekly_means) >= profondeur:
            break
        weekly_means.append(moyenneSemaine(df_all_weeks[week_index]))
    return np.mean(weekly_means)

In [4]:
def chercher_semaine_donnees_brutes(date, time):
    """Find the raw-data row matching ``date`` and ``time`` in ``df_all_weeks``.

    ``date`` is a 'day/month/year' string. When ``time`` is the placeholder
    'Semaine', the fixed time string ' 08:00:00.0' is searched for instead.

    Returns ``(week_number, row_position)`` where ``row_position`` is the
    position of the row inside that week's frame, or ``(-1, -1)`` when no row
    matches.
    """
    wanted_time = ' 08:00:00.0' if time == 'Semaine' else time
    _day, _month, wanted_year = date.split("/")

    for nb_week, week in enumerate(df_all_weeks):
        first_date = week.iloc[0].Date
        last_date = week.iloc[len(week) - 1].Date

        # Only scan weeks that start or end in the requested year; this keeps
        # the number of frames searched row by row small.
        in_wanted_year = (first_date.split("/")[2] == wanted_year
                          or last_date.split("/")[2] == wanted_year)
        if not in_wanted_year:
            continue

        for position in range(len(week)):
            row = week.iloc[position]
            if row.Date == date and row.Time == wanted_time:
                return nb_week, position

    return -1, -1  # no matching row in any week

In [5]:
def reverseNormalizationColumn(df, df_normalise, date, time, donnee, profondeur):
    """De-normalise one value of ``df_normalise`` with the weekly moving average.

    NOTE(review): a later cell defines ``reverseNormalizationColumn`` again, so
    when the notebook is run top to bottom that later definition replaces this
    one. This version expects ``df_normalise`` to already have a ``Time`` column.

    Parameters
    ----------
    df : unused, kept so existing callers keep working.
    df_normalise : frame with ``Date``, ``Time`` and the requested value column.
    date, time : identify the row; ``time == 'Semaine'`` stands for ' 08:00:00.0'.
    donnee : 'Last', 'Open' or 'High'; any other value selects 'Low'.
    profondeur : number of weeks used by the moving average.

    Returns
    -------
    The normalised value multiplied by the moving average that ends on the week
    preceding the one containing ``date``; ``-1`` when the date is not found in
    the raw data or lies in the first week (no previous week to scale with).

    Raises
    ------
    IndexError
        If ``df_normalise`` has no row for ``date``/``time``.
    """
    nb_week, _position = chercher_semaine_donnees_brutes(date, time)
    nb_week -= 1  # week n is used to de-normalise the week of interest, n+1
    if nb_week < 0:
        return -1

    if time == 'Semaine':
        time = ' 08:00:00.0'

    coeff = moyenneMobileSemaine(nb_week, profondeur)

    # The frame is only read, so it is queried directly instead of being
    # deep-copied on every call.
    column = donnee if donnee in ('Last', 'Open', 'High') else 'Low'
    row_mask = (df_normalise.Date == date) & (df_normalise.Time == time)
    normValue = df_normalise.loc[row_mask, column].iloc[0]

    return coeff * normValue

In [6]:
def reverseNormalizationColumn(df, df_normalise, date, time, donnee, profondeur):
    """De-normalise one value of ``df_normalise`` with the weekly moving average.

    Parameters
    ----------
    df : unused, kept so existing callers keep working.
    df_normalise : frame with ``Date`` and the requested value column; a
        ``Time`` column is only needed when ``time`` is not 'Semaine'.
    date, time : identify the row. ``time == 'Semaine'`` is the mode for daily
        frames that have no ``Time`` column: the row is then matched on
        ``Date`` alone.
    donnee : 'Last', 'Open' or 'High'; any other value selects 'Low'.
    profondeur : number of weeks used by the moving average.

    Returns
    -------
    The normalised value multiplied by the moving average that ends on the week
    preceding the one containing ``date``; ``-1`` when the date is not found in
    the raw data or lies in the first week (no previous week to scale with).

    Raises
    ------
    IndexError
        If ``df_normalise`` has no matching row.
    """
    nb_week, _position = chercher_semaine_donnees_brutes(date, time)
    nb_week -= 1  # week n is used to de-normalise the week of interest, n+1
    if nb_week < 0:
        return -1

    coeff = moyenneMobileSemaine(nb_week, profondeur)

    row_mask = df_normalise.Date == date
    if time != 'Semaine':
        row_mask = row_mask & (df_normalise.Time == time)
    # In 'Semaine' mode every row used to receive the same fake Time value, so
    # matching on Date alone selects the same rows. Doing it this way avoids
    # deep-copying the frame on every call, and avoids inserting a 0..n-1
    # indexed Series that turned into NaN (and made the lookup fail) whenever
    # df_normalise carried a different index, or raised when a Time column
    # already existed.

    column = donnee if donnee in ('Last', 'Open', 'High') else 'Low'
    normValue = df_normalise.loc[row_mask, column].iloc[0]

    return coeff * normValue

In [7]:
def reverseNormalization(dataFrame_donnees_brutes, dataFrame_normalise, date, time, profondeur):
    """De-normalise the Open, High, Low and Last values of one row.

    Each value is obtained from ``reverseNormalizationColumn`` and rounded to
    zero decimals; the four results are returned as a tuple in the order
    (Open, High, Low, Last).
    """
    rounded_values = [
        round(
            reverseNormalizationColumn(
                dataFrame_donnees_brutes, dataFrame_normalise, date, time, column, profondeur
            ),
            0,
        )
        for column in ('Open', 'High', 'Low', 'Last')
    ]
    return tuple(rounded_values)

In [8]:
def reverseDaily(dataFrame_daily, dataFrame_norm_daily, profondeur):
    """De-normalise every row of a normalised daily frame.

    For each row of ``dataFrame_norm_daily`` the values are de-normalised with
    ``reverseNormalization`` in 'Semaine' mode. The result has the columns
    Date, Open, High, Low and Last; rows whose High equals -1 (the value
    returned when a row could not be de-normalised) are removed, and the
    remaining rows keep their original positions as index.
    """
    collected = {'Date': [], 'Open': [], 'High': [], 'Low': [], 'Last': []}

    for position in range(len(dataFrame_norm_daily)):
        date = dataFrame_norm_daily.iloc[position].Date
        open_, high, low, last = reverseNormalization(
            dataFrame_daily, dataFrame_norm_daily, date, 'Semaine', profondeur
        )
        for column, value in zip(collected, (date, open_, high, low, last)):
            collected[column].append(value)

    df_reversedNorm_day = pd.DataFrame(
        {column: pd.Series(values) for column, values in collected.items()}
    )
    return df_reversedNorm_day.loc[df_reversedNorm_day.High != -1]

In [9]:
def reverseInitialBalance(dataFrame_IB, dataFrame_norm_IB, profondeur):
    """De-normalise every row of a normalised initial-balance frame.

    The '<X> Matinal' columns of a copy of ``dataFrame_norm_IB`` are renamed to
    Open/High/Low/Last so that ``reverseNormalization`` can be applied in
    'Semaine' mode. The result has the columns Date, Open, High, Low and Close
    (the de-normalised Last value is stored under 'Close'); rows whose High
    equals -1 are removed.
    """
    matinal_to_plain = {
        'Open Matinal': 'Open',
        'High Matinal': 'High',
        'Low Matinal': 'Low',
        'Last Matinal': 'Last',
    }
    df_norm_IB = dataFrame_norm_IB.copy(deep=True).rename(columns=matinal_to_plain)

    collected = {'Date': [], 'Open': [], 'High': [], 'Low': [], 'Close': []}
    for position in range(len(dataFrame_norm_IB)):
        date = df_norm_IB.iloc[position].Date
        open_, high, low, last = reverseNormalization(
            dataFrame_IB, df_norm_IB, date, 'Semaine', profondeur
        )
        for column, value in zip(collected, (date, open_, high, low, last)):
            collected[column].append(value)

    df_reversedNorm_day = pd.DataFrame(
        {column: pd.Series(values) for column, values in collected.items()}
    )
    return df_reversedNorm_day.loc[df_reversedNorm_day.High != -1]

# De-Normaliser : Mois par Mois

In [None]:
def moyennes_last_list(df_daily):
    """
    Compute the mean of ``Last`` month by month.

    ``df_daily`` must have a ``Date`` column of 'day/month/year' strings in
    chronological order and a numeric ``Last`` column. A new month starts
    whenever the month/year part of ``Date`` differs from the previous row, so
    the data may start in any month, may use zero-padded months ('01') and may
    skip months.

    Returns a list with one mean per month present in the data, in order of
    appearance, including the last month. An empty frame gives an empty list.
    """
    moyenne_last_list = []
    running_sum = 0
    nb_days_month = 0
    previous_month = None

    # Iterate over the values themselves so that the frame's index labels do
    # not matter.
    for date, last in zip(df_daily['Date'], df_daily['Last']):
        month = date.split('/')[1:]  # [month, year]
        if previous_month is not None and month != previous_month:
            moyenne_last_list.append(running_sum / nb_days_month)
            running_sum = 0
            nb_days_month = 0
        previous_month = month
        running_sum += last
        nb_days_month += 1

    # Flush the month that was still being accumulated when the data ended.
    if nb_days_month:
        moyenne_last_list.append(running_sum / nb_days_month)

    return moyenne_last_list

In [None]:
def moyennes_volumes_month(df_daily):
    """
    Compute the mean of BidVolume, AskVolume and Volume over the whole period.

    Returns the tuple ``(mean BidVolume, mean AskVolume, mean Volume)``.
    """
    def _column_mean(column):
        return np.sum(list(column)) / len(column)

    # The previous implementation also built three lists of normalised volumes
    # that were never returned; that loop indexed rows by label and raised
    # KeyError for frames of more than 22 rows without a 0..n-1 index.
    moyenne_BidVol = _column_mean(df_daily.BidVolume)
    moyenne_AskVol = _column_mean(df_daily.AskVolume)
    moyenne_Vol = _column_mean(df_daily.Volume)
    return moyenne_BidVol, moyenne_AskVol, moyenne_Vol

In [None]:
def denorm_month(df_daily, df_normalized, moyenne_last_list, moyenne_BidVol, moyenne_AskVol, moyenne_Vol):
    """
    De-normalise month by month.

    Open/High/Low/Last of ``df_normalized`` are multiplied by an entry of
    ``moyenne_last_list``: the rows of the first month found in
    ``df_normalized`` use entry 0, and the entry advances by one every time the
    month/year part of ``Date`` ('day/month/year') changes. Volume, BidVolume
    and AskVolume are multiplied by their respective overall means.

    The first rows of ``df_daily`` (labels up to 21) are put unchanged in front
    of the de-normalised rows, and the result is re-indexed from 0.

    Raises IndexError if ``moyenne_last_list`` has fewer entries than there are
    months in ``df_normalized``.
    """
    # Which entry of moyenne_last_list applies to each row. Month changes are
    # detected from the data itself, so the normalised data may start in any
    # month and may use zero-padded months.
    mean_positions = []
    position = 0
    previous_month = None
    for date in df_normalized['Date']:
        month = date.split('/')[1:]  # [month, year]
        if previous_month is not None and month != previous_month:
            position += 1
        previous_month = month
        mean_positions.append(position)
    coeffs = np.array([moyenne_last_list[p] for p in mean_positions], dtype=float)

    def _values(column):
        # Plain arrays: everything is combined by position, not by index label.
        return df_normalized[column].to_numpy()

    high = _values('High') * coeffs
    low = _values('Low') * coeffs

    denorm_month_dataFrame = pd.DataFrame({
        'Date': _values('Date'),
        'Jour': _values('Jour'),
        'Open': _values('Open') * coeffs,
        'High': high,
        'Low': low,
        'Last': _values('Last') * coeffs,
        'Spread': high - low,
        'Volume': _values('Volume') * moyenne_Vol,
        'BidVolume': _values('BidVolume') * moyenne_BidVol,
        'AskVolume': _values('AskVolume') * moyenne_AskVol,
    })

    df2 = df_daily.loc[:21, ['Date', 'Jour', 'Open', 'High', 'Low', 'Last', 'Spread', 'Volume', 'BidVolume', 'AskVolume']]
    df3 = pd.concat([df2, denorm_month_dataFrame], ignore_index=True)
    return df3

# De-Normaliser Max-Min

In [None]:
def min_max(df_daily):
    """Return ``(highest High, lowest Low)`` of ``df_daily``.

    Missing values (NaN) are ignored. The builtin ``max``/``min`` used
    previously gave a result that depended on where a NaN sat in the column.

    Raises ValueError when the frame has no rows.
    """
    if len(df_daily) == 0:
        raise ValueError("min_max() needs at least one row")
    max_high = df_daily['High'].max()
    min_low = df_daily['Low'].min()
    return max_high, min_low

In [None]:
def denormalisation_min_max(df_daily, norm_min_max_dataFrame, max_high, min_low, moyenne_BidVol, moyenne_AskVol, moyenne_Vol):
    """Undo a min-max normalisation of prices and a mean normalisation of volumes.

    Open/High/Low/Last are mapped back with ``value * (max_high - min_low) + min_low``;
    Volume, BidVolume and AskVolume are multiplied by their respective means.
    Spread is recomputed as de-normalised High minus de-normalised Low.

    ``df_daily`` is not used; it is kept in the signature for existing callers.
    The number of rows and the index of the result are those of
    ``norm_min_max_dataFrame`` (the volume columns used to be built by looping
    over ``len(df_daily)``, which failed or left NaN when the two frames had
    different lengths).
    """
    normalised = norm_min_max_dataFrame
    amplitude = max_high - min_low

    denorm_high = normalised['High'] * amplitude + min_low
    denorm_low = normalised['Low'] * amplitude + min_low

    denorm_min_max_dataFrame = pd.DataFrame({
        'Date': normalised['Date'],
        'Jour': normalised['Jour'],
        'Open': normalised['Open'] * amplitude + min_low,
        'High': denorm_high,
        'Low': denorm_low,
        'Last': normalised['Last'] * amplitude + min_low,
        'Spread': denorm_high - denorm_low,
        'Volume': normalised['Volume'] * moyenne_Vol,
        'BidVolume': normalised['BidVolume'] * moyenne_BidVol,
        'AskVolume': normalised['AskVolume'] * moyenne_AskVol,
    })
    return denorm_min_max_dataFrame