Data extracted from TradingView, but any OHLC data set with volume and RSI will work.\
Times and dates are the UTC time of the event.

In [1]:
import pandas as pd

path = 'C:/Users/alexj/TradingView/current_data'
file = 'BINANCE_XRPUSD, 1D 07082024.csv'
asset = 'XRPUSD'
period = '1D'
endpoint = '07082024'

load_data = path+'/'+file

df = pd.read_csv(load_data)
df = df[['time', 'open', 'high', 'low', 'close', 'Volume', 'RSI']]

In [2]:
#  relevant functions

#  use a normalization process to make it so that assets can be compared in visuals
# df on input should contain only one column with the price data (plus dataframe index)
def normalize_data(data):
    
    """
    normalize_data uses the minimum and maximum values in the range defined by the index of the dataset.  
    The output is the current value minus the minimum, both divided by the maximum value minus the minimum.
    
    """

   # obtain the values used to create the range for the normalized value     
    min = data.min()
    max = data.max()
    x = data 
    
    # time series normalization part
    # y will be a column in a dataframe
    y = (x - min) / (max - min)
    
    return y


#  added for demostration purpose, to illustrate that the original data is required to undo normalization
#  df on input should contain only one column with the price data (plus dataframe index)
def denormalize_data(normalized_data, original_data):
    
    """
    denormalize_data uses the minimum and maximum values in the range defined by the index of the dataset.  
    The output is the current value, multiplied by the difference between 
    the maximum and minimum values plus the maximum value.
    """
    
    # obtain the values used to create the range for the normalized value
    min_value = original_data.min()
    max_value = original_data.max()
    
    # Revert the normalization
    #original_x will be a column in the dataframe
    original_x = normalized_data * (max_value - min_value) + min_value
    
    return original_x


In [3]:
"""
loaded feature definitions:

df['time'] = the the time te event was recorded
df['open'] = the oening price of the asset in the time referenced
df['high'] = the higest price of the asset in the time referenced
df['low'] = the lowest price of teh asset in the time referenced
df['close'] = the closing proce of the asset in the time referenced
df['volume'] = the transaction volume on the time referenced
df['RSI'] = the relative strenght index associated with the time referenced

"""

#  transforms to help with visualization & analysis

#  the time value transormed into the datetime type for Panadas compatibility
df['date'] = pd.to_datetime(df['time'],unit='s')

#  the value of close in the previous row
df['p_close'] = df.close.shift(1)

#  the value of close 7 rows ago
df['lw_close'] = df.close.shift(7)

#  the difference between the current close and the previous one
df['p_diff'] = df.close - df.p_close

#  the difference between the current close and the one 7 rows ago
df['lw_diff'] = df.close - df.lw_close

#  the current open, normalized based on the normalize_data function
df['open_norm'] = normalize_data(df.open)

#  the current high, normalized based on the normalize_data function
df['high_norm'] = normalize_data(df.high)

#  the current low, normalized based on the normalize_data function
df['low_norm'] = normalize_data(df.low)

#  the current close, normalized based on the normalize_data function
df['close_norm'] = normalize_data(df.close)

#  the current volume, normalized based on the normalize_data function
df['vol_norm'] = normalize_data(df.Volume)

#  the difference between the current close and the previous close, normalized using the normalize_data function
df['p_diff_norm'] = normalize_data(df.p_diff)

#  the difference between the current close and the one 7 rows ago, normalized using the normalize_data function
df['lw_diff_norm'] = normalize_data(df.lw_diff)

#  indicates whether the current difference from the previous row is positive or negative
df['direction'] = df['p_diff'].apply(lambda x: 'pos' if x >= 0 else 'neg')

#  indicates whether the current difference from 7 rows ago is positive or negative
df['lw_direction'] = df['lw_diff'].apply(lambda x: 'pos' if x >= 0 else 'neg')

#  indicates whether the previous difference from its previous row is positive or negative
df['prev_dir'] = df['direction'].shift(1)

#  indicates whether the previous difference from its 7 rows prior is positive or negative
df['lw_prev_dir'] = df['lw_direction'].shift(7)

#  is true when the indicator of previous difference from its prior row  is in the same direction as the current value
df['continue'] = df['direction'] == df['prev_dir']

#  is true when the indicator of previous difference from 7 rows prior is in the same direction as the current value
df['lw_continue'] = df['lw_direction'] == df['lw_prev_dir']

#  the 5 period rolling mean of close_norm
df['moving_5'] = df.close_norm.rolling(5).mean()

#  the 8 period rolling mean of close_norm
df['moving_8'] = df.close_norm.rolling(8).mean()

#  the 13 period rolling mean of close_norm
df['moving_13'] = df.close_norm.rolling(13).mean()

#  the day of the week of the current date
df['weekday'] = df['date'].dt.day_name()

#  show the first 15 rows
df.head(15)

Unnamed: 0,time,open,high,low,close,Volume,RSI,date,p_close,lw_close,...,direction,lw_direction,prev_dir,lw_prev_dir,continue,lw_continue,moving_5,moving_8,moving_13,weekday
0,1418515200,0.0169,0.0176,0.0168,0.0173,9187.0,75.338849,2014-12-14,,,...,neg,neg,,,False,False,,,,Sunday
1,1418601600,0.0173,0.0187,0.0168,0.0182,11209.0,77.567811,2014-12-15,0.0173,,...,pos,neg,neg,,False,False,,,,Monday
2,1418688000,0.0183,0.0228,0.0171,0.0213,31181.0,83.200246,2014-12-16,0.0182,,...,pos,neg,pos,,True,False,,,,Tuesday
3,1418774400,0.0213,0.0272,0.0206,0.0259,37492.0,88.010804,2014-12-17,0.0213,,...,pos,neg,pos,,True,False,,,,Wednesday
4,1418860800,0.0259,0.0293,0.0245,0.0281,48431.0,89.551744,2014-12-18,0.0259,,...,pos,neg,pos,,True,False,0.006533,,,Thursday
5,1418947200,0.0281,0.0288,0.0225,0.0235,23133.0,69.451648,2014-12-19,0.0281,,...,neg,neg,pos,,False,False,0.006981,,,Friday
6,1419033600,0.0235,0.0245,0.0219,0.0242,20223.0,70.535447,2014-12-20,0.0235,,...,pos,neg,neg,,False,False,0.007415,,,Saturday
7,1419120000,0.0242,0.0255,0.0232,0.0248,15842.0,71.469785,2014-12-21,0.0242,0.0173,...,pos,pos,pos,neg,True,False,0.007668,0.006805,,Sunday
8,1419206400,0.0249,0.0264,0.0245,0.0257,20392.0,72.860024,2014-12-22,0.0248,0.0182,...,pos,pos,pos,neg,True,False,0.007654,0.007185,,Monday
9,1419292800,0.0257,0.0261,0.0237,0.0241,11836.0,66.642756,2014-12-23,0.0257,0.0213,...,neg,pos,pos,neg,False,False,0.007365,0.007451,,Tuesday


In [4]:
#  export the new dataset to a .csv file in the smae directory as the notebook
df.to_csv(f'{asset}_{period}_{endpoint}_normalized.csv', index=False)