In [None]:
## CYCLE IDENTIFICATION AND CHECK NEED IMPROVEMENT FOR NON-STANDARD TIME SERIES, E.G. DIESEL

In [1]:
def _previous_marker(mask):
    """Return, per row, the index of the latest marked row strictly before it (0 if none)."""
    import numpy as np

    n = len(mask)
    latest = np.maximum.accumulate(np.where(mask, np.arange(n), 0))
    previous = np.zeros(n, dtype=int)
    previous[1:] = latest[:-1]
    return previous


def _group_ends(ids):
    """Flag rows whose successor carries a different group id (the last row is never flagged)."""
    import numpy as np

    ends = np.zeros(len(ids), dtype=bool)
    ends[:-1] = ids[:-1] != ids[1:]
    return ends


def _running_extremes(values, ids):
    """Return the running (min, max) of ``values``, restarted whenever ``ids`` changes."""
    import numpy as np

    n = len(values)
    lows = np.empty(n)
    highs = np.empty(n)
    low = high = values[0]
    lows[0] = low
    highs[0] = high
    for i in range(1, n):
        current = values[i]
        if ids[i] != ids[i - 1]:
            # A new group starts from its own first value.
            low = high = current
        elif high < current:
            high = current
        elif low > current:
            low = current
        lows[i] = low
        highs[i] = high
    return lows, highs


def _find_turning_points(values, direction, sensitivity):
    """Label each row 'peak', 'trough' or 'mid-cycle' from the direction reversals."""
    import numpy as np

    n = len(values)
    labels = np.full(n, "mid-cycle", dtype=object)

    # argmax() yields 0 when a direction never occurs, so the scan then
    # simply starts at row 1.
    first_up = int((direction == "up").argmax())
    first_down = int((direction == "down").argmax())
    start = min(first_up, first_down) + 1

    if start < n:
        # NOTE(review): the scan is seeded with the direction of the row
        # *after* the first movement.  When that row is 'steady' no reversal
        # can ever match, so flat stretches early in the series suppress all
        # peaks/troughs.  Kept as-is to preserve existing results.
        last_dir = direction[start]
        anchor = 0  # row of the previous turning point (row 0 initially)
        for i in range(start, n):
            current = direction[i]
            reversal = (last_dir == "up" and current == "down") or (
                last_dir == "down" and current == "up"
            )
            if not reversal:
                continue
            swing = abs((values[i - 1] - values[anchor]) / values[anchor])
            if swing > sensitivity:
                labels[i - 1] = "peak" if last_dir == "up" else "trough"
                last_dir = current
                anchor = i - 1

    # Safety pass: two turning points of the same kind in a row are demoted
    # back to 'mid-cycle'.
    first_trough = int((labels == "trough").argmax())
    first_peak = int((labels == "peak").argmax())
    check_start = min(first_trough, first_peak) + 1
    if check_start < n:
        last_kind = labels[check_start]
        for i in range(check_start + 1, n):
            kind = labels[i]
            if kind == "peak" or kind == "trough":
                if kind == last_kind:
                    labels[i] = "mid-cycle"
                else:
                    last_kind = kind
    return labels


def cycle_tracker(df, sensitivity, long_term):
    """Annotate an ordered price series with short- and long-term cycle statistics.

    The frame is modified in place: its first column is renamed ``date``, its
    second column ``value``, and the derived columns listed below are added
    (or overwritten) by name.  Any further input columns are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows ordered from oldest to newest; first column holds the dates and
        the second the numeric values.
    sensitivity : float
        Minimum relative move (0.05 = 5%) between the previous turning point
        and a candidate peak/trough for the candidate to be accepted.
    long_term : number
        Number of short cycles grouped into one long-term cycle.

    Returns
    -------
    tuple
        ``(df, cycle_stats_short, cycle_stats_long)`` where ``df`` is the same
        (mutated) frame and the two stats frames are ``describe()`` summaries
        of ``value`` per short / long cycle with ``count`` renamed to
        ``number_days`` and an extra ``range_width`` (max - min) column.

    Raises
    ------
    IndexError
        If ``df`` has no rows (or fewer than two columns).
    ValueError
        If ``long_term`` is not positive.

    Notes
    -----
    Derived numeric columns are float64, except the day counters and cycle
    identifiers, which are integers.  A cycle runs from one turning point of
    the kind that occurs first in the series (peak or trough) to the next of
    the same kind; the turning-point row itself closes its cycle.  A series
    without turning points forms a single cycle.
    """
    import numpy as np

    # Re-name columns based on position because the names are not known.
    df.rename(columns={df.columns[0]: "date"}, inplace=True)
    df.rename(columns={df.columns[1]: "value"}, inplace=True)

    n = len(df)
    if n == 0:
        raise IndexError("cycle_tracker requires at least one row")
    if long_term <= 0:
        raise ValueError("long_term must be a positive number of short cycles")

    values = df.iloc[:, 1].to_numpy(dtype=float)
    rows = np.arange(n)

    # --- day-on-day movement -------------------------------------------------
    movement = np.zeros(n)
    movement_perc = np.zeros(n)
    movement[1:] = values[1:] - values[:-1]
    movement_perc[1:] = movement[1:] / values[:-1]

    # Direction follows the sign of the relative move; row 0 is always steady.
    direction = np.full(n, "steady", dtype=object)
    direction[movement_perc < 0] = "down"
    direction[movement_perc > 0] = "up"

    # --- peaks and troughs ---------------------------------------------------
    peak_trough = _find_turning_points(values, direction, sensitivity)
    is_turn = (peak_trough == "peak") | (peak_trough == "trough")

    # Movement measured from the previous turning point (row 0 before the first).
    turn_ref = _previous_marker(is_turn)
    since_turn_change = values - values[turn_ref]
    since_turn_perc = np.zeros(n)
    since_turn_perc[1:] = since_turn_change[1:] / values[turn_ref[1:]]
    since_turn_days = rows - turn_ref

    # --- short cycles --------------------------------------------------------
    # Cycles are delimited by the kind of turning point that occurs first.
    # With no turning point at all the whole series is one cycle.
    turn_rows = np.flatnonzero(is_turn)
    if turn_rows.size:
        boundary = peak_trough == peak_trough[turn_rows[0]]
    else:
        boundary = np.zeros(n, dtype=bool)
    closed = boundary.astype(int)
    # The boundary row still belongs to the cycle it closes.
    cycle_number = 1 + np.cumsum(closed) - closed
    cycle_days = rows - _previous_marker(boundary)

    # Change measured from the last row of the previous cycle (row 0 initially).
    cycle_ref = _previous_marker(_group_ends(cycle_number))
    cycle_change = values - values[cycle_ref]
    cycle_strength = np.zeros(n)
    cycle_strength[1:] = cycle_change[1:] / values[cycle_ref[1:]]

    short_min, short_max = _running_extremes(values, cycle_number)

    # --- long cycles ---------------------------------------------------------
    # NOTE(review): floor(n / long_term) + 1 puts only long_term - 1 short
    # cycles in the first long cycle (and starts numbering at 2 when
    # long_term == 1).  Kept unchanged for compatibility with existing output.
    long_id = np.floor(cycle_number / long_term).astype(int) + 1

    long_ref = _previous_marker(_group_ends(long_id))
    long_change = values - values[long_ref]

    # Days are counted from the first row of the current long cycle.
    long_starts = np.ones(n, dtype=bool)
    long_starts[1:] = long_id[1:] != long_id[:-1]
    long_days = rows - np.maximum.accumulate(np.where(long_starts, rows, 0))

    long_min, long_max = _running_extremes(values, long_id)

    # --- write results by column name (never by position) ---------------------
    derived = {
        "movement": movement,
        "movement_perc": movement_perc,
        "direction": direction,
        "peak_trough": peak_trough,
        "peak_trough_strength_perc": since_turn_perc,
        "absolute_change_since_last_peak_trough": since_turn_change,
        "peak_trough_days": since_turn_days,
        "full_cycle_number": cycle_number,
        "full_cycle_days": cycle_days,
        "full_cycle_current_strength": cycle_strength,
        "full_absolute_cycle_change": cycle_change,
        "current_price_cycle_min_difference": values - short_min,
        "current_price_cycle_max_difference": values - short_max,
        "short_cycle_min": short_min,
        "short_cycle_max": short_max,
        "long_term_cycle_identifier": long_id,
        "long_cycle_absolute_change": long_change,
        "long_term_cycle_days": long_days,
        "long_term_cycle_difference_min": values - long_min,
        "long_term_cycle_difference_max": values - long_max,
        "long_term_cycle_min": long_min,
        "long_term_cycle_max": long_max,
    }
    for name, column in derived.items():
        df[name] = column

    # --- per-cycle summary frames ---------------------------------------------
    cycle_stats_short = df.groupby("full_cycle_number")["value"].describe().reset_index()
    cycle_stats_short.rename(columns={"count": "number_days"}, inplace=True)
    cycle_stats_short["range_width"] = cycle_stats_short["max"] - cycle_stats_short["min"]

    cycle_stats_long = df.groupby("long_term_cycle_identifier")["value"].describe().reset_index()
    cycle_stats_long.rename(columns={"count": "number_days"}, inplace=True)
    cycle_stats_long["range_width"] = cycle_stats_long["max"] - cycle_stats_long["min"]

    return (df, cycle_stats_short, cycle_stats_long)