In [1]:
import pandas as pd
import numpy as np

In [2]:
#Functions

def frequency_group_table(df,start,end,interval):
    group=pd.cut(df.to_numpy(),bins=list(range(start,end+1,interval)),right=False).value_counts().to_frame()
    group.reset_index(level=0, inplace=True)
    group=group.rename(columns={"index": "group", 0: "Frequency"})
    group['RangeStart']=group['group'].apply(lambda x: x.left)
    group['RangeEnd']=group['group'].apply(lambda x: x.right)
    group=group.sort_values(by=['RangeStart', 'RangeEnd'])[['RangeStart','RangeEnd','Frequency']]
    return group

def cum_freq_table(df):
    #Cum frequency from frequency
    df['cum_freq'] = df['Frequency'].cumsum()
    #Add left and right boundary
    #Discarding the group with zero frequency to make calculation more easier
    df=df[df['Frequency']!=0]
    return df

def ith_value_finder(percentile,N):
    ithvalue=(percentile*N)/100
    return ithvalue

def percentile_calculator(cum_freq,ithvalue,interval): 
    
    '''
        Pj class = (jn/100)th value of the observation
        Pj=L+((jn/100)-cff/f)⋅i
        where, 
                n= no. of observation
                Pj= locate the jth percentile group.
                L= lower class boundary of the class containing the jth percentile
                cff = cumulative frequency of the class immediately preceding to the class containing Pj
                f= frequency of that group
                i= interval

    '''
    if len([d for d in cum_freq if d['cum_freq'] <= ithvalue])==0:
        cff=0
    else:
        cff=[d for d in cum_freq if d['cum_freq'] <= ithvalue][-1]['cum_freq']
    result=[d for d in cum_freq if d['cum_freq'] >= ithvalue][0]
    L=result['RangeStart']
    f=result['Frequency']
    i=interval
    percentile=round(L+((ithvalue-cff)/f)*i,2)
    return percentile

def percentile_main(cum_freq,interval,percentile):
    ithvalue=ith_value_finder(percentile,cum_freq['Frequency'].sum())
    approx=percentile_calculator(cum_freq.to_dict('records'),ithvalue,interval)
    return approx

def error_per(actual,approx):
    '''
        Percentage Error:|Approximate Value − Actual Value|/Actual Value  × 100%

    '''
    error_per=round(abs(approx-actual)/actual*100,2)
    return error_per

def add_frequency_df(df1,df2):
    df1['Frequency']=df1['Frequency']+df2['Frequency']
    return df1

def combine_array(array1,array2):
    combined_array=np.append(array1,array2, axis=0)
    #Remove Nan if there's any
    combined_array = combined_array[~np.isnan(combined_array)]
    return combined_array

In [3]:
df=pd.read_csv('ca9f80a6-7d76-4907-a06f-7fd2484fd2f5.csv')
df['Percentile']

KeyError: '_col0'

In [4]:
interval=10
percentiles=[0,10,25,50,75,90,95,99]
old_datapoint=df['_col0']

#New data point muse be an array
new_datapoint=[21,32,42,94,56,55]

#Converting df data to frequency 
old_datapoint_freq=frequency_group_table(old_datapoint,0,100,interval)

new_datapoint_freq=frequency_group_table(pd.DataFrame(new_datapoint)[0],0,100,interval)


combinearray=combine_array(old_datapoint.to_numpy(),new_datapoint)

freq=add_frequency_df(old_datapoint_freq,new_datapoint_freq)
cum_freq=cum_freq_table(freq)

percentile_array=[]
actual_array=[]
calculated_array=[]
error_per_array=[]
difference_array=[]

for percentile in percentiles:
    percentile_array.append(percentile)
    approx=percentile_main(cum_freq,interval,percentile)
    calculated_array.append(approx)
    actual=round(np.quantile(combinearray,percentile/100),2)
    actual_array.append(actual)
    error_per_array.append(error_per(actual,approx))
    difference_array.append(abs(actual-approx))
    
main={}
main['Percentile']=percentile_array
main['Actual Value']=actual_array
main['Calculated Value']=calculated_array
main['Error_Percent']=error_per_array
main['Difference']=difference_array

pd.DataFrame.from_dict(main)

Unnamed: 0,Percentile,Actual Value,Calculated Value,Error_Percent,Difference
0,0,0.6,0.0,100.0,0.6
1,10,1.2,1.47,22.5,0.27
2,25,1.8,3.68,104.44,1.88
3,50,3.8,7.37,93.95,3.57
4,75,13.4,13.89,3.66,0.49
5,90,24.1,25.61,6.27,1.51
6,95,34.3,36.9,7.58,2.6
7,99,66.6,66.39,0.32,0.21
