In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta
from datetime import datetime


In [2]:
# Bloomberg sample data for 5 currencies: discard every column whose header
# starts with 'Unnamed', index the quotes by 'Date' and order them by date.
df = (
    pd.read_excel('Dummy Dataset.xlsx')
    .loc[:, lambda sheet: ~sheet.columns.str.contains('^Unnamed')]
    .set_index('Date')
    .sort_index()
)
df.head()

Unnamed: 0_level_0,KWN,KRW,TWD,NTN,IRN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-05-17,1193.45,1195.6,31.28,31.462,71.14
2019-05-20,1192.75,1194.1,31.386,31.488,71.19
2019-05-21,1191.78,1194.4,31.464,31.505,71.69
2019-05-22,1189.53,1193.15,31.538,31.468,72.0
2019-05-23,1188.0,1189.15,31.544,31.531,71.78


In [3]:
def trans_preprocessing(trader_data):
    """Add a signed 'Size' column and parse 'Timestamp' in place.

    'Size' is 'Size/Notional' multiplied by +1 for long trades and -1 for
    short trades. 'Timestamp' is converted with day-first parsing.

    Parameters
    ----------
    trader_data : pandas.DataFrame
        Must contain 'Direction', 'Size/Notional' and 'Timestamp' columns.
        The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same (mutated) frame, so the call can also be chained.

    Raises
    ------
    ValueError
        If 'Direction' holds anything other than 'long' or 'short'
        (compared case-insensitively, surrounding whitespace ignored).
    """
    # Normalise before mapping so that 'Long' or ' long ' are not silently
    # booked as shorts, which is what a plain "else -1" would do.
    direction = trader_data['Direction'].astype(str).str.strip().str.lower()
    sign = direction.map({'long': 1, 'short': -1})

    unknown = sign.isna()
    if unknown.any():
        bad_values = sorted(set(trader_data.loc[unknown, 'Direction'].astype(str)))
        raise ValueError('Unrecognised trade direction(s): ' + ', '.join(bad_values))

    # long: positive; short: negative
    trader_data['Size'] = sign.astype(int) * trader_data['Size/Notional']
    trader_data['Timestamp'] = pd.to_datetime(trader_data['Timestamp'],dayfirst=True)
    return trader_data

In [4]:
#transaction data for one trader (A)
# trans_preprocessing works in place: it adds the signed 'Size' column and
# converts 'Timestamp' to datetimes (day-first), so the frame displayed below
# is the preprocessed one.
transaction_A = pd.read_csv('A_transaction.csv')
trans_preprocessing(transaction_A)
transaction_A

Unnamed: 0,Portfolio,Type of Trade,Product,Direction,Price,Size/Notional,Tenor,Amount to Risk,Time Frame,Strategy,Timestamp,User,Size
0,A2,Outright,TWD,long,30.98,6,,50000,,Strategy,2019-08-28,A,6
1,A2,Outright,TWD,long,31.01,5,,50000,,Strategy,2019-08-29,A,5
2,A2,Outright,TWD,long,30.43,10,,50000,,Strategy,2019-08-30,A,10
3,A2,Outright,NTN,long,30.45,12,,50000,,Strategy,2019-08-30,A,12
4,A2,Outright,TWD,short,30.23,7,,50000,,Strategy,2019-09-02,A,-7
5,A2,Outright,NTN,short,31.2,2,,50000,,Strategy,2019-09-02,A,-2
6,A2,Outright,NTN,short,31.45,3,,50000,,Strategy,2019-09-03,A,-3
7,A1,Outright,IRN,long,70.0,15,,50000,,Strategy,2019-09-04,A,15
8,A1,Outright,IRN,long,69.32,10,,50000,,Strategy,2019-09-05,A,10
9,A1,Outright,KWN,long,1178.56,10,,50000,,Strategy,2019-09-09,A,10


In [5]:
def get_datetime(s):
    """Parse a day-first 'dd/mm/YYYY' string into a datetime.

    Values that are already datetime objects (pandas Timestamps included,
    since they subclass datetime) are returned unchanged, so a caller that
    already holds a parsed date does not have to format it back to a string.

    Raises ValueError if a string does not match '%d/%m/%Y'.
    """
    if isinstance(s, datetime):
        return s
    return datetime.strptime(s, "%d/%m/%Y")


#find the nearest date after a given date
def nearest_after(ls,base):
    """Return the earliest date in `ls` that is on or after `base`.

    Parameters
    ----------
    ls : iterable of datetime-like
        Candidate dates (for example a Timestamp column).
    base : str or datetime
        Reference date, either a 'dd/mm/YYYY' string or a datetime.

    Returns
    -------
    The smallest element of `ls` that is >= `base`, or None when no element
    qualifies.
    """
    base = get_datetime(base)
    # default=None covers the "nothing on or after base" case without try/except
    return min((d for d in ls if d >= base), default=None)


### The function below only works when starting from timestamp 0
### i.e. no other trade was made before the start date

In [8]:
def pnl_product(start,end,trader_df,portfolio,currency,df):
    """PnL series of one product in one portfolio, assuming a flat book at the start.

    The calculation begins at the first trade of `currency` on or after
    `start`; all earlier trades are ignored.

    Parameters
    ----------
    start : str or datetime-like
        Lower bound (inclusive), parsed day-first, e.g. '1/9/2019'.
    end : str or datetime-like
        Upper bound (inclusive), parsed day-first, e.g. '10/9/2019'.
    trader_df : pandas.DataFrame
        Transactions with 'Portfolio', 'Product', 'Price', 'Timestamp'
        (datetime) and signed 'Size' columns.
    portfolio : value of the 'Portfolio' column to select, e.g. 'A1'.
    currency : value of the 'Product' column; also the price column in `df`.
    df : pandas.DataFrame
        Close prices indexed by date, one column per product.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by 'Date' with a single 'PnL' column holding one row per
        price date in the window. None (after printing a message) when there
        is no trade of `currency` on or after `start`.

    Raises
    ------
    KeyError
        If `portfolio` or `currency` does not occur in `trader_df`.
    """
    #input example: '1/9/2019', '10/9/2019', transaction_A, 'A1', 'KWN', bloomberg_df
    #input dates are inclusive

    product = trader_df.groupby('Portfolio').get_group(portfolio).groupby('Product').get_group(currency)

    start = pd.to_datetime(start,dayfirst=True)
    end = pd.to_datetime(end,dayfirst=True)

    #start becomes the first trade date on or after the requested start
    upcoming = product.loc[product['Timestamp'] >= start, 'Timestamp']
    if upcoming.empty:
        print('There is no trade for ' + currency + ' within the given time range.')
        return None
    start = upcoming.min()

    #only keep trades and closes within the selected time range
    product = product[(product['Timestamp'] >= start) & (product['Timestamp'] <= end)]
    prices = df.loc[(df.index >= start) & (df.index <= end), currency].sort_index()

    old_entry_price = 0  #close at which the existing position was last marked
    size = 0             #net position accumulated so far
    daily_pnl = []

    #for everyday that has a close price; trades on other dates are not picked up
    for day, close_price in prices.items():
        trades = product[product['Timestamp'] == day]

        if trades.empty:
            # NOTE(review): the basis is not advanced on no-trade days, so this
            # value is measured from the last trade-day close rather than from
            # the previous close - confirm that this is intended.
            profit = (close_price - old_entry_price) * size
        else:
            # Start from zero so a previous no-trade day's figure cannot leak
            # into this day's PnL.
            profit = 0
            for new_entry_price, new_size in zip(trades['Price'], trades['Size']):
                profit += (close_price - new_entry_price) * new_size + (close_price - old_entry_price) * size
                #after the first trade of the day the old position is marked at today's close
                old_entry_price = close_price
                size = size + new_size

        daily_pnl.append(profit)

    return pd.DataFrame({'PnL': np.array(daily_pnl, dtype=float)},
                        index=prices.index.rename('Date'))
        

### PnL function for limited time range

In [6]:
#find the nearest date before a given date
def nearest_before(ls,base):
    """Return the latest date in `ls` that is strictly earlier than `base`.

    `base` is converted with get_datetime first. None is returned when no
    element of `ls` lies before it.
    """
    cutoff = get_datetime(base)
    try:
        # the generator is consumed inside max(), which raises ValueError when
        # nothing precedes the cutoff
        return max(candidate for candidate in ls if candidate < cutoff)
    except ValueError:
        return None

In [7]:
def product_position_til(date,trader_df,portfolio,currency):
    """Net position of one product in one portfolio strictly before `date`.

    Trades made on `date` itself are not included.

    Parameters
    ----------
    date : str or datetime-like
        Cut-off date, parsed day-first (e.g. '5/9/2019').
    trader_df : pandas.DataFrame
        Transactions with 'Portfolio', 'Product', 'Timestamp' (datetime) and
        signed 'Size' columns.
    portfolio, currency
        Values of the 'Portfolio' and 'Product' columns to select.

    Returns
    -------
    Sum of 'Size' over the selected trades before `date`; 0 when there are none.

    Raises
    ------
    KeyError
        If `portfolio` or `currency` does not occur in `trader_df`.
    """
    product = trader_df.groupby('Portfolio').get_group(portfolio).groupby('Product').get_group(currency)
    cutoff = pd.to_datetime(date,dayfirst=True)

    # A strict "<" mask selects the trades up to and including the last trade
    # date before the cut-off, without relying on a TypeError from comparing
    # against None when no such date exists. An empty selection sums to 0.
    return product.loc[product['Timestamp'] < cutoff, 'Size'].sum()

In [8]:
def pnl_product2(start,end,trader_df,portfolio,currency,df):
    """PnL series of one product in one portfolio over a limited time range.

    The net position built by trades strictly before `start` is carried into
    the window and marked against the last close available before `start`.
    When that carried position is zero, the window instead begins at the first
    trade on or after `start`.

    Parameters
    ----------
    start : str or datetime-like
        Lower bound (inclusive), parsed day-first, e.g. '1/9/2019'.
    end : str or datetime-like
        Upper bound (inclusive), parsed day-first, e.g. '10/9/2019'.
    trader_df : pandas.DataFrame
        Transactions with 'Portfolio', 'Product', 'Price', 'Timestamp'
        (datetime) and signed 'Size' columns.
    portfolio : value of the 'Portfolio' column to select, e.g. 'A1'.
    currency : value of the 'Product' column; also the price column in `df`.
    df : pandas.DataFrame
        Close prices indexed by date, one column per product.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by 'Date' with a single 'PnL' column holding one row per
        price date in the window. None when the carried position is zero and
        there is no trade on or after `start`.

    Raises
    ------
    KeyError
        If `portfolio` or `currency` does not occur in `trader_df`, or if a
        position is carried in but `df` has no close before `start` to mark it
        against.
    """
    #input example: '1/9/2019', '10/9/2019', transaction_A, 'A1', 'KWN', bloomberg_df
    #input dates are inclusive

    product = trader_df.groupby('Portfolio').get_group(portfolio).groupby('Product').get_group(currency)

    start = pd.to_datetime(start,dayfirst=True)
    end = pd.to_datetime(end,dayfirst=True)

    #net position from trades made before start (trades on start itself excluded)
    size = product.loc[product['Timestamp'] < start, 'Size'].sum()

    if size == 0:
        #nothing carried in: begin at the first trade on or after start
        upcoming = product.loc[product['Timestamp'] >= start, 'Timestamp']
        if upcoming.empty:
            return None
        start = upcoming.min()
        old_entry_price = 0
    else:
        #the carried position is marked at the last close before start; this is
        #only looked up when it is actually needed
        earlier_dates = df.index[df.index < start]
        if earlier_dates.empty:
            raise KeyError('No close price for ' + str(currency) + ' before ' + str(start))
        old_entry_price = df.loc[earlier_dates.max(), currency]

    #only keep trades and closes within the selected time range
    product = product[(product['Timestamp'] >= start) & (product['Timestamp'] <= end)]
    prices = df.loc[(df.index >= start) & (df.index <= end), currency].sort_index()

    daily_pnl = []

    #for everyday that has a close price; trades on other dates are not picked up
    for day, close_price in prices.items():
        trades = product[product['Timestamp'] == day]

        if trades.empty:
            # NOTE(review): the basis is not advanced on no-trade days, so this
            # value is measured from the last mark (previous trade-day close or
            # the close before start) rather than from the previous close -
            # confirm that this is intended.
            profit = (close_price - old_entry_price) * size
        else:
            # Start from zero so a previous no-trade day's figure cannot leak
            # into this day's PnL.
            profit = 0
            for new_entry_price, new_size in zip(trades['Price'], trades['Size']):
                profit += (close_price - new_entry_price) * new_size + (close_price - old_entry_price) * size
                #after the first trade of the day the old position is marked at today's close
                old_entry_price = close_price
                size = size + new_size

        daily_pnl.append(profit)

    return pd.DataFrame({'PnL': np.array(daily_pnl, dtype=float)},
                        index=prices.index.rename('Date'))


In [64]:
# Example: PnL of KWN in portfolio A1 for 5-13 Sep 2019 (dates are dd/mm/YYYY, both inclusive)
pnl_product2('5/09/2019','13/09/2019',transaction_A,'A1','KWN',df)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated


Unnamed: 0_level_0,PnL
Date,Unnamed: 1_level_1
2019-09-09,133.0
2019-09-10,118.35
2019-09-11,-21.5
2019-09-12,-219.75
2019-09-13,-506.45


In [9]:
def pnl_portfolio(start,end,trader_df,portfolio,df):
    """PnL of one portfolio: the per-date sum of pnl_product2 over its products.

    Parameters
    ----------
    start, end : forwarded unchanged to pnl_product2 (inclusive date bounds).
    trader_df : pandas.DataFrame
        Transactions; must contain 'Portfolio' and 'Product' columns plus
        whatever pnl_product2 reads.
    portfolio : value of the 'Portfolio' column to select.
    df : price frame forwarded to pnl_product2.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by 'Date' with the summed 'PnL' column, or None when no
        product of the portfolio produced a result.

    Raises
    ------
    KeyError
        If `portfolio` does not occur in `trader_df`.
    """
    products = trader_df.groupby('Portfolio').get_group(portfolio)['Product'].unique()

    per_product = [pnl_product2(start,end,trader_df,portfolio,currency,df)
                   for currency in products]
    # Products without a result come back as None. Drop them up front so that
    # two such products in a row cannot abort the whole aggregation before the
    # remaining products are looked at.
    per_product = [frame for frame in per_product if frame is not None]
    if not per_product:
        return None

    # one concat + groupby instead of re-aggregating after every product
    return pd.concat(per_product).groupby('Date', as_index=True).sum()
        
        

In [10]:
def pnl_trader(start,end,trader_df,df):
    """PnL of a trader: the per-date sum of pnl_portfolio over all portfolios.

    Parameters
    ----------
    start, end : forwarded unchanged to pnl_portfolio (inclusive date bounds).
    trader_df : pandas.DataFrame
        Transactions; must contain a 'Portfolio' column plus whatever
        pnl_portfolio reads.
    df : price frame forwarded to pnl_portfolio.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by 'Date' with the summed 'PnL' column, or None when no
        portfolio produced a result (including an empty `trader_df`).
    """
    portfolios = trader_df['Portfolio'].unique()

    per_portfolio = [pnl_portfolio(start,end,trader_df,portfolio,df)
                     for portfolio in portfolios]
    # Portfolios without a result come back as None. Drop them up front so that
    # two such portfolios in a row cannot abort the whole aggregation before
    # the remaining portfolios are looked at.
    per_portfolio = [frame for frame in per_portfolio if frame is not None]
    if not per_portfolio:
        return None

    # one concat + groupby instead of re-aggregating after every portfolio
    return pd.concat(per_portfolio).groupby('Date', as_index=True).sum()

               

In [11]:
# Example: PnL for 5-10 Sep 2019 summed over every portfolio and product in transaction_A
pnl_trader('5/09/2019','10/09/2019',transaction_A,df)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated


Unnamed: 0_level_0,PnL
Date,Unnamed: 1_level_1
2019-09-05,3.565
2019-09-06,-2.364
2019-09-09,128.959
2019-09-10,110.978


In [12]:
from math import sqrt
def sharpe_ratio(pnl_df):
    """Mean of the 'PnL' column divided by its sample standard deviation.

    No risk-free rate and no annualisation factor are applied.

    Parameters
    ----------
    pnl_df : pandas.DataFrame
        Frame with a numeric 'PnL' column.

    Returns
    -------
    The ratio, or 0 when the standard deviation is exactly zero. With fewer
    than two observations the standard deviation is NaN and so is the result.
    """
    mu = pnl_df.PnL.mean()
    sd = pnl_df.PnL.std()
    # pandas hands back NumPy floats, and dividing those by zero yields inf/nan
    # with a warning instead of raising ZeroDivisionError, so the zero case has
    # to be tested explicitly.
    if sd == 0:
        return 0

    return mu/sd

In [13]:
def sortino_ratio(pnl_df):
    """Mean of 'PnL' divided by the sample standard deviation of its non-positive values.

    Parameters
    ----------
    pnl_df : pandas.DataFrame
        Frame with a numeric 'PnL' column.

    Returns
    -------
    The ratio, or 0 when the downside standard deviation is exactly zero.
    With fewer than two non-positive observations that deviation is NaN and
    so is the result.
    """
    mu = pnl_df.PnL.mean()
    #downside deviation: spread of the rows with PnL <= 0 only
    sd = pnl_df.loc[(pnl_df['PnL'] <= 0)].PnL.std()
    # pandas hands back NumPy floats, and dividing those by zero yields inf/nan
    # with a warning instead of raising ZeroDivisionError, so the zero case has
    # to be tested explicitly.
    if sd == 0:
        return 0
    return mu/sd
                    

In [70]:
from fractions import Fraction
def hit_ratio(pnl_df):
    """Number of rows with PnL > 0 divided by the number of rows with PnL <= 0.

    When there is no row with PnL <= 0 a message is printed and the count of
    winning rows is returned instead of a ratio.
    """
    pnl = pnl_df['PnL']
    wins = int((pnl > 0).sum())
    losses = int((pnl <= 0).sum())

    if losses == 0:
        print ("No losing trade")
        return wins

    return wins / losses


In [71]:
# PnL over 20 Aug - 15 Sep 2019 (dd/mm/YYYY bounds) for all of transaction_A, stored in `trial` and displayed
trial = pnl_trader('20/08/2019','15/09/2019',transaction_A,df)
trial

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated


Unnamed: 0_level_0,PnL
Date,Unnamed: 1_level_1
2019-08-28,2.616
2019-08-29,2.195
2019-08-30,18.797
2019-09-02,-8.056
2019-09-03,1.074
2019-09-04,1.182
2019-09-05,2.347
2019-09-06,-3.582
2019-09-09,127.741
2019-09-10,109.76
