# Understanding Sharpe Ratio 

## And other Important Things For the Competition

### Notebooks involved in this research

1. https://www.kaggle.com/code/chumajin/easy-to-understand-the-competition
2. https://www.kaggle.com/code/tensorchoko/jpx-eda-model-jp-en

## Importing packages

In [1]:
# Import Dependencies
%matplotlib inline

# Start Python Imports
import math, time, random, datetime

# Data Manipulation
import numpy as np
import pandas as pd

# Visualization 
import matplotlib.pyplot as plt
import missingno
import seaborn as sns
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so pick whichever whitegrid name this
# installation ships; fall back to the default style rather than crash the cell.
_whitegrid_candidates = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next(
    (name for name in _whitegrid_candidates if name in plt.style.available),
    'default',
))

# Preprocessing
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, label_binarize

# Let's be rebels and ignore warnings for now
import warnings
warnings.filterwarnings('ignore')

## Loading the data

In [2]:
# Read every CSV used in this notebook: the stock list, the training tables
# under data/train_files/, and the example submission file.
_csv_paths = {
    'stock_list': 'data/stock_list.csv',
    'df_financials': 'data/train_files/financials.csv',
    'df_options': 'data/train_files/options.csv',
    'df_secondary_stock_prices': 'data/train_files/secondary_stock_prices.csv',
    'df_stock_prices': 'data/train_files/stock_prices.csv',
    'df_trades': 'data/train_files/trades.csv',
    'sample_submission': 'data/example_test_files/sample_submission.csv',
}
# Dicts keep insertion order, so the files are read in the order listed above.
_frames = {name: pd.read_csv(path) for name, path in _csv_paths.items()}

stock_list = _frames['stock_list']
df_financials = _frames['df_financials']
df_options = _frames['df_options']
df_secondary_stock_prices = _frames['df_secondary_stock_prices']
df_stock_prices = _frames['df_stock_prices']
df_trades = _frames['df_trades']
sample_submission = _frames['sample_submission']

# Number of distinct values in each column of the example submission
sample_submission.nunique()


Date                56
SecuritiesCode    2000
Rank              2000
dtype: int64

In [4]:
# Bare expression as the cell's last line, so the notebook renders the frame
df_stock_prices

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.000730
1,20170104_1332,2017-01-04,1332,568.0,576.0,563.0,571.0,2798500,1.0,,False,0.012324
2,20170104_1333,2017-01-04,1333,3150.0,3210.0,3140.0,3210.0,270800,1.0,,False,0.006154
3,20170104_1376,2017-01-04,1376,1510.0,1550.0,1510.0,1550.0,11300,1.0,,False,0.011053
4,20170104_1377,2017-01-04,1377,3270.0,3350.0,3270.0,3330.0,150800,1.0,,False,0.003026
...,...,...,...,...,...,...,...,...,...,...,...,...
2332526,20211203_9990,2021-12-03,9990,514.0,528.0,513.0,528.0,44200,1.0,,False,0.034816
2332527,20211203_9991,2021-12-03,9991,782.0,794.0,782.0,794.0,35900,1.0,,False,0.025478
2332528,20211203_9993,2021-12-03,9993,1690.0,1690.0,1645.0,1645.0,7200,1.0,,False,-0.004302
2332529,20211203_9994,2021-12-03,9994,2388.0,2396.0,2380.0,2389.0,6500,1.0,,False,0.009098


### Let's convert the `Date` column of `df_stock_prices` to datetime format

In [7]:
# Parse the Date column into pandas datetimes, then write it back over the original column
parsed_dates = pd.to_datetime(df_stock_prices["Date"])
df_stock_prices["Date"] = parsed_dates

#### We will now calculate the `Target` value manually

In [10]:
# Keep only the rows of security 1301 and renumber them from 0
is_code_1301 = df_stock_prices["SecuritiesCode"].eq(1301)
tmp = df_stock_prices.loc[is_code_1301].reset_index(drop=True)
tmp.head(10)

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.00073
1,20170105_1301,2017-01-05,1301,2743.0,2747.0,2735.0,2738.0,17900,1.0,,False,0.00292
2,20170106_1301,2017-01-06,1301,2734.0,2744.0,2720.0,2740.0,19900,1.0,,False,-0.001092
3,20170110_1301,2017-01-10,1301,2745.0,2754.0,2735.0,2748.0,24200,1.0,,False,-0.0051
4,20170111_1301,2017-01-11,1301,2748.0,2752.0,2737.0,2745.0,9300,1.0,,False,-0.003295
5,20170112_1301,2017-01-12,1301,2745.0,2747.0,2703.0,2731.0,28700,1.0,,False,-0.006613
6,20170113_1301,2017-01-13,1301,2707.0,2730.0,2707.0,2722.0,19400,1.0,,False,-0.006657
7,20170116_1301,2017-01-16,1301,2725.0,2725.0,2696.0,2704.0,20100,1.0,,False,0.002978
8,20170117_1301,2017-01-17,1301,2702.0,2704.0,2682.0,2686.0,18400,1.0,,False,0.001856
9,20170118_1301,2017-01-18,1301,2689.0,2695.0,2681.0,2694.0,12100,1.0,,False,0.014079


In [13]:
# Close price one row and two rows further down (negative shift looks forward)
close_plus_1 = tmp["Close"].shift(periods=-1)
close_plus_2 = tmp["Close"].shift(periods=-2)
tmp["Close_Shift_1"] = close_plus_1
tmp["Close_Shift_2"] = close_plus_2

# Rate of change between those two closes, relative to the first of them
tmp["ROC"] = close_plus_2.sub(close_plus_1).div(close_plus_1)
tmp

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,Close_Shift_1,Close_Shift_2,ROC
0,20170104_1301,2017-01-04,1301,2734.0,2755.0,2730.0,2742.0,31400,1.0,,False,0.000730,2738.0,2740.0,0.000730
1,20170105_1301,2017-01-05,1301,2743.0,2747.0,2735.0,2738.0,17900,1.0,,False,0.002920,2740.0,2748.0,0.002920
2,20170106_1301,2017-01-06,1301,2734.0,2744.0,2720.0,2740.0,19900,1.0,,False,-0.001092,2748.0,2745.0,-0.001092
3,20170110_1301,2017-01-10,1301,2745.0,2754.0,2735.0,2748.0,24200,1.0,,False,-0.005100,2745.0,2731.0,-0.005100
4,20170111_1301,2017-01-11,1301,2748.0,2752.0,2737.0,2745.0,9300,1.0,,False,-0.003295,2731.0,2722.0,-0.003295
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1197,20211129_1301,2021-11-29,1301,2975.0,2984.0,2951.0,2951.0,14300,1.0,,False,0.003793,2900.0,2911.0,0.003793
1198,20211130_1301,2021-11-30,1301,2953.0,2997.0,2900.0,2900.0,20500,1.0,,False,0.007558,2911.0,2933.0,0.007558
1199,20211201_1301,2021-12-01,1301,2909.0,2936.0,2909.0,2911.0,10600,1.0,,False,0.016706,2933.0,2982.0,0.016706
1200,20211202_1301,2021-12-02,1301,2949.0,2973.0,2933.0,2933.0,15200,1.0,,False,-0.003689,2982.0,,


### We notice that the Target value is identical to ROC; this means that we have successfully calculated this value manually.

#### Note that there are missing values for the last two ROC since we are peeking forward two days in time

#### Other lessons learned: you can add a column to a pandas DataFrame with the following syntax: `name["newcolumn_name"] = values`
#### Cool functions learned: `.shift()` (peek at the value of a column in a different row) and `.reset_index()` (as the name says, it resets the row index; useful after retrieving rows that recur at evenly spaced positions in the original frame)

### Now we will attempt to rank these stocks depending on their target values
#### Note that not all 2000 stocks have values for every date
#### To calculate rank, we will look at the stock prices one date at a time (hence the way we declare `tmp2`, which selects a single date) - Note the usage of `reset_index`

In [14]:
# Keep only the rows dated 2021-12-02 and renumber them from 0
is_selected_date = df_stock_prices["Date"] == "2021-12-02"
tmp2 = df_stock_prices.loc[is_selected_date].reset_index(drop=True)
tmp2.head(10)

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target
0,20211202_1301,2021-12-02,1301,2949.0,2973.0,2933.0,2933.0,15200,1.0,,False,-0.003689
1,20211202_1332,2021-12-02,1332,566.0,577.0,565.0,573.0,1525800,1.0,,False,0.006838
2,20211202_1333,2021-12-02,1333,2276.0,2314.0,2267.0,2303.0,157000,1.0,,False,0.008058
3,20211202_1375,2021-12-02,1375,1220.0,1244.0,1209.0,1212.0,121600,1.0,,False,-0.008907
4,20211202_1376,2021-12-02,1376,1353.0,1373.0,1327.0,1327.0,7800,1.0,,False,0.014264
5,20211202_1377,2021-12-02,1377,3110.0,3150.0,3110.0,3110.0,88700,1.0,,False,0.009464
6,20211202_1379,2021-12-02,1379,1881.0,1904.0,1867.0,1867.0,76700,1.0,,False,-0.001058
7,20211202_1381,2021-12-02,1381,3210.0,3210.0,3125.0,3150.0,1600,1.0,,False,-0.015625
8,20211202_1407,2021-12-02,1407,6230.0,6290.0,6010.0,6070.0,374900,1.0,,False,0.004847
9,20211202_1413,2021-12-02,1413,2173.0,2173.0,2112.0,2112.0,20300,1.0,,False,-0.003186


#### Let's now do a very simple ranking: we create a new column called "rank" and order the securities by their target values
##### We subtract 1 because `rank()` starts counting at 1, while we want the ranks to start at 0 (the ranks will be 0-1999)

In [20]:
# Highest Target gets the smallest rank; ties are broken by order of appearance
descending_rank = tmp2["Target"].rank(method="first", ascending=False)
tmp2["rank"] = descending_rank - 1

# Order the rows by rank and renumber them from 0
rows_by_rank = tmp2.sort_values(by="rank")
tmp2 = rows_by_rank.reset_index(drop=True)
tmp2.head(10)

Unnamed: 0,RowId,Date,SecuritiesCode,Open,High,Low,Close,Volume,AdjustmentFactor,ExpectedDividend,SupervisionFlag,Target,rank
0,20211202_4699,2021-12-02,4699,2037.0,2149.0,2037.0,2143.0,31000,1.0,,False,0.231481,0.0
1,20211202_4488,2021-12-02,4488,5900.0,5990.0,5690.0,5700.0,57100,1.0,,False,0.149254,1.0
2,20211202_8057,2021-12-02,8057,4710.0,4820.0,4615.0,4640.0,29800,1.0,,False,0.12311,2.0
3,20211202_9632,2021-12-02,9632,8110.0,8150.0,8000.0,8030.0,3900,1.0,,False,0.107011,3.0
4,20211202_6958,2021-12-02,6958,510.0,525.0,502.0,517.0,639200,1.0,,False,0.103113,4.0
5,20211202_6912,2021-12-02,6912,1241.0,1400.0,1202.0,1202.0,202900,1.0,,False,0.100153,5.0
6,20211202_2307,2021-12-02,2307,1444.0,1448.0,1340.0,1371.0,85200,1.0,,False,0.084016,6.0
7,20211202_2158,2021-12-02,2158,2557.0,3150.0,2388.0,3065.0,28505600,1.0,,False,0.082452,7.0
8,20211202_6182,2021-12-02,6182,1118.0,1157.0,1061.0,1061.0,258600,1.0,,False,0.07713,8.0
9,20211202_9107,2021-12-02,9107,4940.0,5440.0,4915.0,5440.0,7957200,1.0,,False,0.066775,9.0
