## **2. Portfolio construction**

- The following notebook will use the processed file created for this project (`asset_universe.csv`)
- The aim is computing the daily returns per asset, daily returns for the portfolio, initial weights, KPIs, etc.
- **Logarithmic returns** are used for calculations since they're additive over time (a multi-period log return is the sum of the daily log returns) and more robust than arithmetic returns

#### **2.1 Importing necessary libraries**

In [1]:
import pandas as pd
import numpy as np
from src.helpers_io import read_csv_processed

#### **2.2 Loading `asset_universe.csv`**

In [2]:
# Read the processed price table and index it by its parsed "Date" column
asset_universe = (
    read_csv_processed("asset_universe.csv", parse_dates=["Date"])
    .set_index("Date")
)
asset_universe

Unnamed: 0_level_0,AMZN,BZ,CL,EURUSD,FTSE,GBPUSD,GC,GSPC,IEF,IXIC,JPM,MSFT,NG,NVDA,ORCL,SI,TLT,USDJPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-01-02,76.956497,54.910000,46.540001,1.146171,6734.200195,1.275429,1281.000000,2510.030029,89.408447,6665.939941,81.616714,94.612610,2.958,3.377354,40.754520,15.542000,101.310341,109.667999
2019-01-03,75.014000,55.950001,47.090000,1.131811,6692.700195,1.252191,1291.800049,2447.889893,90.119125,6463.500000,80.456787,91.132011,2.945,3.173305,40.357971,15.706000,102.463249,107.441002
2019-01-04,78.769501,57.060001,47.959999,1.139108,6837.399902,1.262881,1282.699951,2531.939941,89.391357,6738.859863,83.422867,95.370491,3.044,3.376611,42.097378,15.695000,101.277229,107.807999
2019-01-07,81.475502,57.330002,48.520000,1.141044,6810.899902,1.273496,1286.800049,2549.689941,89.143028,6823.470215,83.480865,95.492126,2.944,3.555370,42.764309,15.669000,100.978592,108.522003
2019-01-08,82.829002,58.720001,49.779999,1.147974,6861.600098,1.278609,1283.199951,2574.409912,88.920418,6897.000000,83.323448,96.184494,2.967,3.466858,43.151836,15.626000,100.713196,108.615997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-20,224.919998,72.940002,69.459999,1.036495,8084.600098,1.249797,2628.699951,5930.850098,89.719093,19572.599609,232.674530,433.402924,3.748,134.670654,167.989136,29.660000,85.179314,157.643997
2024-12-23,225.059998,72.629997,69.239998,1.043308,8102.700195,1.256992,2612.300049,5974.069824,89.321884,19764.880859,233.448151,432.062744,3.656,139.639572,167.474243,29.888000,84.398026,156.533005
2024-12-24,229.050003,73.580002,70.099998,1.040583,8137.000000,1.253447,2620.000000,6040.040039,89.370331,20031.130859,237.286896,436.112854,3.946,140.189468,169.721893,29.974001,84.754929,157.164993
2024-12-27,223.750000,74.169998,70.599998,1.042318,8149.799805,1.252976,2617.199951,5970.839844,89.205635,19722.029297,236.170502,427.377319,3.514,136.980148,167.296036,29.655001,84.012222,157.748001


#### **2.3 Calculating log returns per asset**

In [3]:
# Computing log returns: r_t = ln(P_t / P_{t-1})
# A logarithm is only defined for a strictly positive argument, so non-positive
# prices are masked to NaN before the ratio is taken. Any return that touches
# such a price is then an explicit NaN, instead of np.log raising a
# RuntimeWarning (negative ratio) or producing +/-inf (zero price), which a
# later dropna() would not remove.
positive_prices = asset_universe.where(asset_universe > 0)
log_returns_raw = np.log(positive_prices / positive_prices.shift(1))
log_returns_raw

  result = func(self.values, **kwargs)


Unnamed: 0_level_0,AMZN,BZ,CL,EURUSD,FTSE,GBPUSD,GC,GSPC,IEF,IXIC,JPM,MSFT,NG,NVDA,ORCL,SI,TLT,USDJPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-01-02,,,,,,,,,,,,,,,,,,
2019-01-03,-0.025566,0.018763,0.011748,-0.012608,-0.006182,-0.018387,0.008396,-0.025068,0.007917,-0.030840,-0.014314,-0.037482,-0.004405,-0.062319,-0.009778,0.010497,0.011316,-0.020516
2019-01-04,0.048851,0.019645,0.018307,0.006427,0.021390,0.008501,-0.007069,0.033759,-0.008108,0.041720,0.036202,0.045460,0.033064,0.062099,0.042197,-0.000701,-0.011643,0.003410
2019-01-07,0.033777,0.004721,0.011609,0.001699,-0.003883,0.008370,0.003191,0.006986,-0.002782,0.012477,0.000695,0.001275,-0.033403,0.051587,0.015718,-0.001658,-0.002953,0.006601
2019-01-08,0.016476,0.023956,0.025637,0.006054,0.007416,0.004007,-0.002802,0.009649,-0.002500,0.010718,-0.001887,0.007224,0.007782,-0.025210,0.009021,-0.002748,-0.002632,0.000866
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-20,0.007273,0.000823,-0.006458,0.001419,-0.002557,-0.005565,0.013982,0.010810,0.003678,0.010262,0.019722,-0.000984,0.044743,0.030299,0.005141,0.019267,0.005678,0.019026
2024-12-23,0.000622,-0.004259,-0.003172,0.006551,0.002236,0.005741,-0.006258,0.007261,-0.004437,0.009776,0.003319,-0.003097,-0.024853,0.036232,-0.003070,0.007658,-0.009215,-0.007072
2024-12-24,0.017573,0.012995,0.012344,-0.002615,0.004224,-0.002824,0.002943,0.010982,0.000542,0.013381,0.016310,0.009330,0.076333,0.003930,0.013332,0.002873,0.004220,0.004029
2024-12-27,-0.023411,0.007986,0.007107,0.001666,0.001572,-0.000376,-0.001069,-0.011523,-0.001845,-0.015551,-0.004716,-0.020234,-0.115947,-0.023159,-0.014396,-0.010700,-0.008802,0.003703


#### **2.4 Checking NaNs and anomalies**

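- On Apr 20th, 2020, the **Crude Oil** price (`ticker = CL`) plummeted to **-$37.63/barrel**: sellers paid buyers to take the crude because of the demand collapse caused by COVID-19 and the shortage of storage capacity. Since the logarithm of a negative price ratio is undefined, the `CL` log returns for 2020-04-20 and 2020-04-21 are NaN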
- This was a **rare event** and it's considered an **outlier**

In [4]:
# Rows of the raw log-return table with a NaN in at least one asset column
display(log_returns_raw.loc[log_returns_raw.isna().any(axis="columns")])

# CL prices on the dates where its log return is NaN (historic negative-price event)
display(asset_universe.loc["2020-04-20":"2020-04-21", "CL"])

Unnamed: 0_level_0,AMZN,BZ,CL,EURUSD,FTSE,GBPUSD,GC,GSPC,IEF,IXIC,JPM,MSFT,NG,NVDA,ORCL,SI,TLT,USDJPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-01-02,,,,,,,,,,,,,,,,,,
2020-04-20,0.007805,-0.093638,,0.000782,0.004448,-2.5e-05,0.007314,-0.018043,0.002625,-0.01039,-0.037138,-0.02002,0.093078,-0.018193,-0.013084,0.02072,0.008012,-0.002551
2020-04-21,-0.027742,-0.279761,,-0.000196,-0.030001,-0.00359,-0.013847,-0.031155,0.002781,-0.03537,-0.029433,-0.042237,-0.055021,-0.063051,-0.04943,-0.048604,0.012513,-9e-06


Date
2020-04-20   -37.630001
2020-04-21    10.010000
Name: CL, dtype: float64

#### **2.5 Dropping NaNs**

- At this point of the project, every row (date) containing a NaN, including the **rare event** in `CL`, will be dropped for all assets
- Later in the project I will go back to this and assess it as an **Event Study**

In [5]:
# Keep only the dates on which every asset has a non-NaN log return
log_returns = log_returns_raw.dropna(axis="index", how="any")
log_returns

Unnamed: 0_level_0,AMZN,BZ,CL,EURUSD,FTSE,GBPUSD,GC,GSPC,IEF,IXIC,JPM,MSFT,NG,NVDA,ORCL,SI,TLT,USDJPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2019-01-03,-0.025566,0.018763,0.011748,-0.012608,-0.006182,-0.018387,0.008396,-0.025068,0.007917,-0.030840,-0.014314,-0.037482,-0.004405,-0.062319,-0.009778,0.010497,0.011316,-0.020516
2019-01-04,0.048851,0.019645,0.018307,0.006427,0.021390,0.008501,-0.007069,0.033759,-0.008108,0.041720,0.036202,0.045460,0.033064,0.062099,0.042197,-0.000701,-0.011643,0.003410
2019-01-07,0.033777,0.004721,0.011609,0.001699,-0.003883,0.008370,0.003191,0.006986,-0.002782,0.012477,0.000695,0.001275,-0.033403,0.051587,0.015718,-0.001658,-0.002953,0.006601
2019-01-08,0.016476,0.023956,0.025637,0.006054,0.007416,0.004007,-0.002802,0.009649,-0.002500,0.010718,-0.001887,0.007224,0.007782,-0.025210,0.009021,-0.002748,-0.002632,0.000866
2019-01-09,0.001713,0.045281,0.050530,-0.002064,0.006537,-0.003777,0.004743,0.004090,0.000578,0.008673,-0.001692,0.014198,0.005713,0.019476,-0.002091,0.001726,-0.001566,0.001472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-20,0.007273,0.000823,-0.006458,0.001419,-0.002557,-0.005565,0.013982,0.010810,0.003678,0.010262,0.019722,-0.000984,0.044743,0.030299,0.005141,0.019267,0.005678,0.019026
2024-12-23,0.000622,-0.004259,-0.003172,0.006551,0.002236,0.005741,-0.006258,0.007261,-0.004437,0.009776,0.003319,-0.003097,-0.024853,0.036232,-0.003070,0.007658,-0.009215,-0.007072
2024-12-24,0.017573,0.012995,0.012344,-0.002615,0.004224,-0.002824,0.002943,0.010982,0.000542,0.013381,0.016310,0.009330,0.076333,0.003930,0.013332,0.002873,0.004220,0.004029
2024-12-27,-0.023411,0.007986,0.007107,0.001666,0.001572,-0.000376,-0.001069,-0.011523,-0.001845,-0.015551,-0.004716,-0.020234,-0.115947,-0.023159,-0.014396,-0.010700,-0.008802,0.003703


In [6]:
# Per-asset count of NaNs left in the cleaned log returns
log_returns.isna().sum(axis="index")

AMZN      0
BZ        0
CL        0
EURUSD    0
FTSE      0
GBPUSD    0
GC        0
GSPC      0
IEF       0
IXIC      0
JPM       0
MSFT      0
NG        0
NVDA      0
ORCL      0
SI        0
TLT       0
USDJPY    0
dtype: int64