In [949]:
# Import libraries
import numpy as np
import pandas as pd

# Set a max number of columns for optimal representation of the dataset
pd.set_option("display.max_columns", 120)

# Define start and end dates
START = "2022-01-01"
END = "2025-12-18"

In [None]:
# Use Yahoo Finance through yfinance: US tickers (real market yh_df)
tickers = ["SPY", "QQQ", "TLT", "GLD", "EEM"]
# SPY : S&P 500 index
# QQQ : Nasdaq-100 index
# TLT : U.S. Treasury bonds with 20+ year maturity
# GLD : Physical gold prices
# EEM : MSCI Emerging Markets index

try:
    import yfinance as yf
except Exception as e:
    yf = None
    print("Could not import yfinance:", type(e).__name__, str(e))

if yf is not None:
    try:
        yh_df = yf.download(tickers, start=START, end=END, auto_adjust=True, progress=False)
    except Exception as e:
        yh_df = pd.DataFrame()
        print("yfinance download failed:", type(e).__name__, str(e))
else:
    yh_df = pd.DataFrame()

# This code makes your script robust to missing libraries, network/API failures, and environment
# differences, while guaranteeing that the rest of your pipeline can still run safely.

# Convert to long format: date, ticker, close, volume
if isinstance(yh_df, pd.DataFrame) and yh_df.shape[0] > 0:
    if isinstance(yh_df.columns, pd.MultiIndex):
        close = yh_df["Close"].copy()
        vol = yh_df["Volume"].copy()
    else:
        close = yh_df[["Close"]].rename(columns={"Close": tickers[0]})
        vol = yh_df[["Volume"]].rename(columns={"Volume": tickers[0]})

    close.index.name = "date"
    vol.index.name = "date"

    us_close_long = close.reset_index().melt(id_vars="date", var_name="ticker", value_name="close")
    us_vol_long = vol.reset_index().melt(id_vars="date", var_name="ticker", value_name="volume")
    us_mkt = us_close_long.merge(us_vol_long, on=["date","ticker"], how="inner").dropna(subset=["close"])
else:
    us_mkt = pd.DataFrame(columns=["date","ticker","close","volume"])

us_mkt.head(), us_mkt.shape

(        date ticker      close    volume
 0 2022-01-03    EEM  44.624969  27572700
 1 2022-01-04    EEM  44.470772  24579500
 2 2022-01-05    EEM  43.745171  46425100
 3 2022-01-06    EEM  43.944706  34288700
 4 2022-01-07    EEM  44.343796  32640900,
 (4970, 4))

In [None]:
# BCRP: daily exchange rate PEN/USD buy & sell (official API)
# Codes:
# - PD04637PD: USD/PEN (buy)
# - PD04638PD: USD/PEN (sell)

import requests

bcrp_url = f"https://estadisticas.bcrp.gob.pe/estadisticas/series/api/PD04637PD-PD04638PD/json/{START}/{END}/esp"
try:
    r = requests.get(bcrp_url, timeout=30)
    r.raise_for_status()
    bcrp_obj = r.json()
except Exception as e:
    bcrp_obj = {"periods": []}
    print("BCRP request failed:", type(e).__name__, str(e))

periods = bcrp_obj.get("periods", [])
rows = []
for p in periods:
    name = p.get("name")
    vals = p.get("values", [])
    if isinstance(vals, str):
        vals = [vals]
    if name is None or not isinstance(vals, list) or len(vals) < 2:
        continue
    rows.append([name, vals[0], vals[1]])

# We have download the exchange rate PEN/USD from BCRP website, then, it is made a cleaning process to use the data


df = pd.DataFrame(rows, columns=["date_raw", "PENUSD_buy", "PENUSD_sell"])
df

Unnamed: 0,date_raw,PENUSD_buy,PENUSD_sell
0,03.Ene.22,3.98366666666667,3.98883333333333
1,04.Ene.22,3.9595,3.964
2,05.Ene.22,3.952,3.95633333333333
3,06.Ene.22,3.96716666666667,3.96966666666667
4,07.Ene.22,3.94516666666667,3.94816666666667
...,...,...,...
983,12.Dic.25,3.36685714285714,3.36885714285714
984,15.Dic.25,3.36871428571429,3.3705
985,16.Dic.25,3.36985714285714,3.37142857142857
986,17.Dic.25,3.36771428571429,3.36914285714286


### 3.2.2 From NumPy array to Series

Using `us_mkt`:

1. Filter to `ticker == "SPY"`.
2. Take `close` as a NumPy array.
3. Create a Series indexed by `date` named `SPY_close_series`.
4. Compute the mean/min/max with Series methods.

In [952]:
### 1 Filter to ticker == 'SPY'

spy_df = us_mkt[us_mkt["ticker"] == "SPY"]

print(spy_df)

           date ticker       close     volume
2982 2022-01-03    SPY  451.875183   72668200
2983 2022-01-04    SPY  451.723816   71178700
2984 2022-01-05    SPY  443.049683  104538900
2985 2022-01-06    SPY  442.633514   86858900
2986 2022-01-07    SPY  440.883545   85111600
...         ...    ...         ...        ...
3971 2025-12-11    SPY  687.139526   86173700
3972 2025-12-12    SPY  679.751404  113160300
3973 2025-12-15    SPY  678.724426   90811000
3974 2025-12-16    SPY  676.869934  122030600
3975 2025-12-17    SPY  669.421936  110625200

[994 rows x 4 columns]


In [953]:
### 2. Take Close as a Numpy Array
close_SPY = np.array(spy_df["close"])
close_SPY[0:20]


array([451.87518311, 451.72381592, 443.04968262, 442.6335144 ,
       440.88354492, 440.33493042, 444.34567261, 445.54690552,
       439.40792847, 439.58773804, 431.80273438, 427.31906128,
       422.5894165 , 414.29379272, 416.05325317, 410.97366333,
       409.94256592, 407.91824341, 418.04904175, 425.57858276])

In [954]:
### 3. Create a series indexed by date named SPY_close_series
SPY_close_series = pd.Series(close_SPY, index= spy_df['date'], name="SPY_close_series")

SPY_close_series



Unnamed: 0_level_0,SPY_close_series
date,Unnamed: 1_level_1
2022-01-03,451.875183
2022-01-04,451.723816
2022-01-05,443.049683
2022-01-06,442.633514
2022-01-07,440.883545
...,...
2025-12-11,687.139526
2025-12-12,679.751404
2025-12-15,678.724426
2025-12-16,676.869934


In [955]:
### 4. Compute the mean/min/max with Series methods.
def mean_min_max(series): ### To reduce the work, we created a function that gives us the 3 methods on the series

    return series.mean(), series.min(), series.max()

results = mean_min_max(SPY_close_series)

print(' The mean of the serie is :{0} results'.format(results[0])) ### Using the .format  to add a variable or function in to the print
print(' The min of the serie is :{0} results'.format(results[1]))
print(' The max of the serie is :{0} results'.format(results[2]))



 The mean of the serie is :485.6085830473564 results
 The min of the serie is :341.1820373535156 results
 The max of the serie is :687.1395263671875 results


### 3.2.3 From Dictionary to Series

Using `us_mkt`:

1. Compute the **last available close** for each ticker in `tickers`.
2. Store it in a dict `{ticker: last_close}`.
3. Convert to a Series and sort descending.

In [956]:
# ### 1. Compute the last available close for each ticker in tickers
us_mkt_EEM_last_close = us_mkt[us_mkt['ticker'] == 'EEM']['close'].iloc[-1] ### We filter by each specific ticker, take the value from the 'close' column, and take the last value according to the index position -1
us_mkt_GLD_last_close = us_mkt[us_mkt['ticker'] == 'GLD']['close'].iloc[-1]
us_mkt_QQQ_last_close = us_mkt[us_mkt['ticker'] == 'QQQ']['close'].iloc[-1]
us_mkt_SPY_last_close = us_mkt[us_mkt['ticker'] == 'SPY']['close'].iloc[-1]
us_mkt_TLT_last_close = us_mkt[us_mkt['ticker'] == 'TLT']['close'].iloc[-1]

print(f'The last close price in EEM : {us_mkt_EEM_last_close:.3f}') ### To take only 3 decimal places and aplicate f-strings
print(f'The last close price in GLD : {us_mkt_GLD_last_close:.3f}')
print(f'The last close price in QQQ : {us_mkt_QQQ_last_close:.3f}')
print(f'The last close price in SPY : {us_mkt_SPY_last_close:.3f}')
print(f'The last close price in TLT : {us_mkt_TLT_last_close:.3f}')


The last close price in EEM : 52.600
The last close price in GLD : 399.290
The last close price in QQQ : 599.637
The last close price in SPY : 669.422
The last close price in TLT : 87.460


In [957]:
### 2. Store it in a dict {ticker: last_close}
dict_last_close = {
    'EEM': us_mkt_EEM_last_close,
    'GLD': us_mkt_GLD_last_close,
    'QQQ': us_mkt_QQQ_last_close,
    'SPY': us_mkt_SPY_last_close,
    'TLT': us_mkt_TLT_last_close
}
print(dict_last_close)

{'EEM': np.float64(52.599998474121094), 'GLD': np.float64(399.2900085449219), 'QQQ': np.float64(599.6373901367188), 'SPY': np.float64(669.4219360351562), 'TLT': np.float64(87.45963287353516)}


In [958]:
  ### 3 Convert a series and sort descending
serie_last_close = pd.Series(dict_last_close) ### Convert to dictionary to Series
serie_last_close = serie_last_close.sort_values(ascending=False)
print(serie_last_close)

SPY    669.421936
QQQ    599.637390
GLD    399.290009
TLT     87.459633
EEM     52.599998
dtype: float64


### 3.2.4 Series vs NumPy

Goal: show why pandas alignment matters.

1. Create two Series indexed by date:
   - df mid-rate from `df`
   - SPY close from `us_mkt`
2. Combine them into a yh_dfFrame (pandas aligns on dates).
3. Separately, build two NumPy arrays by truncating to the same length.
4. In markdown: explain why alignment is safer.


In [959]:
yh_df.head()


Price,Close,Close,Close,Close,Close,High,High,High,High,High,Low,Low,Low,Low,Low,Open,Open,Open,Open,Open,Volume,Volume,Volume,Volume,Volume
Ticker,EEM,GLD,QQQ,SPY,TLT,EEM,GLD,QQQ,SPY,TLT,EEM,GLD,QQQ,SPY,TLT,EEM,GLD,QQQ,SPY,TLT,EEM,GLD,QQQ,SPY,TLT
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2
2022-01-03,44.624969,168.330002,391.679474,451.875183,125.295341,44.679388,169.009995,391.93301,452.007626,127.587647,44.244023,168.0,386.99899,448.223948,125.277972,44.516129,168.860001,389.114948,450.541433,127.12745,27572700,9014400,40575900,72668200,33860400
2022-01-04,44.470772,169.570007,386.599121,451.723816,124.774368,44.679384,169.720001,392.264468,454.022422,125.147743,44.380072,168.729996,383.4983,449.860353,123.94081,44.679384,168.899994,392.225455,453.303514,124.696225,24579500,6965600,58027200,71178700,21996400
2022-01-05,43.745171,169.059998,374.722412,443.049683,124.097107,44.579623,170.929993,386.033615,452.130514,125.173801,43.736102,168.899994,374.468876,442.955085,123.914772,44.262167,170.619995,384.912223,451.354853,125.156431,46425100,8715600,75739800,104538900,20911700
2022-01-06,43.944706,166.990005,374.459076,442.633514,124.418365,44.153318,167.75,377.706186,445.357767,124.548618,43.681672,166.860001,370.66594,440.259248,123.550068,43.863075,167.160004,372.898935,442.58623,123.81056,34288700,10902700,70814300,86858900,18996400
2022-01-07,44.343796,167.75,370.402679,440.883545,123.524063,44.425427,168.009995,376.155812,443.825369,124.375005,43.990062,166.860001,368.628014,439.521419,122.916257,44.162393,167.369995,374.468873,442.64297,124.279491,32640900,8191900,72652300,85111600,18756800


In [960]:
df.head()

Unnamed: 0,date_raw,PENUSD_buy,PENUSD_sell
0,03.Ene.22,3.98366666666667,3.98883333333333
1,04.Ene.22,3.9595,3.964
2,05.Ene.22,3.952,3.95633333333333
3,06.Ene.22,3.96716666666667,3.96966666666667
4,07.Ene.22,3.94516666666667,3.94816666666667


In [961]:
### 1.Create two Series indexed by date: df mid-rate from df , SPY close from us_mkt
df_mid_rate = df.set_index("date_raw")["PENUSD_buy"]
print(df_mid_rate.head())
print(df_mid_rate.shape) ### we observe its shape
print(type(df_mid_rate)) ### we observe its type
###We previously created a SPY closing price series in an earlier code section, and we're now using it.
print(f"___"*30)
print(SPY_close_series.head())
print(SPY_close_series.shape) ### we observe its shape
print(type(SPY_close_series)) ### we observe its type

date_raw
03.Ene.22    3.98366666666667
04.Ene.22              3.9595
05.Ene.22               3.952
06.Ene.22    3.96716666666667
07.Ene.22    3.94516666666667
Name: PENUSD_buy, dtype: object
(988,)
<class 'pandas.core.series.Series'>
__________________________________________________________________________________________
date
2022-01-03    451.875183
2022-01-04    451.723816
2022-01-05    443.049683
2022-01-06    442.633514
2022-01-07    440.883545
Name: SPY_close_series, dtype: float64
(994,)
<class 'pandas.core.series.Series'>


In [962]:
df_mid_rate[0:20]


Unnamed: 0_level_0,PENUSD_buy
date_raw,Unnamed: 1_level_1
03.Ene.22,3.98366666666667
04.Ene.22,3.9595
05.Ene.22,3.952
06.Ene.22,3.96716666666667
07.Ene.22,3.94516666666667
10.Ene.22,3.92766666666667
11.Ene.22,3.91633333333333
12.Ene.22,3.89233333333333
13.Ene.22,3.89483333333333
14.Ene.22,3.87033333333333


In [963]:
### Changing the date type to match the yh finance dataframe

print(type(df_mid_rate.index))
print(type(SPY_close_series.index)) ### There is a problem with the date format that delivers a bad concatenate



### Convert text index "03.Ene.22" to datetime
### Create a dictionary
month_map = {
    "Ene": "Jan", "Feb": "Feb", "Mar": "Mar", "Abr": "Apr",
    "May": "May", "Jun": "Jun", "Jul": "Jul", "Ago": "Aug",
    "Sep": "Sep", "Oct": "Oct", "Nov": "Nov", "Dic": "Dec"
}

idx = df_mid_rate.index.astype(str)

### Replace ".Ene." -> ".Jan." to then change it to datetime type
for es, en in month_map.items():
    idx = idx.str.replace(f".{es}.", f".{en}.", regex=False)

df_mid_rate.index = pd.to_datetime(idx, format="%d.%b.%y", errors="coerce")

### Convert values to numeric , in case they come as text
df_mid_rate = pd.to_numeric(df_mid_rate, errors="coerce")

### Clean up to remove any null values that may have been found
df_mid_rate = df_mid_rate[~df_mid_rate.index.isna()].sort_index() ### Remove NaN indices
df_mid_rate = df_mid_rate.dropna()

print("\n__________________________________________________________")
print(df_mid_rate.head())
print(df_mid_rate.shape)

print(type(df_mid_rate.index))

<class 'pandas.core.indexes.base.Index'>
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>

__________________________________________________________
date_raw
2022-01-03    3.983667
2022-01-04    3.959500
2022-01-05    3.952000
2022-01-06    3.967167
2022-01-07    3.945167
Name: PENUSD_buy, dtype: float64
(902,)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [964]:
### 2. Combine them into a yh_dfFrame (pandas aligns on dates).
yh_dfFrame = pd.concat([df_mid_rate, SPY_close_series], axis=1, join = 'inner') ### We concatenated the dataframes after resolving the index discrepancy. We applied join='inner' to only display values with matching indices.
print( yh_dfFrame.shape) ### The resulting dataframe has a smaller shape since rows with non-matching indices were eliminated.
yh_dfFrame



(875, 2)


Unnamed: 0,PENUSD_buy,SPY_close_series
2022-01-03,3.983667,451.875183
2022-01-04,3.959500,451.723816
2022-01-05,3.952000,443.049683
2022-01-06,3.967167,442.633514
2022-01-07,3.945167,440.883545
...,...,...
2025-12-11,3.365000,687.139526
2025-12-12,3.366857,679.751404
2025-12-15,3.368714,678.724426
2025-12-16,3.369857,676.869934


In [965]:
### 3. Separately, build two NumPy arrays by truncating to the same length.
df_mid_rate_np = np.array(df_mid_rate)
SPY_close_series_np = np.array(SPY_close_series.values)

print(type(df_mid_rate_np))
print(type(SPY_close_series_np))
print(df_mid_rate_np.shape)
print(SPY_close_series_np.shape)
print('After truncating the dfs to the same length')

### Truncating to the same legth

min_length = min(len(df_mid_rate_np), len(SPY_close_series_np)) ### We take the minimum length among the dataframes.
df_mid_rate_np = df_mid_rate_np[:min_length] ### We truncate to the minimum length found
SPY_close_series_np = SPY_close_series_np[:min_length]

print(df_mid_rate_np.shape)
print(SPY_close_series_np.shape)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
(902,)
(994,)
After truncating the dfs to the same length
(902,)
(902,)


4. Comment: By aligning indices beforehand, we can verify whether the subsequent joining of a new DataFrame was performed correctly. If the operation was executed properly, the resulting DataFrame will maintain the same number of rows as the originally aligned DataFrames.
A discrepancy in the row count of the final DataFrame compared to the original length serves as a clear signal that the join did not occur as expected. This difference alerts us to potential inconsistencies in the data or indices.
In section 2), we precisely observed this scenario: the join resulted in a DataFrame with different dimensions than the originals. This discrepancy indicated that there were differences in the indices between both DataFrames, demonstrating the importance of the alignment process.

In [966]:
yh_dfFrame.shape

(875, 2)

### 3.3.6 Dealing with Nulls
Using `us_mkt`:

1. Copy `us_mkt` to `us_mkt_nan`.
2. Set 1% of `close` to NaN (fixed random seed).
3. Create:
   - `us_drop`: drop NaNs
   - `us_fill`: fill NaNs with ticker-specific median close
4. Compare shapes.

In [None]:
# Verify and count missing values per column on a dataframe
us_mkt.isnull().sum()

Unnamed: 0,0
date,0
ticker,0
close,0
volume,0


In [968]:
### 1. Copy us_mkt to us_mkt_nan
us_mkt_nan = us_mkt.copy()
print(us_mkt.shape)
### 2. Set 1% of close to Nan(fixed random seed)

np.random.seed(42) ### Set a random seed for to others can replicate the results



random_indices = np.random.choice(us_mkt_nan.index, size=int(0.01 * len(us_mkt_nan)), replace=False) ### Select random indices

print(random_indices)

us_mkt_nan.loc[random_indices, 'close'] = np.nan ### Replace these values with np.nan.

us_mkt_nan.isnull().sum() ### 10% of shape us_mkt.

(4970, 4)
[ 856 3657 4602 4368 3868 2295 4239 2634  350 3017 1351  724 1815  287
  841 1360 2364 1295 2269 2211 1519 2114  230 2447 2063 2619 3924 2827
 3729 3230 1684 3967 3747 2525 3109 2463 1936 3142 4342 2647 3252 1784
  561  151 4815 4196 2650 2103 3980]


Unnamed: 0,0
date,0
ticker,0
close,49
volume,0


In [969]:
### Create a us_drope
us_drop = us_mkt_nan.copy() ### We made a copy to avoid changing the original dataframe

us_drop.dropna(inplace=True)

print(us_drop.shape)

### Create a us_fill
us_fill = us_mkt_nan.copy() ###  We made a copy to avoid changing the original dataframe

us_fill['close'] = us_fill['close'].fillna(us_fill.groupby('ticker')['close'].transform('median')) ### we replace the nan values for the median values for each ticker

print(us_fill.shape)

print('The shape of us_fill is larger than us_drop.')

(4921, 4)
(4970, 4)
The shape of us_fill is larger than us_drop.


### 3.3.7 Duplicates

1. Create `dup_df` by stacking the last 5 rows of `us_mkt` twice.
2. Detect duplicates using `.duplicated()`.
3. Remove them using `.drop_duplicates()`.

In [970]:
### 1. Create dup_df by stacking the last 5 rows of us_mkt twice

dup_df = pd.concat([us_mkt.tail(5),us_mkt.tail(5)]) ### tail take the last x values

dup_df.head() ###Since dup_df is a long format dataframe, dup_df  only takes the last 5 'TLT' values.



Unnamed: 0,date,ticker,close,volume
4965,2025-12-11,TLT,87.848114,26778700
4966,2025-12-12,TLT,87.001404,47030100
4967,2025-12-15,TLT,87.06118,28611800
4968,2025-12-16,TLT,87.539314,41018700
4969,2025-12-17,TLT,87.459633,24668300


In [971]:
### 2. Detect duplicates using .duplicated()
duplicates_us_mkt = dup_df.duplicated()
print(duplicates_us_mkt)
print('We observed True values in duplicates_us_mkt, which means duplicates were detected.')

4965    False
4966    False
4967    False
4968    False
4969    False
4965     True
4966     True
4967     True
4968     True
4969     True
dtype: bool
We observed True values in duplicates_us_mkt, which means duplicates were detected.


In [972]:
### 3 Remove them using .drop_duplicated()
dup_df.drop_duplicates()


Unnamed: 0,date,ticker,close,volume
4965,2025-12-11,TLT,87.848114,26778700
4966,2025-12-12,TLT,87.001404,47030100
4967,2025-12-15,TLT,87.06118,28611800
4968,2025-12-16,TLT,87.539314,41018700
4969,2025-12-17,TLT,87.459633,24668300


### 3.3.8 Groupby


Using `us_mkt`:

1. Group by `ticker` and compute:
   - mean close
   - median close
   - max volume
2. Rename columns clearly.
3. Sort by mean close descending.

In [973]:
### 1. Using us_mkt
## From close
group_tickers_close = us_mkt.groupby('ticker')['close'].agg(['mean','median']) ### 'ticker' is the index we want to group by, and 'close' is the column to apply the functions to.

print(group_tickers_close)

## From volume

group_tickers_volume = us_mkt.groupby('ticker')['volume'].agg('max') ### "agg" is used to apply functions to the grouped set

print(group_tickers_volume)


              mean      median
ticker                        
EEM      40.462350   39.107405
GLD     220.130422  187.864998
QQQ     411.276967  400.214508
SPY     485.608583  460.740021
TLT      91.395622   88.549706
ticker
EEM    134225700
GLD     62025000
QQQ    198685800
SPY    256611400
TLT    131353500
Name: volume, dtype: int64


In [974]:
### 2. Rename the columns to make them  clearer.
group_tickers_close.rename(columns={'mean': 'mean_close', 'median': 'median_close'}, inplace=True) ### Renaming the columns and replacing them in place
print(group_tickers_close)

group_tickers_volume.rename('max_volume', inplace=True)
print(group_tickers_volume)

        mean_close  median_close
ticker                          
EEM      40.462350     39.107405
GLD     220.130422    187.864998
QQQ     411.276967    400.214508
SPY     485.608583    460.740021
TLT      91.395622     88.549706
ticker
EEM    134225700
GLD     62025000
QQQ    198685800
SPY    256611400
TLT    131353500
Name: max_volume, dtype: int64


In [975]:
### 3.Sort by mean close descending.
group_tickers_close.sort_values(by='mean_close', ascending=False)

Unnamed: 0_level_0,mean_close,median_close
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
SPY,485.608583,460.740021
QQQ,411.276967,400.214508
GLD,220.130422,187.864998
TLT,91.395622,88.549706
EEM,40.46235,39.107405


### 3.3.9 Reshape

1. Create a 1-row wide yh_dfFrame with last closes per ticker.
2. Convert it to long format with `melt()` into columns: `ticker`, `last_close`.
3. Pivot `us_mkt` into a wide table: index=`date`, columns=`ticker`, values=`close` (keep first 50 dates).

In [976]:
### Create a 1 - row wide yh_dfFrame with last closes per ticker
yh_dfFrame = yh_df["Close"].tail(1)
print(yh_dfFrame, '\n')
### 2. Convert it to  long format with melt into columns ['ticker, 'last_close']
yh_dfFrame2 = yh_dfFrame.melt(var_name="ticker", value_name="last_close")
print(yh_dfFrame2,'\n')
### 3. Pivot us_mkt into a wide table: index=date, columns=ticker, values=close (keep first 50 dates)
us_mkt_wide = us_mkt.pivot(index="date", columns="ticker",values="close").sort_index()

us_mkt_wide = us_mkt_wide.iloc[0:50,:] ### keep first 50 dates using iloc
print(us_mkt_wide.head(50))




Ticker            EEM         GLD        QQQ         SPY        TLT
Date                                                               
2025-12-17  52.599998  399.290009  599.63739  669.421936  87.459633 

  ticker  last_close
0    EEM   52.599998
1    GLD  399.290009
2    QQQ  599.637390
3    SPY  669.421936
4    TLT   87.459633 

ticker            EEM         GLD         QQQ         SPY         TLT
date                                                                 
2022-01-03  44.624969  168.330002  391.679474  451.875183  125.295341
2022-01-04  44.470772  169.570007  386.599121  451.723816  124.774368
2022-01-05  43.745171  169.059998  374.722412  443.049683  124.097107
2022-01-06  43.944706  166.990005  374.459076  442.633514  124.418365
2022-01-07  44.343796  167.750000  370.402679  440.883545  123.524063
2022-01-10  44.343796  168.259995  370.646454  440.334930  123.827927
2022-01-11  45.368717  170.289993  376.214325  444.345673  124.652832
2022-01-12  46.121536  170.740005  3