In [1]:
# Import the required libraries and dependencies
import pandas as pd
from pathlib import Path
from sklearn.cluster import KMeans, AgglomerativeClustering, Birch
from sklearn.preprocessing import StandardScaler
import hvplot.pandas

In [2]:
# Read the CSV file into a Pandas DataFrame
# Set the index using the Ticker column
rate_df = pd.read_csv(
    Path("../Resources/global_carry_trades.csv"))

# Review the DataFrame
rate_df.head()

Unnamed: 0,interest_differential,next_month_currency_return,IMF Country Code
0,0.001414,-0.061174,GBR
1,-0.00057,-0.05812,BEL
2,0.001478,-0.056031,DNK
3,0.000655,-0.056991,FRA
4,-0.002928,-0.067056,DEU


## Prepare the Data 

In [3]:
# Use the StandardScaler module and fit_transform function to 
# scale all columns with numerical values
rate_scaled = StandardScaler().fit_transform(rate_df[["interest_differential", "next_month_currency_return"]])

# Diplay the first three rows of the scaled data
rate_scaled[0:3]

array([[-0.24270991, -1.93608838],
       [-0.8539933 , -1.84109498],
       [-0.22308154, -1.77613322]])

In [4]:
# Create a DataFrame called with the scaled data
# The column names should match those referenced in the StandardScaler step
rate_scaled_df = pd.DataFrame(rate_scaled, columns=["interest_differential", "next_month_currency_return"])
rate_scaled_df.head()

Unnamed: 0,interest_differential,next_month_currency_return
0,-0.24271,-1.936088
1,-0.853993,-1.841095
2,-0.223082,-1.776133
3,-0.476617,-1.805994
4,-1.580459,-2.119073


In [5]:
# Encode (convert to dummy variables) the "IMF Country Code" column
rate_dummies = pd.get_dummies(rate_df["IMF Country Code"])

# Review the DataFrame
rate_dummies.head()

Unnamed: 0,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE
0,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
1,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False


In [6]:
# Concatenate the scaled data DataFrame with the "IMF Country Code" encoded dummies 
rate_scaled_df = pd.concat([rate_scaled_df, rate_dummies], axis=1)

# Display the combined DataFrame.
rate_scaled_df.head()

Unnamed: 0,interest_differential,next_month_currency_return,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE
0,-0.24271,-1.936088,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False
1,-0.853993,-1.841095,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False
2,-0.223082,-1.776133,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False
3,-0.476617,-1.805994,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
4,-1.580459,-2.119073,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False


## Fit and Predict with KMeans


In [7]:
# Initialize the K-Means model with n_clusters=3
model = KMeans(n_clusters=3)

# Fit the model for the rate_df_scaled DataFrame
model.fit(rate_scaled_df)

# Save the predicted model clusters to a new DataFrame.
rate_clusters = model.predict(rate_scaled_df)

# View the country clusters
print(rate_clusters)

  super()._check_params_vs_input(X, default_n_init=10)


[0 2 0 0 2 0 2 0 0 2 1 2 0 1 2 1 2 1 2 2 1 2 1 1 2 1 2 1 1 2 1 2 1 1 2 1 2
 1 1 2 1 2 1 0 2 2 2 0 2 2 0 2 1 1 2 1 2 1 1 2 2 2 2 2 2 2 2 0 2 2 1 2 1 1
 2 0 2 0 0 2 0 2 0 0 2 1 2 0 0 2 2 2 2 2 2 2 2 0 2 2 1 2 1 1 2 0 2 2 2 2 0
 2 0 2 2 2 2 1 0 2 1 2 2 2 2 1 2 1 1 2 1 2 0 1 2 1 1 1 1 2 1 2 1 1 2 1 2 1
 1 2 1 2 2 2 2 1 2 1 1 2 1 2 1 1 2 0 2 2 2 2 0 2 0 2 2 2 2 0 0 2 0 2 2 2 2
 0 2 0 0 2 1 2 1 0 2 1 1 1 2 1 1 1 1 1 2 2 2 0 1 2 0 2 2 2 2 0 2 2 2 2 1 2
 1 1 2 1 2 2 2 2 1 2 1 1 2 1 2 0 1 2 0 2 2 2 2 0 2 0 0 2 0 2 0 0 2 0 2 2 2
 2 1 2 2 1 2 1 2 1 0 2 1 1 1 1 2 1 2 1 1 2 1 2 1 1 2 0 2 0 2 2 0 2 0 0 2 1
 2 1 1 2 1 1 1 1 2 1 2 1 1 2 1 2 1 1 2 0 1 1 1 2 1 2 1 1 2 1 2 1 1 2 1 1 1
 1 2 1 1 1 1 2 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 1 2 1 1 1 2 1 2 1 1 2 0 2
 0 1 2 1 1 1 2 1 2 1 1 2 1 2 0 0 2 0 1 1 2 1 2 1 1 2 1 2 1 0 2 1 1 1 2 1 2
 1 1 1 1 2 1 1 2 1 2 2 2 0 2 1 1 2 1 2 1 1 1 1 1 1 2 1 2 1 1 2 1 2 1 1 2 1
 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 2 2 2 2 2 2 2 2 1 1 1 2 1 1 1 2 2 2 1 1 2 1
 1 1 1 2 1 1 1 1 1 1 1 1 

In [8]:
# Create a copy of the concatenated DataFrame
rate_scaled_predictions = rate_scaled_df.copy()

# Create a new column in the copy of the concatenated DataFrame with the predicted clusters
rate_scaled_predictions["rate_cluster"] = rate_clusters

# Review the DataFrame
rate_scaled_predictions.head()

Unnamed: 0,interest_differential,next_month_currency_return,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE,rate_cluster
0,-0.24271,-1.936088,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,0
1,-0.853993,-1.841095,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,2
2,-0.223082,-1.776133,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,0
3,-0.476617,-1.805994,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,0
4,-1.580459,-2.119073,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,2


## Plot and Analyze the Results

In [9]:
# Group the saved DataFrame by cluster using `groupby` to calculate average currency returns
rate_scaled_predictions.groupby('rate_cluster').mean()


Unnamed: 0_level_0,interest_differential,next_month_currency_return,AUS,BEL,CAN,CHE,DEU,DNK,FRA,GBR,ITA,JPN,NLD,NOR,NZL,SGP,SWE
rate_cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,0.626957,-1.134452,0.109524,0.0,0.033333,0.066667,0.061905,0.090476,0.07619,0.114286,0.109524,0.004762,0.057143,0.109524,0.07619,0.0,0.090476
1,0.404522,0.62866,0.089249,0.01217,0.109533,0.042596,0.05071,0.077079,0.073022,0.085193,0.085193,0.034483,0.052738,0.089249,0.107505,0.002028,0.089249
2,-1.118549,-0.242211,0.006757,0.057432,0.030405,0.118243,0.108108,0.043919,0.060811,0.013514,0.016892,0.172297,0.108108,0.010135,0.0,0.22973,0.023649


In [10]:
rate_scaled_predictions.hvplot

<hvplot.plotting.core.hvPlotTabular at 0x23cbc4b1b40>

* Based on this plot, which cluster of country appears to provide both the highest interest spread and currency return?

## Bonus

In [11]:
# Initialize a Birch model with n_clusters=5


# Fit the model for the df_bitcoin_scaled DataFrame


# Predict the model segments (clusters)


# View the stock segments


# Create a copy of the concatenated DataFrame


# Create a new column in the copy of the concatenated DataFrame with the predicted clusters


# Review the DataFrame
