In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

In [2]:
meat_production = pd.read_csv("Intermediate_data\Total_meat_production.csv", index_col=0)
portfolio = pd.read_csv("Portfolio_data\Market_portfolio_monthly.csv", index_col=0)

In [3]:
data = pd.DataFrame()
portfolio = portfolio[:-2]
data.index = portfolio.index

data['price_1m_pct_change'] = portfolio['Close'].pct_change(periods=1)
data['price_3m_pct_change'] = portfolio['Close'].pct_change(periods=3)
data['price_6m_pct_change'] = portfolio['Close'].pct_change(periods=6)

data['Return'] = portfolio['Close'].pct_change()
data['3m_volatility'] = data['Return'].rolling(window=3).std() * np.sqrt(12)
data['6m_volatility'] = data['Return'].rolling(window=6).std() * np.sqrt(12)
data = data.drop('Return', axis=1)

meat_production = meat_production.loc[portfolio.index]
data['production_1m_pct_change'] = meat_production['Production'].pct_change(periods=1)
data['production_3m_pct_change'] = meat_production['Production'].pct_change(periods=3)
data['production_6m_pct_change'] = meat_production['Production'].pct_change(periods=6)


In [4]:
portfolio['Shifted'] = portfolio['Close'].shift(-1)
data['Signal'] = (portfolio['Close'] < portfolio['Shifted']).astype(int)
portfolio = portfolio.drop('Shifted', axis=1)
display(data)

Unnamed: 0_level_0,price_1m_pct_change,price_3m_pct_change,price_6m_pct_change,3m_volatility,6m_volatility,production_1m_pct_change,production_3m_pct_change,production_6m_pct_change,Signal
Date-Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1999-11-01,,,,,,,,,0
1999-12-01,-0.080485,,,,,-0.000066,,,0
2000-01-01,-0.011524,,,,,-0.023020,,,0
2000-02-01,-0.018902,-0.108262,,0.131168,,0.000309,-0.022783,,0
2000-03-01,-0.060397,-0.088781,,0.091268,,0.073922,0.049524,,1
...,...,...,...,...,...,...,...,...,...
2023-08-01,-0.011187,0.012335,0.092106,0.103431,0.144723,0.117342,0.020290,0.110019,0
2023-09-01,-0.019928,0.006721,0.112235,0.109790,0.130563,-0.067772,-0.041293,-0.087105,0
2023-10-01,-0.054510,-0.083718,-0.006922,0.079361,0.144045,0.089275,0.134608,0.138340,0
2023-11-01,-0.005554,-0.078498,-0.067131,0.087168,0.103873,-0.033148,-0.018207,0.001713,1


In [5]:
data = data.dropna()

In [6]:
train_data = data[data.index < "2019-01-01"]
test_data = data[data.index >= "2019-01-01"]

X_train = train_data.iloc[:,0:-1]
y_train = train_data.iloc[:,-1:]

X_test = test_data.iloc[:,0:-1]
y_test = test_data.iloc[:,-1:]

model = XGBClassifier()
model.fit(X_train, y_train)

In [7]:
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 51.67%


In [8]:
cm = confusion_matrix(y_test, y_pred)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index=model.classes_, columns=model.classes_)

print("Confusion Matrix:")
print(cm_df)

Confusion Matrix:
   0   1
0  6  21
1  8  25


In [10]:
importance_scores = model.feature_importances_

feature_importance_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': importance_scores})
feature_importance_df = feature_importance_df.sort_values('Importance', ascending=False)

print("Feature Importance:")
print(feature_importance_df)

Feature Importance:
                    Feature  Importance
2       price_6m_pct_change    0.151703
4             6m_volatility    0.139391
5  production_1m_pct_change    0.133308
6  production_3m_pct_change    0.126201
3             3m_volatility    0.119783
7  production_6m_pct_change    0.114143
1       price_3m_pct_change    0.112803
0       price_1m_pct_change    0.102669
