# In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# ==========================
# 1️⃣ Load Dataset
# ==========================
# Read the CSV (path is relative to the working directory) into a DataFrame.
DATA_PATH = "FINAL_USO_cleaned.csv"
df = pd.read_csv(DATA_PATH)

# ==========================
# 2️⃣ Prepare Features & Target
# ==========================
# The target is the 'EU_Trend' column; every remaining column except 'Date'
# is used as a model input.
TARGET_COLUMN = 'EU_Trend'
NON_FEATURE_COLUMNS = ['Date', TARGET_COLUMN]

y = df[TARGET_COLUMN]
X = df.drop(NON_FEATURE_COLUMNS, axis=1)

# ==========================
# 3️⃣ Split Data
# ==========================
# 70% train / 30% test with a fixed seed so the split is reproducible.
# NOTE(review): the split is random; the data has a 'Date' column, so if the
# rows are chronological, confirm that a shuffled split is what is intended.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# ==========================
# 4️⃣ Train Logistic Regression
# ==========================
# fit() returns the fitted estimator, so construction and training are chained.
# NOTE(review): features are passed in unscaled — confirm the solver converges
# within max_iter on this data.
lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
lr_pred = lr.predict(X_test)

# ==========================
# 5️⃣ Train Random Forest
# ==========================
# 100 trees, seeded for reproducibility; predictions are made on the held-out
# test features.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred = rf.predict(X_test)

# ==========================
# 6️⃣ Evaluate Models
# ==========================
# Accuracy of each model's predictions against the held-out test labels.
rf_acc = accuracy_score(y_test, rf_pred)
lr_acc = accuracy_score(y_test, lr_pred)

# Report both scores, rounded to three decimals, Logistic Regression first.
for report_label, report_score in (
    ("Logistic Regression Accuracy:", lr_acc),
    ("Random Forest Accuracy:", rf_acc),
):
    print(report_label, round(report_score, 3))

# ==========================
# 7️⃣ Confusion Matrices
# ==========================
# Plot the confusion matrix of each model side by side on one figure.
#
# The class labels are taken from the target column rather than hard-coded as
# [0, 1], and the same list is given to both confusion_matrix() and
# ConfusionMatrixDisplay. That keeps the matrix shape and the tick labels in
# agreement even when the target uses other label values or when a class
# happens to be absent from the test split, and it guarantees both panels use
# the same row/column order.
class_labels = sorted(y.unique())

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
panels = (
    ("Logistic Regression", lr_pred),
    ("Random Forest", rf_pred),
)
for panel_ax, (panel_title, panel_pred) in zip(axes, panels):
    panel_cm = confusion_matrix(y_test, panel_pred, labels=class_labels)
    ConfusionMatrixDisplay(panel_cm, display_labels=class_labels).plot(
        ax=panel_ax, colorbar=False
    )
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()

# ==========================
# 8️⃣ Feature Importance (Random Forest)
# ==========================
# Pair each feature column name with the importance the fitted forest
# assigned to it.
feature_names = X.columns
importances = rf.feature_importances_

# Rank from most to least important and keep the first ten entries.
ranked_importances = pd.Series(importances, index=feature_names).sort_values(ascending=False)
feat_imp = ranked_importances.head(10)

# Horizontal bar chart on a new figure; Series.plot returns the Axes it drew on.
plt.figure(figsize=(8, 5))
importance_ax = feat_imp.plot(kind='barh')
importance_ax.set_title("Top 10 Important Features (Random Forest)")
# Flip the y-axis so the most important feature is drawn at the top.
importance_ax.invert_yaxis()
plt.show()