In [2]:
import sys
# NOTE(review): this places another conda environment's site-packages at the
# front of the import search path, so modules found there are picked up before
# the ones installed for the running interpreter. Mixing environments like this
# is a plausible cause of the "Matplotlib requires numpy>=1.23; you have 1.22.3"
# ImportError recorded at the end of this cell -- confirm, and prefer launching
# the kernel from the intended environment instead of patching sys.path.
sys.path.insert(0, '/gpfs/commons/home/jameslee/miniconda3/envs/scanpy/lib/python3.10/site-packages')

# Import numpy first
import numpy as np

# Import other packages
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, confusion_matrix
import seaborn as sns

# Import matplotlib with a workaround
import matplotlib
# The backend is selected before pyplot is imported; every figure in this
# script is written to a PNG with savefig and never shown on screen.
matplotlib.use('Agg')  # Use non-interactive backend
import matplotlib.pyplot as plt

# Simulated feature matrix (replace with your data)
# A seeded generator keeps the synthetic cohort identical from run to run, in
# line with the fixed random_state=42 used for the train/test split and the
# classifier; without it every execution produces different figures.
rng = np.random.default_rng(42)
n_samples = 100
data = pd.DataFrame({
    # Identifiers S1 .. S100
    'sample_id': [f'S{i}' for i in range(1, n_samples + 1)],
    # Continuous features, each drawn uniformly from the range given
    'pct_TRMstem_CD8': rng.uniform(0, 0.3, n_samples),
    'pct_Exhausted_CD8': rng.uniform(0, 0.2, n_samples),
    'TIGIT_PVR_score': rng.uniform(0, 1, n_samples),
    'PDCD1_expression': rng.uniform(0, 2, n_samples),
    # Binary target (0 or 1), drawn independently of the features
    'immune_label': rng.choice([0, 1], n_samples),
})

# Separate predictors from the target: every column except the sample
# identifier and the label itself is used as a feature.
non_feature_columns = ['sample_id', 'immune_label']
X = data.drop(columns=non_feature_columns)
y = data['immune_label']

# Hold out 30% of the samples for evaluation; the seed fixes the partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a Random Forest with default hyperparameters and a fixed seed.
# fit() returns the estimator itself, so construction and training chain.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# 1. Feature Importance Plot
# Pair each feature with the importance reported by the fitted forest and
# order the table from most to least important.
importances = clf.feature_importances_
feature_names = X.columns
importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Horizontal bars; the y-axis is flipped so the first (highest) row is on top.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
ax.set_xlabel('Importance')
ax.set_title('Feature Importance in Classifying Immune Status')
ax.invert_yaxis()
fig.savefig('feature_importance.png')
plt.close(fig)

# 2. ROC Curve
# Score the held-out samples with the predicted probability of class 1
# (second column of predict_proba) and summarise the curve by its area.
y_pred_prob = clf.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('ROC Curve for Immune Status Prediction')
ax.legend(loc='lower right')
fig.savefig('roc_curve.png')
plt.close(fig)

# 3. Confusion Matrix
# Hard class predictions on the held-out set, tabulated against the truth.
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

# Annotated heatmap of the integer counts, drawn without a colour bar.
fig, ax = plt.subplots(figsize=(6, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix for Immune Status Prediction')
fig.savefig('confusion_matrix.png')
plt.close(fig)

ImportError: Matplotlib requires numpy>=1.23; you have 1.22.3