In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import eli5
from eli5.sklearn import PermutationImportance
import shap
import lime
import lime.lime_tabular
from yellowbrick.model_selection import FeatureImportances
from alibi.explainers import AnchorTabular
# import lucid -- Lucid might not be directly applicable here, but included for completeness

# Load the competition files from the notebook's working directory.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Quick look at the training frame.
# `display` is the IPython builtin; unlike `print` it renders frames as HTML tables
# even when the call is not the last expression of the cell.
display(train_data.head())
# `DataFrame.info()` writes its summary to stdout itself and returns None, so it is
# called directly -- wrapping it in print() only appended a stray "None" line.
train_data.info()
display(train_data.describe())

# Split predictors, target and identifier.
# NOTE(review): treating 'metastatic_diagnosis_period' as the target and 'patient_id'
# as a pure identifier is an assumption carried over from the original -- confirm.
X_train = train_data.drop(['metastatic_diagnosis_period', 'patient_id'], axis=1)
y_train = train_data['metastatic_diagnosis_period']
X_test = test_data.drop(['patient_id'], axis=1)
test_ids = test_data['patient_id']

# Fill gaps in numeric columns with the *training* medians, so no statistic is
# learned from the test set. Filling with a Series only touches the columns it names;
# non-numeric columns are left as they are.
# NOTE(review): older scikit-learn estimators reject NaN outright -- confirm whether
# median imputation is the strategy you want for this data.
numeric_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(numeric_medians)
X_test = X_test.fillna(numeric_medians)

# StandardScaler only accepts numeric input, so text/categorical columns are
# one-hot encoded first. The test frame is then aligned to the training columns:
# categories seen only in test are dropped, categories missing from test become 0.
# For an all-numeric frame without gaps these steps leave the data unchanged.
X_train = pd.get_dummies(X_train)
X_test = pd.get_dummies(X_test).reindex(columns=X_train.columns, fill_value=0)

# Scale with statistics fitted on the training set only, then reuse them for test.
# The scaled outputs are plain arrays; X_train.columns still names their columns.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a predictive model (fixed seed so reruns build the same forest).
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate the model.
# The RMSE is taken as the square root of the MSE rather than via `squared=False`:
# that keyword is deprecated and removed in newer scikit-learn releases, whereas
# this form works on every version.
# NOTE(review): this score is computed on the same rows the forest was fitted on,
# so it is optimistic -- use a hold-out split or cross-validation for a real estimate.
train_predictions = model.predict(X_train_scaled)
train_rmse = mean_squared_error(y_train, train_predictions) ** 0.5
print(f"Train RMSE: {train_rmse}")

# Explain the model using ELI5 permutation importance (seeded for repeatable shuffles).
perm = PermutationImportance(model, random_state=42).fit(X_train_scaled, y_train)
# `show_weights` returns a display object instead of rendering it. A notebook only
# auto-renders the final expression of a cell, so the result is passed to the IPython
# builtin `display` to make the table appear from the middle of the cell.
display(eli5.show_weights(perm, feature_names=X_train.columns.tolist()))

# SHAP: build a tree explainer for the fitted forest and compute SHAP values
# for every row of the scaled training matrix.
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train_scaled)

# Summary plot: the values come from the scaled matrix, while the original
# frame supplies the feature values and the column names shown on the plot.
shap.summary_plot(
    shap_values,
    features=X_train,
    feature_names=X_train.columns,
)

# Explain the model using LIME (local surrogate around a single row).
# LIME draws random perturbations, so it is seeded like the model and the
# permutation-importance step; without a seed every run gives a different explanation.
# Feature names are passed as a plain list, matching the ELI5 call.
lime_explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train_scaled,
    feature_names=X_train.columns.tolist(),
    class_names=['metastatic_diagnosis_period'],
    mode='regression',
    random_state=42,
)
# Explain the first training row using the forest's predict function.
lime_exp = lime_explainer.explain_instance(X_train_scaled[0], model.predict)
lime_exp.show_in_notebook()

# Yellowbrick: feature-importance visualizer for the forest, labelled with the
# original column names. Construction and fitting are chained; the figure is
# rendered by show().
viz = FeatureImportances(model, labels=X_train.columns).fit(X_train_scaled, y_train)
viz.show()

# Explain the model using Alibi anchors.
# Anchors are meant for classifier-like predictors: a rule is scored by how often
# perturbed samples receive the *same* prediction as the explained row. A regressor's
# continuous output almost never repeats exactly, so the forest's predictions are
# binned at the quartiles of the training target and the bin index is used as the label.
# NOTE(review): quartile bins are a pragmatic choice -- replace with clinically
# meaningful thresholds if they exist.
period_bin_edges = np.unique(np.quantile(y_train, [0.25, 0.5, 0.75]))


def predict_period_bin(features):
    """Return the quartile-bin index (an integer label) of the forest's prediction for each row."""
    return np.digitize(model.predict(features), period_bin_edges)


# Feature names are passed as a plain list, matching the ELI5 call.
anchor_explainer = AnchorTabular(predict_period_bin, feature_names=X_train.columns.tolist())
anchor_explainer.fit(X_train_scaled, disc_perc=(25, 50, 75))
# Explain the first training row.
anchor_exp = anchor_explainer.explain(X_train_scaled[0])
print(anchor_exp)

# Score the scaled test matrix with the fitted forest.
test_predictions = model.predict(X_test_scaled)

# Assemble the submission frame column by column: identifiers first, then predictions.
results = pd.DataFrame({'patient_id': test_ids})
results['metastatic_diagnosis_period'] = test_predictions

# Write the submission without the index column and confirm on stdout.
results.to_csv('predictions.csv', index=False)
print("Predictions saved to 'predictions.csv'")


ImportError: cannot import name 'if_delegate_has_method' from 'sklearn.utils.metaestimators' (C:\Users\avina\anaconda3\Lib\site-packages\sklearn\utils\metaestimators.py)

In [12]:
pip install scikit-learn==0.24.2

Collecting scikit-learn==0.24.2
  Using cached scikit-learn-0.24.2.tar.gz (7.5 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  Preparing metadata (pyproject.toml) did not run successfully.
  exit code: 1
  
  [62 lines of output]
  Partial import of sklearn during the build process.
  
    `numpy.distutils` is deprecated since NumPy 1.23.0, as a result
    of the deprecation of `distutils` itself. It will be removed for
    Python >= 3.12. For older Python versions it will remain present.
    It is recommended to use `setuptools < 60.0` for those Python versions.
    For more details, see:
      https://numpy.org/devdocs/reference/distutils_status_migration.html
  
  
  INFO: No module named 'numpy.distutils._msvccompiler' in numpy.distutils; trying from distutils
  Traceback (most recent call last):
    File "C:\Users\avina\anaconda3\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 353, in <module>
      main()
    File "C:\Users\avina\anaconda3\Lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 335, in main

In [None]:
pip install eli5==0.11
pip install shap
pip install lime
pip install yellowbrick
pip install alibi
pip install lucid