<a href="https://colab.research.google.com/github/galenzo17/AI-personal-test/blob/main/Medical.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Complete Python Script for Drug Classification and Counting in a Hospital Cabinet

# Step 1: Install Necessary Libraries
import sys
import subprocess

def install_package(package):
    """Install *package* with pip, using the interpreter that runs this script.

    Raises subprocess.CalledProcessError when pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

# List of required packages (pip distribution names)
required_packages = ["pandas", "numpy", "scikit-learn", "joblib"]

# Some distributions are imported under a different module name than the one
# used to install them. scikit-learn is installed as "scikit-learn" but
# imported as "sklearn"; probing the pip name would always raise ImportError
# and re-run pip on every execution.
import_names = {"scikit-learn": "sklearn"}

# Install missing packages
for package in required_packages:
    try:
        __import__(import_names.get(package, package))
    except ImportError:
        install_package(package)

# Step 2: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import classification_report, mean_absolute_error
import joblib

# Step 3: Load the Dataset
# 'medical_data.csv' is given as a relative path, so it is looked up in the
# current working directory.
try:
    data = pd.read_csv('medical_data.csv')
except FileNotFoundError:
    # Without the dataset nothing below can run, so stop with a failure code.
    print("Error: 'medical_data.csv' not found in the current directory.")
    sys.exit(1)
else:
    print("Dataset loaded successfully.")

# Step 4: Explore the Dataset
print("\nFirst 5 rows of the dataset:")
print(data.head())

print("\nDataset Information:")
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the summary.
data.info()

# Step 5: Data Preprocessing
# Discard every row that contains at least one missing value
data_clean = data.dropna()
print(f"\nDataset shape after dropping missing values: {data_clean.shape}")

# Stop at the first target column that is absent from the dataset
required_columns = ['drug', 'quantity']
for column in required_columns:
    if column in data_clean.columns:
        continue
    print(f"Error: Required column '{column}' not found in the dataset.")
    sys.exit(1)

# Classification task: 'drug' is the label, all non-target columns are features
y_classification = data_clean['drug']
X_classification = data_clean.drop(columns=['drug', 'quantity'])

# Map the drug labels to integer class ids
label_encoder = LabelEncoder()
y_classification_encoded = label_encoder.fit_transform(y_classification)

# Regression (counting) task: 'quantity' is the target, same feature columns
y_regression = data_clean['quantity']
X_regression = data_clean.drop(columns=['drug', 'quantity'])

# One-hot encode categorical features; drop_first removes the first level of
# each encoded column
X_classification_encoded = pd.get_dummies(
    X_classification,
    drop_first=True,
)
X_regression_encoded = pd.get_dummies(
    X_regression,
    drop_first=True,
)

# Step 6: Split the Data into Training and Testing Sets
# Both tasks hold out 20% of the rows and use the same random seed.

# Classification split
(
    X_train_clas,
    X_test_clas,
    y_train_clas,
    y_test_clas,
) = train_test_split(
    X_classification_encoded,
    y_classification_encoded,
    test_size=0.2,
    random_state=42,
)

# Regression split
(
    X_train_reg,
    X_test_reg,
    y_train_reg,
    y_test_reg,
) = train_test_split(
    X_regression_encoded,
    y_regression,
    test_size=0.2,
    random_state=42,
)

# Step 7: Feature Scaling
# Each scaler learns its statistics from the training split only and is then
# applied unchanged to the matching test split.
scaler_clas = StandardScaler().fit(X_train_clas)
X_train_clas_scaled = scaler_clas.transform(X_train_clas)
X_test_clas_scaled = scaler_clas.transform(X_test_clas)

scaler_reg = StandardScaler().fit(X_train_reg)
X_train_reg_scaled = scaler_reg.transform(X_train_reg)
X_test_reg_scaled = scaler_reg.transform(X_test_reg)

# Step 8: Train the Classification Model
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train_clas_scaled, y_train_clas)
print("\nClassification model trained successfully.")

# Step 9: Evaluate the Classification Model
y_pred_clas = classifier.predict(X_test_clas_scaled)
print("\nClassification Report:")
# Pass the full list of encoded class ids explicitly. Without `labels`, the
# report infers the classes from y_test/y_pred only, and raises ValueError
# when a drug is missing from the test split because the number of inferred
# classes no longer matches `target_names`.
# The names are converted to str so that non-string drug labels (e.g. numeric
# codes) can be rendered in the report as well.
print(classification_report(
    y_test_clas,
    y_pred_clas,
    labels=list(range(len(label_encoder.classes_))),
    target_names=[str(name) for name in label_encoder.classes_],
))

# Step 10: Train the Regression Model
# fit() returns the estimator itself, so construction and training are chained.
regressor = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_reg_scaled,
    y_train_reg,
)
print("Regression model trained successfully.")

# Step 11: Evaluate the Regression Model
# Compare the predicted quantities on the held-out split with the true ones.
y_pred_reg = regressor.predict(X_test_reg_scaled)
mae = mean_absolute_error(y_test_reg, y_pred_reg)
print(f"\nMean Absolute Error for Quantity Prediction: {mae:.2f}")

# Step 12: Save the Trained Models
# Each fitted object is written to its own file, in this order.
artifacts = (
    (classifier, 'drug_classification_model.pkl'),
    (regressor, 'drug_quantity_regressor.pkl'),
    (scaler_clas, 'scaler_classification.pkl'),
    (scaler_reg, 'scaler_regression.pkl'),
)
for fitted_object, filename in artifacts:
    joblib.dump(fitted_object, filename)
print("\nModels and scalers saved successfully.")

# Optional: Function to Load and Use the Models
def predict_new_data(new_data):
    """
    Predicts the drug classification and quantity for new data.

    The fitted models and scalers are read back from the '.pkl' files written
    by the training steps. The label encoder and the training feature columns
    are not persisted: they are taken from the module-level `label_encoder`
    and `X_classification_encoded`, so this function only works in a session
    in which the training steps of this script have already run.

    Parameters:
    new_data (pd.DataFrame): New data with the same features as the training data
        (raw feature columns, before one-hot encoding).

    Returns:
    tuple: (predicted_drug, predicted_quantity), each holding one prediction
        per row of new_data.
    """
    # Load models and scalers.
    # NOTE: joblib.load unpickles the files; only load files written by this
    # script, never files from an untrusted source.
    clf = joblib.load('drug_classification_model.pkl')
    reg = joblib.load('drug_quantity_regressor.pkl')
    scaler_c = joblib.load('scaler_classification.pkl')
    scaler_r = joblib.load('scaler_regression.pkl')
    le = label_encoder  # Not persisted; relies on the module-level encoder

    # Encode categorical features WITHOUT drop_first. drop_first removes the
    # first category present in *this* frame, which is not necessarily the
    # baseline category dropped during training; for a single-row input it
    # would remove the only dummy column and silently turn the sample into
    # the baseline category.
    new_data_encoded = pd.get_dummies(new_data)

    # Align the new data with training data columns: the training baseline
    # dummy (and any category unseen during training) is discarded here, and
    # training columns absent from the new data are filled with 0.
    new_data_encoded = new_data_encoded.reindex(columns=X_classification_encoded.columns, fill_value=0)

    # Scale features with the scaler fitted for each task
    new_data_scaled_clas = scaler_c.transform(new_data_encoded)
    new_data_scaled_reg = scaler_r.transform(new_data_encoded)

    # Predict
    pred_clas = clf.predict(new_data_scaled_clas)
    pred_reg = reg.predict(new_data_scaled_reg)

    # Decode classification labels back to the original drug names
    pred_drug = le.inverse_transform(pred_clas)

    return pred_drug, pred_reg

# Example Usage of predict_new_data (Uncomment and modify as needed)
# new_sample = pd.DataFrame({
#     'feature1': [value1],
#     'feature2': [value2],
#     # Add all necessary features
# })
# predicted_drug, predicted_quantity = predict_new_data(new_sample)
# print(f"Predicted Drug: {predicted_drug[0]}, Predicted Quantity: {predicted_quantity[0]:.2f}")