In [30]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, roc_auc_score

# Train and evaluate a decision-tree classifier that predicts the
# 'notfullypaid' column of the loan dataset from every other column.

# One seed shared by the split and the tree so a re-run reproduces the metrics
RANDOM_STATE = 42

# Load dataset
data = pd.read_csv('Dataset/loan_data.csv')

# Define feature columns and target
features = data.drop(columns=['notfullypaid'])
target = data['notfullypaid']

# Preprocess data
# Encode categorical variables and standardize numerical features.
# Columns whose dtype matches neither selection are not listed in the
# ColumnTransformer and are therefore dropped (its default remainder='drop').
categorical_features = features.select_dtypes(include=['object']).columns
numerical_features = features.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising at predict time
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)

# Create a pipeline with preprocessing and model training.
# The preprocessor is fitted inside the pipeline, i.e. on X_train only.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # Seeded: an unseeded tree breaks ties randomly, so metrics drift between runs
    ('classifier', DecisionTreeClassifier(random_state=RANDOM_STATE))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Column 1 of predict_proba is the probability of the second class in
# the classifier's classes_ ordering; ROC-AUC is computed from that score.
roc_auc = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])

print(f'Accuracy: {accuracy}')
print(f'ROC-AUC: {roc_auc}')


Accuracy: 0.7353862212943633
ROC-AUC: 0.525018571094219


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_score, recall_score, f1_score

# Train and evaluate a logistic-regression classifier that predicts the
# 'readmission' column of the hospital dataset from every other column.

# Seed for the train/test split so a re-run reproduces the metrics
RANDOM_STATE = 42

# Load dataset
data = pd.read_csv('hospital_readmission_data.csv')

# Rows without a label cannot be used for supervised training or scoring;
# drop them rather than inventing a label for them.
data = data.dropna(subset=['readmission'])

# Define feature columns and target
features = data.drop(columns=['readmission'])
target = data['readmission']

# Preprocess data
# Encode categorical variables and standardize numerical features.
# Columns whose dtype matches neither selection are not listed in the
# ColumnTransformer and are therefore dropped (its default remainder='drop').
categorical_features = features.select_dtypes(include=['object']).columns
numerical_features = features.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising at predict time
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)

# Handle missing values
# Fill gaps with the per-column mode learned from the training split only,
# then apply those same values to the test split. Computing the modes on the
# full dataset would let test rows influence preprocessing (data leakage).
fill_values = X_train.mode().iloc[0]
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# Create a pipeline with preprocessing and model training.
# The preprocessor is fitted inside the pipeline, i.e. on X_train only.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
# NOTE(review): these scorers default to pos_label=1, so this presumably
# expects a 0/1-encoded 'readmission' column - confirm against the CSV.
y_pred = pipeline.predict(X_test)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')


In [31]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.datasets import load_digits

# Train and evaluate a decision-tree classifier on scikit-learn's bundled
# digits dataset, reporting accuracy and the confusion matrix.

# One seed shared by the split and the tree so a re-run reproduces the metrics
RANDOM_STATE = 42

# Load dataset
digits = load_digits()
data = digits.data
target = digits.target

# Split data into training and testing sets (80% / 20%).
# This happens before scaling so that test samples never contribute to the
# scaler's mean/variance estimates (avoids data leakage).
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=RANDOM_STATE
)

# Normalize pixel values: fit on the training split, reuse on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the model
# Seeded: an unseeded tree breaks ties randomly, so metrics drift between runs
classifier = DecisionTreeClassifier(random_state=RANDOM_STATE)
classifier.fit(X_train, y_train)

# Predict and evaluate
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Rows are true labels, columns are predicted labels
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')


Accuracy: 0.8583333333333333
Confusion Matrix:
[[29  0  0  0  2  1  0  1  0  0]
 [ 0 22  1  0  1  0  1  1  1  1]
 [ 0  1 25  2  0  1  0  1  1  2]
 [ 0  0  0 30  0  0  2  0  1  1]
 [ 0  0  0  0 42  2  0  2  0  0]
 [ 0  0  0  1  1 44  1  0  0  0]
 [ 0  0  0  0  2  0 32  0  1  0]
 [ 0  0  0  2  1  0  0 31  0  0]
 [ 0  2  0  1  0  1  0  1 22  3]
 [ 0  0  0  3  2  0  0  2  1 32]]


In [42]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix

# Train and evaluate a logistic-regression classifier that predicts the
# 'loan_status' column of the loan-approval dataset from every other column.

# Seed for the train/test split so a re-run reproduces the metrics
RANDOM_STATE = 42

# Load dataset
data = pd.read_csv('Dataset/loan_approval_dataset.csv')

# Strip leading and trailing spaces from column names so the columns can be
# addressed by their clean names below
data.columns = data.columns.str.strip()

# Rows without a label cannot be used for supervised training or scoring;
# drop them rather than inventing a label for them.
data = data.dropna(subset=['loan_status'])

# Define feature columns and target
# NOTE(review): every non-target column is used as a feature; if the CSV
# carries a row identifier column it should probably be dropped - confirm.
features = data.drop(columns=['loan_status'])
target = data['loan_status']

# Preprocess data
# Encode categorical variables and standardize numerical features.
# Columns whose dtype matches neither selection are not listed in the
# ColumnTransformer and are therefore dropped (its default remainder='drop').
categorical_features = features.select_dtypes(include=['object']).columns
numerical_features = features.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising at predict time
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)

# Handle missing values
# Fill gaps with the per-column mode learned from the training split only,
# then apply those same values to the test split. Computing the modes on the
# full dataset would let test rows influence preprocessing (data leakage).
fill_values = X_train.mode().iloc[0]
X_train = X_train.fillna(fill_values)
X_test = X_test.fillna(fill_values)

# Create a pipeline with preprocessing and model training.
# The preprocessor is fitted inside the pipeline, i.e. on X_train only.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Rows are true labels, columns are predicted labels
conf_matrix = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{conf_matrix}')


Accuracy: 0.9074941451990632
Confusion Matrix:
[[500  36]
 [ 43 275]]


In [40]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, roc_auc_score

# Train and evaluate a decision-tree classifier that separates "good" wines
# from the rest, using the 'quality' score of the wine dataset as the label.

# One seed shared by the split and the tree so a re-run reproduces the metrics
RANDOM_STATE = 42
# Wines scoring at or above this quality value form the positive ("good") class
GOOD_QUALITY_THRESHOLD = 7

# Load dataset
data = pd.read_csv('Dataset/WineQT.csv')

# Define feature columns and target
# NOTE(review): every non-target column is used as a feature; if the CSV
# carries a row identifier column it should probably be dropped - confirm.
features = data.drop(columns=['quality'])
# Convert to binary classification (good/bad): 1 when quality >= threshold, else 0
target = (data['quality'] >= GOOD_QUALITY_THRESHOLD).astype(int)

# Preprocess data
# Standardize numerical features. Columns of any other dtype are not listed
# in the ColumnTransformer and are therefore dropped (default remainder='drop').
numerical_features = features.select_dtypes(include=['int64', 'float64']).columns

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features)
    ])

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=RANDOM_STATE
)

# Create a pipeline with preprocessing and model training.
# The preprocessor is fitted inside the pipeline, i.e. on X_train only.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    # Seeded: an unseeded tree breaks ties randomly, so metrics drift between runs
    ('classifier', DecisionTreeClassifier(random_state=RANDOM_STATE))
])

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Column 1 of predict_proba is the probability of the positive class (label 1)
roc_auc = roc_auc_score(y_test, pipeline.predict_proba(X_test)[:, 1])

print(f'Accuracy: {accuracy}')
print(f'ROC-AUC: {roc_auc}')


Accuracy: 0.8733624454148472
ROC-AUC: 0.7895344705046198
