In [8]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load the Titanic dataset
df = sns.load_dataset('titanic')

# Prepare the features and target variable
X = df[['pclass', 'age', 'fare', 'sex', 'parch', 'sibsp']]
y = df['survived']

# One-hot encoding for 'sex' and handling missing values
X = pd.get_dummies(X, columns=['sex'], drop_first=True)  # Avoid dummy variable trap
#X['age'].fillna(X['age'].mean(), inplace=True) 
X.fillna({'age': X['age'].mean()}, inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Initialize models and parameter grids
models = {
    'Logistic Regression': (LogisticRegression(max_iter=1000), {'C': [0.01, 0.1, 1, 10, 100],'n_jobs': [-1]}),
    'KNeighbors Classifier': (KNeighborsClassifier(), {'n_neighbors': np.arange(1,40,2), 'weights': ['uniform', 'distance']}),
    'RandomForest Classifier': (RandomForestClassifier(), {'n_estimators': [10,20,50, 100, 200], 'max_depth': [None, 10, 20]}),
    'SVC': (SVC(), {'C': [0.01, 0.1, 1, 10], 'kernel': ['linear', 'rbf']}),
    'Decision Tree': (DecisionTreeClassifier(), {'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]})
}

# Store results
models_score = []

# Train each model with grid search and evaluate its accuracy
for name, (model, param_grid) in models.items():
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)
    
    # Get the best model
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    
    models_score.append((name, accuracy, grid_search.best_params_))  # Append as a tuple

# Sort models by accuracy
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)

# Print sorted accuracy scores and best parameters
for model in sorted_models:
    print(f"Accuracy score for {model[0]}: {model[1]:.2f}, Best Parameters: {model[2]}")


Accuracy score for Decision Tree: 0.83, Best Parameters: {'max_depth': 10, 'min_samples_split': 10}
Accuracy score for Logistic Regression: 0.81, Best Parameters: {'C': 1, 'n_jobs': -1}
Accuracy score for RandomForest Classifier: 0.81, Best Parameters: {'max_depth': 10, 'n_estimators': 50}
Accuracy score for SVC: 0.78, Best Parameters: {'C': 1, 'kernel': 'linear'}
Accuracy score for KNeighbors Classifier: 0.73, Best Parameters: {'n_neighbors': np.int64(25), 'weights': 'distance'}


In [5]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score
import numpy as np

# Load the Titanic dataset
df = sns.load_dataset('titanic')

# Prepare the features and target variable
X = df[['pclass', 'age', 'fare', 'sex', 'parch', 'sibsp']]
y = df['survived']

# One-hot encoding for 'sex' and handling missing values
X = pd.get_dummies(X, columns=['sex'], drop_first=True)  # Avoid dummy variable trap
#X['age'].fillna(X['age'].mean(), inplace=True) 
X.fillna({'age': X['age'].mean()}, inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Initialize models and parameter grids
models = {
    'Logistic Regression': (LogisticRegression(max_iter=1000), {'C': [0.01, 0.1, 1, 10, 100]}),
    'KNeighbors Classifier': (KNeighborsClassifier(), {'n_neighbors': np.arange(1,40,2), 'weights': ['uniform', 'distance']}),
    'RandomForest Classifier': (RandomForestClassifier(), {'n_estimators': [10,20,50, 100, 200], 'max_depth': [None, 10, 20]}),
    'SVC': (SVC(), {'C': [0.01, 0.1, 1, 10], 'kernel': ['linear', 'rbf']}),
    'Decision Tree': (DecisionTreeClassifier(), {'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]})
}

# Store results
models_score = []

# Train each model with grid search and evaluate its accuracy
for name, (model, param_grid) in models.items():
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')
    grid_search.fit(X_train, y_train)
    
    # Get the best model
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    precision = precision_score(y_test, y_pred)
    
    models_score.append((name, precision, grid_search.best_params_))  # Append as a tuple

# Sort models by accuracy
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)

# Print sorted accuracy scores and best parameters
for model in sorted_models:
    print(f"precision score for {model[0]}: {model[1]:.2f}, Best Parameters: {model[2]}")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


precision score for Logistic Regression: 0.91, Best Parameters: {'C': 0.01}
precision score for SVC: 0.89, Best Parameters: {'C': 0.01, 'kernel': 'linear'}
precision score for RandomForest Classifier: 0.84, Best Parameters: {'max_depth': 10, 'n_estimators': 50}
precision score for Decision Tree: 0.82, Best Parameters: {'max_depth': 10, 'min_samples_split': 10}
precision score for KNeighbors Classifier: 0.74, Best Parameters: {'n_neighbors': np.int64(39), 'weights': 'distance'}


In [6]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
import numpy as np

# Load the Titanic dataset
df = sns.load_dataset('titanic')

# Prepare the features and target variable
X = df[['pclass', 'age', 'fare', 'sex', 'parch', 'sibsp']]
y = df['survived']

# One-hot encoding for 'sex' and handling missing values
X = pd.get_dummies(X, columns=['sex'], drop_first=True)  # Avoid dummy variable trap
#X['age'].fillna(X['age'].mean(), inplace=True) 
X.fillna({'age': X['age'].mean()}, inplace=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

# Initialize models and parameter grids
models = {
    'Logistic Regression': (LogisticRegression(max_iter=1000), {'C': [0.01, 0.1, 1, 10, 100]}),
    'KNeighbors Classifier': (KNeighborsClassifier(), {'n_neighbors': np.arange(1,40,2), 'weights': ['uniform', 'distance']}),
    'RandomForest Classifier': (RandomForestClassifier(), {'n_estimators': [10,20,50, 100, 200], 'max_depth': [None, 10, 20]}),
    'SVC': (SVC(), {'C': [0.01, 0.1, 1, 10], 'kernel': ['linear', 'rbf']}),
    'Decision Tree': (DecisionTreeClassifier(), {'max_depth': [None, 10, 20], 'min_samples_split': [2, 5, 10]})
}

# Store results
models_score = []

# Train each model with grid search and evaluate its accuracy
for name, (model, param_grid) in models.items():
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='f1')
    grid_search.fit(X_train, y_train)
    
    # Get the best model
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    
    models_score.append((name, f1, grid_search.best_params_))  # Append as a tuple

# Sort models by accuracy
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)

# Print sorted accuracy scores and best parameters
for model in sorted_models:
    print(f"f1 score for {model[0]}: {model[1]:.2f}, Best Parameters: {model[2]}")


f1 score for Logistic Regression: 0.76, Best Parameters: {'C': 1}
f1 score for RandomForest Classifier: 0.75, Best Parameters: {'max_depth': 10, 'n_estimators': 50}
f1 score for SVC: 0.73, Best Parameters: {'C': 0.1, 'kernel': 'linear'}
f1 score for Decision Tree: 0.72, Best Parameters: {'max_depth': 20, 'min_samples_split': 5}
f1 score for KNeighbors Classifier: 0.56, Best Parameters: {'n_neighbors': np.int64(3), 'weights': 'distance'}
