In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import joblib

# Train, evaluate and save a random-forest survival classifier from a
# Titanic-style passenger CSV.
#
# Order of operations matters here: the train/test split is taken *before*
# any statistic (median, mode, mean/std) is computed, so nothing learned from
# the held-out rows leaks into training.
data_path = r"C:\LEARNING\Growth Link\tested.csv"
model_path = r"C:\LEARNING\Growth Link\model.pkl"

# Load the dataset
print(f"Loading dataset from: {data_path}")
df = pd.read_csv(data_path)

# Drop columns that are not used as features; errors='ignore' keeps this
# working when a column is already absent from the file.
df = df.drop(columns=['Cabin', 'PassengerId', 'Name', 'Ticket'], errors='ignore')

# Convert Sex to binary. Plain column assignment replaces the column, so it
# ends up numeric; `df.loc[:, 'Sex'] = ...` writes in place on pandas 2.x and
# leaves the column as object dtype.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# Split the data (same seed and stratification as before, so the same rows
# land in each partition).
X = df.drop(columns=['Survived'])  # Features
y = df['Survived']  # Target variable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Row labels of each partition; used below to re-slice X after preprocessing.
# Relies on the default unique index produced by read_csv.
train_idx, test_idx = X_train.index, X_test.index

# Handle missing values using statistics from the training rows only.
# Fare is imputed too: a NaN Fare would otherwise pass through the scaler and
# reach the model, which older scikit-learn random forests reject.
fill_values = {
    'Age': X_train['Age'].median(),
    'Fare': X_train['Fare'].median(),
    'Embarked': X_train['Embarked'].mode()[0],
}
X = X.fillna(value=fill_values)

# One-hot encode categorical features on the whole frame so both partitions
# share identical dummy columns (this step learns no statistics).
X = pd.get_dummies(X, columns=['Embarked', 'Pclass'], drop_first=True)

# Normalize numerical features: fit on training rows, apply to all rows.
scaler = StandardScaler()
scaler.fit(X.loc[train_idx, ['Age', 'Fare']])
X[['Age', 'Fare']] = scaler.transform(X[['Age', 'Fare']])

# Re-slice the preprocessed features; y_train / y_test from the split above
# are already aligned with these row labels.
X_train = X.loc[train_idx]
X_test = X.loc[test_idx]

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
# NOTE(review): the recorded run of this cell scored 1.0 on every metric.
# Scores that perfect usually mean a feature encodes the label -- presumably
# 'Survived' in tested.csv is derived from 'Sex'; verify (e.g. with
# pd.crosstab(df['Sex'], df['Survived'])) before trusting this model.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Save the model
# NOTE: only the classifier is persisted. Scoring new passengers also needs
# `fill_values`, the dummy-column layout of X and the fitted `scaler`.
joblib.dump(model, model_path)
print(f"Model saved to '{model_path}'")


Loading dataset from: C:\LEARNING\Growth Link\tested.csv
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
 [[53  0]
 [ 0 31]]
Model saved to 'C:\LEARNING\Growth Link\model.pkl'
