In [5]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load Titanic dataset from CSV file
data = pd.read_csv('../data/train.csv')

# Drop identifier / free-text columns that are not used as features.
# Missing values are intentionally NOT filled here: computing the Age median or
# the Embarked mode on the full dataset before splitting would let held-out rows
# influence preprocessing (data leakage). The SimpleImputer steps in the
# modelling pipeline learn those statistics from the training split only.
data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# Split data into training and testing sets (80/20, fixed seed for repeatability)
train_data, test_data, train_labels, test_labels = train_test_split(
    data.drop('Survived', axis=1), data['Survived'], test_size=0.2, random_state=42)

# Define preprocessing pipeline
# Numeric features: fill gaps with the median seen during fit, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())])

# Categorical features: fill gaps with the most frequent value seen during fit,
# then one-hot encode. handle_unknown='ignore' keeps prediction from failing on
# a category that was not present at fit time.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Only the four columns listed here reach the classifier; ColumnTransformer's
# default remainder='drop' discards every other column present in the input.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, ['Age', 'Fare']),
        ('cat', categorical_transformer, ['Sex', 'Embarked'])])

# Define Scikit-learn model
# random_state is pinned (same seed as the train/test split) so that re-running
# the notebook yields the same forest and therefore the same reported accuracy.
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('classifier', RandomForestClassifier(random_state=42))])

# Train Scikit-learn model on training data
model.fit(train_data, train_labels)

# Save Scikit-learn model to disk
# Make sure the target directory exists so saving does not fail on a fresh
# checkout where ../models has not been created yet.
Path('../models').mkdir(parents=True, exist_ok=True)
joblib.dump(model, '../models/model.joblib')
# Persist the preprocessing step taken from the fitted pipeline, so the saved
# object is guaranteed to be the one that was actually trained.
joblib.dump(model.named_steps['preprocessor'], '../models/preprocessor.joblib')

# Evaluate Scikit-learn model on testing data
predictions = model.predict(test_data)
accuracy = accuracy_score(test_labels, predictions)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7430167597765364


In [16]:




import joblib
import numpy as np
from flask import Flask, request, jsonify

# Load the trained model
# The file at this path is the full Pipeline (preprocessing + classifier) that
# the training cell writes with joblib.dump, so raw feature columns can be
# passed straight to model.predict(). This rebinds the notebook-level `model`.
# NOTE: joblib artifacts are pickle-based; only load files from a trusted source.
model = joblib.load('../models/model.joblib')

# Define Flask app
# Route decorators in this cell register their handlers on this object.
app = Flask(__name__)

# Define endpoint for predicting survival
@app.route('/predict', methods=['POST'])
def predict():
    """Predict survival for the passenger(s) described in the JSON request body.

    Accepted request bodies:
      * a JSON object mapping feature names to scalar values (one passenger), or
      * a non-empty JSON array of such objects (one prediction per element).

    The pipeline built in the training cell selects the columns ``Age``,
    ``Fare``, ``Sex`` and ``Embarked``, so each record must provide those keys.

    Returns:
        HTTP 200 with a JSON list of predicted labels, one per passenger, or
        HTTP 400 with ``{"error": "..."}`` when the body is missing, is not
        JSON of one of the shapes above, or cannot be scored by the model
        (for example because a required feature column is absent).
    """
    # silent=True returns None for a missing, mis-typed or malformed body
    # instead of raising, so every bad request is answered by the same
    # JSON error response below.
    input_data = request.get_json(silent=True)

    is_single_record = isinstance(input_data, dict) and bool(input_data)
    is_record_list = (
        isinstance(input_data, list)
        and bool(input_data)
        and all(isinstance(record, dict) for record in input_data)
    )
    if not (is_single_record or is_record_list):
        return jsonify({
            'error': 'Request body must be a JSON object of feature values '
                     'or a non-empty JSON array of such objects.'
        }), 400

    try:
        # `pd` is the pandas module imported in the notebook's first cell.
        if is_single_record:
            # A dict of scalars needs an explicit index to become a one-row frame.
            input_df = pd.DataFrame(input_data, index=[0])
        else:
            input_df = pd.DataFrame(input_data)

        # Make predictions using the loaded model
        predictions = model.predict(input_df)
    except (KeyError, TypeError, ValueError) as exc:
        # Missing columns or values that cannot be converted surface here;
        # report them as a client error rather than an unhandled HTTP 500.
        return jsonify({'error': f'Could not score input: {exc}'}), 400

    # Return predictions as JSON (tolist() converts NumPy scalars to plain Python)
    return jsonify(predictions.tolist())

# Start Flask's built-in server when this code runs as the main module.
# NOTE(review): host='0.0.0.0' listens on every network interface, so the
# /predict endpoint is reachable from other machines on port 8080 — confirm
# this exposure is intended (e.g. container deployment).
# NOTE(review): app.run() starts Flask's development server and does not return
# until the server stops; inside a notebook kernel __name__ is '__main__', so
# running this cell blocks the kernel until interrupted — presumably meant for
# script use; confirm.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)


array([0])