# In [None]:
import logging  # Import logging for logging events
import os  # Import os for filesystem operations (creating the model directory)
import time  # Import time for time-related operations

import joblib  # Import joblib for model persistence
import matplotlib.pyplot as plt  # Import matplotlib for data visualization
import numpy as np  # Import numpy for numerical computations
import pandas as pd  # Import pandas for data manipulation
from sklearn.ensemble import RandomForestClassifier  # Import RandomForestClassifier from scikit-learn
from sklearn.impute import SimpleImputer  # Import SimpleImputer for handling missing values
from sklearn.metrics import accuracy_score  # Import accuracy_score metric from scikit-learn
from sklearn.model_selection import train_test_split  # Import train_test_split for data splitting

# --- Data loading -----------------------------------------------------------
# Read the Titanic CSV into a DataFrame and print its first rows as a quick
# sanity check of what was loaded.
titanic_data = pd.read_csv('data/titanic.csv')
print(titanic_data.head())

# --- Feature / target selection ---------------------------------------------
# Columns used as model inputs; 'Survived' is the classification target.
features = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex', 'Embarked']
X = titanic_data[features]
y = titanic_data['Survived']

# One-hot encode the categorical columns. drop_first=True drops one level per
# column so the remaining dummies are not redundant.
X = pd.get_dummies(X, columns=['Sex', 'Embarked'], drop_first=True)

# --- Train / validation split -----------------------------------------------
# Split BEFORE imputing: the medians used to fill gaps must be computed from
# training rows only, otherwise validation data leaks into preprocessing and
# the validation accuracy is optimistically biased. The fixed random_state
# keeps the row assignment reproducible.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify independent
# frames rather than views derived from X.
X_train = X_train.copy()
X_val = X_val.copy()

# --- Missing-value imputation -----------------------------------------------
# Fit the median imputer on the training rows, then apply those same medians
# to the validation rows.
imputer = SimpleImputer(strategy='median')
numerical_cols = ['Age', 'Fare']
X_train[numerical_cols] = imputer.fit_transform(X_train[numerical_cols])
X_val[numerical_cols] = imputer.transform(X_val[numerical_cols])

# --- Model training -----------------------------------------------------------
model = RandomForestClassifier()
model.fit(X_train, y_train)

# --- Persistence --------------------------------------------------------------
# Make sure the target directory exists; joblib.dump does not create it and
# would otherwise fail with FileNotFoundError on a fresh checkout.
os.makedirs('model', exist_ok=True)
joblib.dump(model, 'model/model.joblib')

# Send INFO-and-above records to 'real_time_model.log', each prefixed with a
# timestamp and the level name.
logging.basicConfig(filename='real_time_model.log', level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')

# Validation accuracy recorded after every refit, in chronological order.
predictions_list = []

# Periodic monitoring loop: refit, score, log, plot, wait. It never exits on
# its own and must be interrupted externally.
while True:
    # Refit on the existing training split. NOTE: no new data is loaded here,
    # so any variation between iterations comes from the estimator itself.
    model.fit(X_train, y_train)

    # Score the refitted model on the held-out validation rows.
    y_pred_val = model.predict(X_val)
    accuracy_val = accuracy_score(y_val, y_pred_val)
    predictions_list.append(accuracy_val)

    # Lazy %-style arguments: the message is only formatted if the record is
    # actually emitted. The resulting text is the same as before.
    logging.info("Real-Time Model Accuracy: %.2f", accuracy_val)

    # Plot the full accuracy history. Keep a handle to the figure so it can be
    # released explicitly once it has been shown.
    fig = plt.figure(figsize=(10, 5))
    plt.plot(predictions_list, marker='o', linestyle='-', color='b')
    plt.title('Real-Time Model Predictions')
    plt.xlabel('Updates')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.show()

    # pyplot keeps every figure registered until it is closed; without this the
    # loop would accumulate one open figure per iteration.
    plt.close(fig)

    # Wait 60 seconds before the next update.
    time.sleep(60)






from flask import Flask, request, jsonify  # Import necessary Flask modules

import joblib  # Import the joblib module for model loading

# Deserialize the persisted classifier once, at module import, so that every
# request handled by this process reuses the same in-memory model.
model = joblib.load('model/model.joblib')

# Flask application instance on which the HTTP routes are registered.
app = Flask(__name__)

@app.route('/predict', methods=['GET', 'POST'])  # Define an endpoint '/predict' that accepts GET and POST requests
def predict():
    """Return the model's prediction for one feature vector.

    POST requests must carry a JSON object of the form
    ``{"features": [...]}``. The list is passed to ``model.predict`` as a
    single sample, so its length and ordering must match what the loaded
    model expects (presumably the encoded training columns; verify against
    the training code).

    Returns:
        On success, a JSON response ``{"prediction": <int>}``.
        On a malformed or unusable request body, a JSON response
        ``{"error": <message>}`` with HTTP status 400.
        For GET requests, a plain-text hint that only POST is supported.
    """
    if request.method != 'POST':
        return "This endpoint accepts POST requests only."  # Return a message for GET requests

    # silent=True yields None instead of raising when the body is missing or
    # is not valid JSON, so the problem can be reported as a structured 400.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'features' not in payload:
        return jsonify({'error': "Request body must be a JSON object with a 'features' key."}), 400

    features = payload['features']
    if not isinstance(features, list) or not features:
        return jsonify({'error': "'features' must be a non-empty list of numeric values."}), 400

    try:
        # Wrap in a list: predict expects a 2-D batch, here one sample.
        prediction = model.predict([features])[0]
    except (ValueError, TypeError):
        # Raised by input validation for a wrong feature count or values that
        # cannot be converted to numbers; this is a client error, not a
        # server fault.
        return jsonify({'error': "The supplied 'features' could not be used for prediction."}), 400

    # Cast to a built-in int so the value is JSON-serializable.
    return jsonify({'prediction': int(prediction)})

if __name__ == '__main__':  # Start the Flask application if this script is executed directly
    # Local import: only needed when the module is run as a script.
    import os

    # Debug mode enables the interactive debugger, which allows arbitrary code
    # execution from the browser. Keep it off unless the operator explicitly
    # opts in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled)