In [3]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from patsy import dmatrices
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load the affairs dataset from Statsmodels
affairs_data = sm.datasets.fair.load_pandas().data

# Add a binary variable 'affair' to indicate if a woman had at least one affair
affairs_data['affair'] = (affairs_data['affairs'] > 0).astype(int)

# Select the features and target variable
y, X = dmatrices('affair ~ rate_marriage + age + yrs_married + children + religious + educ + occupation + occupation_husb',
                 affairs_data, return_type='dataframe')

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a logistic regression model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train.values.ravel())

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))


Accuracy: 0.716640502354788
              precision    recall  f1-score   support

         0.0       0.74      0.89      0.81       849
         1.0       0.62      0.38      0.47       425

    accuracy                           0.72      1274
   macro avg       0.68      0.63      0.64      1274
weighted avg       0.70      0.72      0.69      1274



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [4]:
import pickle

# Save the trained model to a file
filename = 'affairs_model.pkl'
pickle.dump(model, open(filename, 'wb'))


In [6]:
import pickle
from fastapi import FastAPI
from pydantic import BaseModel

# Define the input data schema
class InputData(BaseModel):
    rate_marriage: float
    age: float
    yrs_married: float
    children: float
    religious: float
    educ: float
    occupation: float
    occupation_husb: float

# Load the trained model
model = pickle.load(open('affairs_model.pkl', 'rb'))

# Create the FastAPI application
app = FastAPI()

# Define the prediction endpoint
@app.post("/predict")
def predict_affair(data: InputData):
    # Prepare input data
    input_data = [[
        data.rate_marriage,
        data.age,
        data.yrs_married,
        data.children,
        data.religious,
        data.educ,
        data.occupation,
        data.occupation_husb
    ]]

    # Make prediction
    prediction = model.predict(input_data)

    # Format the response
    response = {"prediction": int(prediction[0])}

    return response
