# Logistic Regression Analysis Using a Pipeline

## Import necessary libraries

In [1]:
# Importing necessary libraries
import joblib
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

In [2]:
# Toy dataset for the walkthrough (swap in your own data here).
# One 'age' entry is deliberately missing (np.nan).
data = dict(
    age=[25, 35, 45, 55, np.nan, 25, 35],
    salary=[50000, 60000, 70000, 80000, 50000, 100000, 110000],
    gender=['male', 'female', 'female', 'male', 'female', 'male', 'female'],
    purchased=[0, 1, 0, 1, 0, 1, 1],
)

# Build the DataFrame and preview its first two rows
df = pd.DataFrame.from_dict(data)
df.head(2)

Unnamed: 0,age,salary,gender,purchased
0,25.0,50000,male,0
1,35.0,60000,female,1


# Labelling: Separating Features and Target

In [3]:
# The 'purchased' column is the target; every remaining column is a feature
y = df['purchased']
X = df.drop(columns=['purchased'])

## Defining numerical and categorical columns

In [4]:
# Column groups: each list is later routed to its own preprocessing branch
categorical_features = ['gender']        # string-valued column
numeric_features = ['age', 'salary']     # number-valued columns

## Set Up the Pipeline

In [5]:
# Numeric branch: fill missing values with the column mean, then scale
numeric_steps = [
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

In [6]:
# Pipeline for processing categorical data
categorical_transformer = Pipeline(steps=[
    # Fill missing values with the most frequent value
    ('imputer', SimpleImputer(strategy='most_frequent')),
    # Apply one-hot encoding to categorical variables.
    # handle_unknown='ignore' encodes a category that was not present when the
    # encoder was fitted as all zeros instead of raising at predict time; with
    # a training split this small, a category can easily be missing from it.
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

In [7]:
# Route each column group to its matching transformer:
# numeric_features -> numeric_transformer, categorical_features -> categorical_transformer
column_routes = [
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_routes)

In [8]:
# End-to-end estimator: preprocessing first, logistic regression second
model_steps = [
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression()),
]
pipeline = Pipeline(steps=model_steps)

## Split Data into Train & Test Sets

In [9]:
# Hold out 30% of the rows as the test set; the fixed random_state makes
# the split the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

## Train the Model

In [10]:
# Fit the preprocessing steps and the classifier on the training split
pipeline.fit(X=X_train, y=y_train)

## Prediction on Test Data

In [11]:
# Predicted class labels for the held-out test rows
y_pred = pipeline.predict(X=X_test)

## Model Accuracy

In [12]:
# Compare the test-set predictions against the true labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy:.2f}")

Model accuracy: 0.67


# Get Prediction on New Data

In [13]:
# A single unseen record to score, with the same feature columns as X
new_record = {'age': 40, 'salary': 65000, 'gender': 'male'}
new_data = pd.DataFrame([new_record])

In [14]:
# Score the new record: predicted class label plus per-class probabilities
new_prediction = pipeline.predict(new_data)
new_prediction_proba = pipeline.predict_proba(new_data)

# Report the label and the probability of each class for the single input row
predicted_label = new_prediction[0]
class_probs = new_prediction_proba[0]
print(f"Predicted class for the new data: {predicted_label}")
print(f"Probability of class 0: {class_probs[0]:.2f}")
print(f"Probability of class 1: {class_probs[1]:.2f}")

Predicted class for the new data: 0
Probability of class 0: 0.52
Probability of class 1: 0.48


# Save the Model

In [15]:
# Save the fitted pipeline (preprocessing + classifier) to a file so it can be
# reloaded later without retraining. `joblib` is imported in the notebook's
# import cell at the top.
joblib.dump(pipeline, 'pipeline_model.pkl')

['pipeline_model.pkl']

# STREAMLIT

In [18]:
%%writefile Streamlit_app.py
import streamlit as st
import pandas as pd
import joblib

# Load the saved model
pipeline = joblib.load('pipeline_model.pkl')

# Streamlit title
st.title("Purchase Prediction Model")

# User input
age = st.number_input("Age", min_value=18, max_value=100, value=30)
salary = st.number_input("Salary", min_value=10000, max_value=200000, value=50000)
gender = st.selectbox("Gender", options=["male", "female"])

# Create a DataFrame from the user input
new_data = pd.DataFrame({
    'age': [age],
    'salary': [salary],
    'gender': [gender]
})

# Make a prediction
if st.button("Make Prediction"):
    prediction = pipeline.predict(new_data)
    prediction_proba = pipeline.predict_proba(new_data)

    # Display the results
    st.write(f"Predicted class: {prediction[0]}")
    st.write(f"Probability of class 0: {prediction_proba[0][0]:.2f}")
    st.write(f"Probability of class 1: {prediction_proba[0][1]:.2f}")

Overwriting Streamlit_app.py


In [None]:
# Shell command (IPython "!" syntax): run the Streamlit_app.py file written by the %%writefile cell above
!streamlit run Streamlit_app.py

In [None]:
# Done