In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the raw data set; the target lives in the string column 'y'.
df = pd.read_csv('new_train2.csv')

# Encode the target as a binary 'label' column: 1 where 'y' equals 'yes',
# 0 for every other value (missing values included). A vectorized comparison
# replaces the per-row Python lambda, and assign/drop builds the new frame
# without mutating the loaded one in place.
df = df.assign(label=df['y'].eq('yes').astype('int64')).drop(columns='y')

# Feature matrix (every column except the target) and the target vector.
X = df.drop(columns='label')
y = df['label']

In [3]:
# Columns used by the model, grouped by the preprocessing they receive.
# Any column of X that is not listed here is dropped by the ColumnTransformer
# (its default is remainder='drop').
categorical_features = ['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'poutcome']
numerical_features = ['age', 'duration', 'campaign']

# Standardize the numeric columns and one-hot encode the categorical ones.
# - The old `sparse=False` keyword was deprecated in scikit-learn 1.2 and
#   removed in 1.4; it is omitted so this cell runs on every version. The
#   encoder's default sparse output is accepted by both ColumnTransformer and
#   the liblinear LogisticRegression below.
# - handle_unknown='ignore' encodes a category that was never seen during fit
#   as all zeros instead of raising at predict time (e.g. a rare level that
#   only landed in the test split, or a hand-typed value).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ])

# Preprocessing and classifier are chained so the exact same transformations
# are applied at fit time and at prediction time.
log_reg_pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                                   ('classifier', LogisticRegression(solver='liblinear', max_iter=1000))])

# Hold out 30% of the rows; random_state fixes the split for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Fit the scaler, the encoder and the classifier on the training rows only.
log_reg_pipeline.fit(X_train, y_train)



In [4]:
# Hand-entered feature values for one record to score. Every value is wrapped
# in a one-element list so the dict maps directly onto a one-row DataFrame.
input_data = {
    'age': [49], 'job': ['blue-collar'], 'marital': ['married'],
    'education': ['basic.9y'], 'default': ['unknown'],
    'housing': ['no'], 'loan': ['no'],
    'contact': ['cellular'], 'month': ['nov'], 'day_of_week': ['wed'],
    'duration': [227], 'campaign': [4],
    'poutcome': ['nonexistent'],
}

# One-row frame whose column names match the ones the pipeline was fitted on.
input_df = pd.DataFrame(input_data)

# Positive-class probability (second predict_proba column) and hard prediction.
probability = log_reg_pipeline.predict_proba(input_df)[:, 1]
prediction = log_reg_pipeline.predict(input_df)

# Turn the 0/1 prediction into a readable label and report it.
if prediction[0] == 1:
    predicted_label = 'Subscribe'
else:
    predicted_label = 'Not Subscribe'
print(f"Predicted Label: {predicted_label}, Probability: {probability[0]:.4f}")

Predicted Label: Not Subscribe, Probability: 0.0155
