In [19]:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the data
df = pd.read_csv('metro1railCustomer.csv')


def _encode_categories(column: pd.Series, codes: dict) -> pd.Series:
    """Map category labels to integer codes, failing loudly on unknown labels.

    A plain ``Series.map(dict)`` turns every label that is not a key of
    ``codes`` (typos, different casing, blanks) into NaN without warning.
    This helper raises instead and names the offending values.

    Parameters:
        column: the raw label column.
        codes: mapping from label to integer code.

    Returns:
        An int64 Series with the same index as ``column``.

    Raises:
        ValueError: if any value of ``column`` is not a key of ``codes``.
    """
    encoded = column.map(codes)
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = sorted(column[unmapped].astype(str).unique())
        raise ValueError(
            "Column '{}' contains values with no encoding: {}".format(column.name, unexpected)
        )
    return encoded.astype('int64')


# Feature engineering
# 'Distance' is the alphabetical gap between the first character of the
# destination and the letter 'F'.
# NOTE(review): presumably 'F' is the code of a reference station - confirm.
destination_initial = df['Destination'].str[0]  # NaN for missing/empty/non-string values
if destination_initial.isna().any():
    raise ValueError("'Destination' has missing or empty values; cannot derive 'Distance'")
df['Distance'] = destination_initial.map(ord).sub(ord('F')).abs()

name_length = df['Name'].str.len()  # NaN for missing/non-string values
if name_length.isna().any():
    raise ValueError("'Name' has missing or non-text values; cannot derive 'NameLength'")
df['NameLength'] = name_length.astype('int64')

# Encode categorical features (validated so unknown labels cannot become NaN)
df['Gender'] = _encode_categories(df['Gender'], {'Male': 0, 'Female': 1})
df['Complain'] = _encode_categories(df['Complain'], {'No': 0, 'Yes': 1})

# Split the data into training and testing sets (80/20, fixed seed for reproducibility)
# NOTE(review): the split is not stratified; consider stratify=y if 'Complain' is imbalanced.
X = df[['Age', 'Gender', 'Distance', 'NameLength']]
y = df['Complain']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features: the scaler is fitted on the training split only and then
# applied unchanged to the test split, so no test-set statistics reach the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply polynomial features: degree 2 adds squares and pairwise products of the
# scaled inputs; the expansion is likewise fitted on the training split only.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

# Train a logistic regression model (iteration cap raised to 500)
model = LogisticRegression(max_iter=500)
model.fit(X_train_poly, y_train)

# Evaluate the model: score() is evaluated on the held-out test split
print('Model Score:', model.score(X_test_poly, y_test))


Model Score: 0.75
