# Employee Churn — SQL → pandas → Logistic Regression

In [None]:
import sqlite3, pandas as pd, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Load the whole `employees` table from the SQLite file into a DataFrame.
con = sqlite3.connect('data/hr.db')
try:
    df = pd.read_sql_query('SELECT * FROM employees', con)
finally:
    # Close the handle even when the query raises (e.g. the table is missing);
    # otherwise a failed run leaves the database open for the life of the
    # interpreter/kernel.
    con.close()
print(df.head())

# Features/label
# The target is `left_company`, cast to int. `employee_id` is removed from the
# feature frame together with the target.
y = df['left_company'].astype(int)
X = df.drop(columns=['left_company', 'employee_id'])

# Columns routed to each preprocessing branch.
# NOTE(review): `over_time` is standardised as a numeric column. If the table
# stores it as text this will fail in StandardScaler — confirm the schema.
num_cols = ['age', 'years_at_company', 'monthly_income', 'over_time']
cat_cols = ['dept']

# Scale the numeric columns and one-hot encode the categorical one.
# Columns of X not named in either list are dropped by ColumnTransformer's
# default `remainder='drop'`. `handle_unknown='ignore'` keeps categories that
# were not seen during fit from raising at transform time.
pre = ColumnTransformer([
    ('num', StandardScaler(), num_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
])

# Preprocessing and classifier live in a single Pipeline, so the scaler and
# encoder are fitted on the training split only when `clf.fit` is called.
clf = Pipeline([
    ('prep', pre),
    ('lr', LogisticRegression(max_iter=1000)),
])

# Hold out 40% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is not stratified on `y`. If the classes are
# imbalanced, consider `stratify=y` — this changes the reported numbers.
Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.4, random_state=42)

clf.fit(Xtr, ytr)

# `Pipeline.score` delegates to the final estimator, i.e. mean accuracy here.
print('Train score:', clf.score(Xtr, ytr))
print('Test  score:', clf.score(Xte, yte))

# Per-class precision/recall/F1 on the held-out rows.
print(classification_report(yte, clf.predict(Xte)))