# Essential scikit-learn Commands

This notebook demonstrates common steps in training models with scikit-learn.

## 1. Import Libraries

In [None]:
import pandas as pdfrom sklearn.model_selection import train_test_split, cross_val_scorefrom sklearn.preprocessing import StandardScalerfrom sklearn.linear_model import LogisticRegression, LinearRegressionfrom sklearn.pipeline import make_pipelinefrom sklearn.metrics import accuracy_score, mean_squared_errorimport joblib

## 2. Load Data (CSV Example)

In [None]:
# Read the example dataset from the working directory.
df = pd.read_csv('data.csv')

# Every column except the two targets is used as a feature.
X = df.drop(columns=['label', 'target'])

# 'label' feeds the classification cells, 'target' the regression cells.
y_clf = df['label']
y_reg = df['target']

## 3. Train/Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_clf,
    test_size=0.2,
    random_state=42,
)

# Same settings for the regression target, kept under separate names.
Xr_train, Xr_test, yr_train, yr_test = train_test_split(
    X,
    y_reg,
    test_size=0.2,
    random_state=42,
)

## 4. Preprocess Features

In [None]:
# Classification features: fit the scaler on the training rows only, then
# apply those fitted statistics to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Regression features: handled the same way with a separate scaler instance.
scaler_reg = StandardScaler()
Xr_train_scaled = scaler_reg.fit_transform(Xr_train)
Xr_test_scaled = scaler_reg.transform(Xr_test)

## 5. Train a Classification Model

In [None]:
# Logistic regression with the library's default settings, trained on the
# scaled training features.
clf_model = LogisticRegression()
clf_model.fit(X_train_scaled, y_train)

## 6. Evaluate the Classification Model

In [None]:
# Predict on the held-out (scaled) rows and report the share of correct labels.
clf_preds = clf_model.predict(X_test_scaled)
acc = accuracy_score(y_test, clf_preds)
print(f'Accuracy: {acc:.4f}')

## 7. Cross Validation (Classification)

In [None]:
# Scaling lives inside the pipeline, so cross-validation fits the scaler on
# each fold's training portion rather than on the full dataset.
clf_pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)

# 5-fold cross-validation on the unscaled features; report the mean fold score.
cv_scores = cross_val_score(clf_pipeline, X, y_clf, cv=5)
print(f'CV Accuracy: {cv_scores.mean():.4f}')

## 8. Train a Regression Model

In [None]:
# Ordinary linear regression trained on the scaled regression training features.
reg_model = LinearRegression()
reg_model.fit(Xr_train_scaled, yr_train)

## 9. Evaluate the Regression Model

In [None]:
# Predict on the held-out (scaled) rows and report the mean squared error.
reg_preds = reg_model.predict(Xr_test_scaled)
mse = mean_squared_error(yr_test, reg_preds)
print(f'MSE: {mse:.4f}')

## 10. Cross Validation (Regression)

In [None]:
# Scaling is part of the pipeline, so it is fitted per fold during cross-validation.
reg_pipeline = make_pipeline(
    StandardScaler(),
    LinearRegression(),
)

# The 'neg_mean_squared_error' scorer yields negated MSE values (higher is
# better), so the sign is flipped when reporting.
cv_mse = cross_val_score(
    reg_pipeline,
    X,
    y_reg,
    cv=5,
    scoring='neg_mean_squared_error',
)
print(f'CV MSE: {-cv_mse.mean():.4f}')

## 11. Save and Load the Model

In [None]:
# Persist the trained classifier and read it back from disk.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
joblib.dump(clf_model, 'clf_model.joblib')
loaded_clf = joblib.load('clf_model.joblib')

# clf_model was fit on features transformed by `scaler`, so the reloaded
# classifier needs that same fitted scaler to handle raw feature rows.
# Persist it next to the model so the saved artifacts are usable on their own.
joblib.dump(scaler, 'scaler.joblib')
loaded_scaler = joblib.load('scaler.joblib')