In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

### Loading and Splitting Data

In [4]:
# Generating Sample Data
# A seeded Generator makes the synthetic dataset identical on every
# Restart & Run All; unseeded np.random.* calls would draw fresh values each run.
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

data = {
    'Feature1': rng.random(N_SAMPLES),        # uniform floats in [0, 1)
    'Feature2': rng.random(N_SAMPLES),        # uniform floats in [0, 1)
    'Target': rng.choice([0, 1], N_SAMPLES),  # binary class labels
}
df = pd.DataFrame(data)

# Separate the two predictor columns from the label column, then hold out
# 20% of the rows as a test set.
X = df.loc[:, ['Feature1', 'Feature2']]
y = df.loc[:, 'Target']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the partition is the same on every run
)

### Data Preprocessing

In [5]:
# Standardization: the scaler is fitted on the training split only and the
# fitted transform is then applied to both splits, so no test-set statistics
# leak into the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Min-Max Scaling: same fit-on-train / apply-to-both pattern.
minmax = MinMaxScaler().fit(X_train)
X_train_mm = minmax.transform(X_train)
X_test_mm = minmax.transform(X_test)

### Encoding Categorical Data

In [9]:
# Label Encoding: fit the encoder on the target column, then map each label
# to its integer code.
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)

# One-Hot Encoding: a single categorical column with five rows, shaped
# (n_samples, 1) as the encoder expects 2-D input.
categorical_features = np.array(['A', 'B', 'C', 'A', 'C']).reshape(-1, 1)
encoder = OneHotEncoder().fit(categorical_features)
one_hot_encoded = encoder.transform(categorical_features)

### Training Machine Learning Models

In [11]:
# Every model below is fitted on the same training split and predicts on the
# same test split. Estimators with internal randomness receive a fixed
# random_state so their predictions (and the metrics computed from them)
# are reproducible across kernel restarts.

# Linear Regression
# NOTE: the target is a 0/1 label, so these predictions are continuous
# values rather than class labels.
lr = LinearRegression()
lr.fit(X_train, y_train)
preds = lr.predict(X_test)

# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log = log_reg.predict(X_test)

# Decision Tree Classifier
dt = DecisionTreeClassifier(random_state=42)  # seeded for reproducible results
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

# Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)  # seeded bootstrap/feature sampling
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

# Support Vector Machine (SVM)
svm = SVC()
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# K-Nearest Neighbors (KNN)
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

### Evaluating Model Performance

In [12]:
# Score the random forest's test-set predictions, computing all three
# metrics first and then reporting them in order.
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_confusion = confusion_matrix(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)

# Accuracy Score
print("Accuracy:", rf_accuracy)

# Confusion Matrix
print("Confusion Matrix:\n", rf_confusion)

# Classification Report
print("Classification Report:\n", rf_report)

Accuracy: 0.65
Confusion Matrix:
 [[8 2]
 [5 5]]
Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.80      0.70        10
           1       0.71      0.50      0.59        10

    accuracy                           0.65        20
   macro avg       0.66      0.65      0.64        20
weighted avg       0.66      0.65      0.64        20



### Hyperparameter Tuning with GridSearchCV

In [13]:
from sklearn.model_selection import GridSearchCV

# Candidate neighbourhood sizes for KNN: the odd values 3, 5, 7, 9.
param_grid = {'n_neighbors': list(range(3, 10, 2))}

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split.
gs = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
)
gs.fit(X_train, y_train)
print("Best Parameters:", gs.best_params_)

Best Parameters: {'n_neighbors': 7}


### Saving and Loading Models

In [15]:
# The save/load example below is disabled: it is wrapped in a triple-quoted
# string, so nothing is imported, written to disk, or loaded when this cell
# runs. Because the string is the cell's only expression, Jupyter echoes its
# repr as the cell output.
# NOTE(review): if this is re-enabled, joblib.load should only be pointed at
# files from a trusted source — TODO confirm against joblib's persistence docs.
'''
import joblib

# Save model
joblib.dump(rf, 'random_forest_model.pkl')

# Load model
loaded_model = joblib.load('random_forest_model.pkl')
y_loaded_pred = loaded_model.predict(X_test)
'''

"\nimport joblib\n\n# Save model\njoblib.dump(rf, 'random_forest_model.pkl')\n\n# Load model\nloaded_model = joblib.load('random_forest_model.pkl')\ny_loaded_pred = loaded_model.predict(X_test)\n"