In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [9]:
# Load the Fashion-MNIST training table from an absolute path under /content.
# The file has a .txt extension but is parsed with read_csv's default
# comma delimiter.
df = pd.read_csv("/content/fashion-mnist_train.txt")

In [10]:
# Write the frame back to the same path it was read from, omitting the
# DataFrame index.
# NOTE(review): this overwrites the input data file in place -- confirm the
# round trip is intentional before re-running.
df.to_csv("/content/fashion-mnist_train.txt", index=False)

In [11]:
# Preview the first rows to check the column layout (label + pixel columns).
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,9,0,0,0,0,0,0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6,0,0,0,0,0,0,0,5.0,0.0,...,0.0,0.0,0.0,30.0,43.0,0.0,0.0,0.0,0.0,0.0
3,0,0,0,0,1,2,0,0,0.0,0.0,...,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,3,0,0,0,0,0,0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Separate the target from the predictors: `label` becomes y and every
# remaining column of the frame becomes a feature.
y = df['label']
X = df.drop(columns=['label'])

In [13]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Learn the per-feature scaling statistics from the training rows only, then
# apply that same fitted transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
from sklearn.impute import SimpleImputer

# Fill missing entries with the column mean learned from the (scaled)
# training rows; the test rows reuse those training means.
imputer = SimpleImputer(strategy='mean').fit(X_train_scaled)
X_train_scaled_imputed = imputer.transform(X_train_scaled)
X_test_scaled_imputed = imputer.transform(X_test_scaled)

In [18]:
# Reduce dimensionality with PCA. A fractional n_components asks scikit-learn
# to keep as many leading components as are needed to reach that share of the
# explained variance (95% here).
pca = PCA(n_components=0.95)
# The projection is fitted on the training rows only...
X_train_pca = pca.fit_transform(X_train_scaled_imputed)
# ...and the test rows are projected onto those same components.
X_test_pca = pca.transform(X_test_scaled_imputed)

In [19]:
# Logistic-regression estimator with library defaults; it serves as the
# template whose hyperparameters the grid search varies.
log_reg = LogisticRegression()

In [20]:
# Search space for the logistic-regression hyperparameters.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10],
    solver=['liblinear', 'saga'],
    max_iter=[100, 200, 300],
)

# Exhaustive search over every combination, ranked by 5-fold accuracy on the
# PCA-projected training split.
grid_search = GridSearchCV(
    estimator=log_reg,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_pca, y_train)
best_params = grid_search.best_params_



In [21]:
# Re-create the classifier with the winning hyperparameters and fit it on the
# whole PCA-projected training split.
best_log_reg = LogisticRegression(**best_params)
best_log_reg.fit(X_train_pca, y_train)



In [22]:
# Score the tuned model with 5-fold cross-validation on the training split
# and keep the mean accuracy across folds.
cv_scores = cross_val_score(
    best_log_reg,
    X_train_pca,
    y_train,
    cv=5,
    scoring='accuracy',
)
cv_accuracy = cv_scores.mean()



In [23]:
# Predict the held-out rows with the tuned logistic regression.
y_pred = best_log_reg.predict(X_test_pca)
# Overall fraction of correct predictions on the test split.
accuracy = accuracy_score(y_test, y_pred)
# Per-class breakdown: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)
# Text table of precision / recall / f1 / support for each class.
classification_rep = classification_report(y_test, y_pred)

In [24]:
# Report the tuned model's results: one labelled line (or block) per metric.
summary_rows = (
    ("Best Hyperparameters:", best_params),
    ("Cross-Validation Accuracy:", cv_accuracy),
    ("Test Set Accuracy:", accuracy),
    ("Confusion Matrix:\n", conf_matrix),
    ("Classification Report:\n", classification_rep),
)
for heading, value in summary_rows:
    print(heading, value)

Best Hyperparameters: {'C': 0.01, 'max_iter': 200, 'solver': 'saga'}
Cross-Validation Accuracy: 0.8348491307848989
Test Set Accuracy: 0.8370457209847597
Confusion Matrix:
 [[74  0  0  6  1  0  7  0  0  0]
 [ 0 81  0  2  0  0  0  0  1  0]
 [ 0  0 71  0 12  0  7  0  0  0]
 [ 1  2  2 85  7  0  3  0  0  0]
 [ 0  1  8  1 64  1 14  0  1  0]
 [ 0  0  0  0  0 74  0  2  0  4]
 [15  0  9  1  7  0 41  0  1  0]
 [ 0  0  0  0  0  7  0 79  0  3]
 [ 0  0  1  1  0  2  2  0 66  1]
 [ 0  0  0  0  0  2  0  4  0 79]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.84      0.83        88
           1       0.96      0.96      0.96        84
           2       0.78      0.79      0.78        90
           3       0.89      0.85      0.87       100
           4       0.70      0.71      0.71        90
           5       0.86      0.93      0.89        80
           6       0.55      0.55      0.55        74
           7       0.93      0.89      0

In [25]:
from sklearn.ensemble import RandomForestClassifier


# Random-forest estimator with library defaults; it is the template passed to
# the grid search.
# NOTE(review): no random_state is set, so fitted forests differ between runs.
rf = RandomForestClassifier()

In [None]:
# Search space for the random forest: ensemble size, tree depth and the
# minimum sample counts required to split a node / form a leaf.
# Note that `param_grid` and `grid_search` are rebound here, replacing the
# logistic-regression objects of the same name.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Exhaustive 5-fold search ranked by accuracy on the PCA-projected features.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train_pca, y_train)
best_params_rf = grid_search.best_params_

In [None]:
# Re-create the forest with the winning hyperparameters and fit it on the
# whole PCA-projected training split.
best_rf = RandomForestClassifier(**best_params_rf)
best_rf.fit(X_train_pca, y_train)

In [None]:
# Predict the held-out rows with the tuned forest and record test accuracy.
y_pred_rf = best_rf.predict(X_test_pca)
accuracy_rf = accuracy_score(y_test, y_pred_rf)

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with library defaults as the search template.
svm = SVC()

# Candidate regularisation strengths and kernel families.
param_grid_svm = dict(
    C=[0.1, 1, 10],
    kernel=['linear', 'rbf', 'poly'],
)

# Exhaustive 5-fold search ranked by accuracy on the PCA-projected features.
grid_search_svm = GridSearchCV(
    estimator=svm,
    param_grid=param_grid_svm,
    cv=5,
    scoring='accuracy',
)
grid_search_svm.fit(X_train_pca, y_train)
best_params_svm = grid_search_svm.best_params_

# Refit a fresh classifier with the winning settings on the training split.
best_svm = SVC(**best_params_svm)
best_svm.fit(X_train_pca, y_train)

# Accuracy on the held-out rows.
y_pred_svm = best_svm.predict(X_test_pca)
accuracy_svm = accuracy_score(y_test, y_pred_svm)


In [None]:
import xgboost as xgb

# Model Initialization: gradient-boosted trees with library defaults.
xgb_model = xgb.XGBClassifier()

# Hyperparameter Tuning: ensemble size, tree depth and shrinkage rate.
param_grid_xgb = dict(
    n_estimators=[100, 200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.3],
)
# Unlike the earlier models, this search runs on the raw training features
# (X_train), not on the scaled / PCA-projected matrices.
grid_search_xgb = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid_xgb,
    cv=5,
    scoring='accuracy',
)
grid_search_xgb.fit(X_train, y_train)
best_params_xgb = grid_search_xgb.best_params_

# Training the model with best hyperparameters
best_xgb = xgb.XGBClassifier(**best_params_xgb)
best_xgb.fit(X_train, y_train)

# Evaluate the model on the held-out rows.
y_pred_xgb = best_xgb.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)


In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer

# Feature Scaling
# KNN is distance based, so every feature is standardised first. The scaler
# is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Missing-Value Handling
# StandardScaler leaves NaN entries in place and KNeighborsClassifier refuses
# NaN input, so the scaled matrices cannot be passed to KNN directly when the
# data has gaps (the notebook already imputes before PCA for this reason).
# Fill them with the training-column mean; the test rows reuse those means.
knn_imputer = SimpleImputer(strategy='mean')
X_train_knn = knn_imputer.fit_transform(X_train_scaled)
X_test_knn = knn_imputer.transform(X_test_scaled)

# Model Initialization
knn = KNeighborsClassifier()

# Optimal K Selection - Hyperparameter Tuning
# Odd neighbour counts only; ranked by 5-fold accuracy.
param_grid_knn = {'n_neighbors': [3, 5, 7, 9, 11]}
grid_search_knn = GridSearchCV(knn, param_grid_knn, cv=5, scoring='accuracy')
grid_search_knn.fit(X_train_knn, y_train)
best_params_knn = grid_search_knn.best_params_

# Training the model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params_knn)
best_knn.fit(X_train_knn, y_train)

# Evaluate the model
y_pred_knn = best_knn.predict(X_test_knn)
accuracy_knn = accuracy_score(y_test, y_pred_knn)


In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.base import clone

# Missing-Value Handling
# This cell works on the raw features, which are never imputed elsewhere in
# the notebook (only the scaled copy is). GradientBoostingClassifier rejects
# NaN input, so gaps are filled with the training-column mean first; the test
# rows reuse the training means.
ens_imputer = SimpleImputer(strategy='mean')
X_train_ens = ens_imputer.fit_transform(X_train)
X_test_ens = ens_imputer.transform(X_test)

# Model Initialization - Base Models
# Seeded like the train/test split so the ensemble results are repeatable.
rf = RandomForestClassifier(random_state=42)
gbm = GradientBoostingClassifier(random_state=42)

# Voting Classifier
# Hard voting: each base model casts one vote per sample, majority wins.
voting_clf = VotingClassifier(estimators=[('rf', rf), ('gbm', gbm)], voting='hard')
voting_clf.fit(X_train_ens, y_train)

# Evaluate the Voting Classifier
accuracy_voting = voting_clf.score(X_test_ens, y_test)

# Stacking
meta_lr = LogisticRegression()

# Clone the base models to avoid interference with the voting ensemble
rf_clone = clone(rf)
gbm_clone = clone(gbm)

# Generate predictions for the meta learner
# These must be OUT-OF-FOLD: predicting the rows a model was trained on gives
# near-perfect outputs, so the meta learner would only learn to copy the base
# models. Class probabilities are used instead of predicted labels because
# label integers are categories, not magnitudes a linear model can weigh.
# NOTE: each call fits the base model 5 times; this is the expensive step.
rf_pred = cross_val_predict(rf_clone, X_train_ens, y_train, cv=5, method='predict_proba')
gbm_pred = cross_val_predict(gbm_clone, X_train_ens, y_train, cv=5, method='predict_proba')

# Create a new feature matrix for the meta learner
X_train_stacked = np.column_stack((rf_pred, gbm_pred))

# Fit the meta learner
meta_lr.fit(X_train_stacked, y_train)

# Fit the base models on the full training split for test-time predictions
# (cross_val_predict works on internal copies and leaves the clones unfitted).
rf_clone.fit(X_train_ens, y_train)
gbm_clone.fit(X_train_ens, y_train)

# Generate predictions for the test set and create the stacked feature matrix
rf_pred_test = rf_clone.predict_proba(X_test_ens)
gbm_pred_test = gbm_clone.predict_proba(X_test_ens)
X_test_stacked = np.column_stack((rf_pred_test, gbm_pred_test))

# Evaluate the Stacking ensemble
accuracy_stacking = meta_lr.score(X_test_stacked, y_test)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Model Initialization - Base Model
# NOTE(review): `base_model` is not referenced again in this cell; the model
# that is actually fitted is `bagging_model` below.
base_model = RandomForestClassifier()

# Bagging (Random Forest)
# A random forest is itself a bagged ensemble of decision trees, so it stands
# in for the bagging model here. Library defaults, no random_state.
bagging_model = RandomForestClassifier()

# Fit the Bagging model
# Trained on the raw training features (not the scaled / imputed / PCA copies).
# NOTE(review): whether NaN is accepted here depends on the installed
# scikit-learn version -- confirm if the data has gaps.
bagging_model.fit(X_train, y_train)

# Evaluate the Bagging model
# score() returns mean accuracy on the held-out rows.
accuracy_bagging = bagging_model.score(X_test, y_test)


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.impute import SimpleImputer

# Model Initialization - Base Model
# Kept for compatibility with the original cell; it is not used below.
base_model = GradientBoostingClassifier()

# Missing-Value Handling
# GradientBoostingClassifier rejects NaN input, and the raw features used
# here are never imputed elsewhere in the notebook (only the scaled copy is).
# Fill gaps with the training-column mean; test rows reuse those means.
boost_imputer = SimpleImputer(strategy='mean')
X_train_boost = boost_imputer.fit_transform(X_train)
X_test_boost = boost_imputer.transform(X_test)

# Boosting (Gradient Boosting Machines)
# Seeded like the train/test split so the reported accuracy is repeatable.
boosting_model = GradientBoostingClassifier(random_state=42)

# Fit the Boosting model
boosting_model.fit(X_train_boost, y_train)

# Evaluate the Boosting model
# score() returns mean accuracy on the held-out rows.
accuracy_boosting = boosting_model.score(X_test_boost, y_test)