# Logistic Regression
## just seeing how well features dataset does

In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [64]:
features_df = pd.read_csv('images.csv')

X = features_df.drop(columns=['Image_Name', 'Target'])
y = features_df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42, shuffle=True)

In [65]:
model = LogisticRegression(max_iter=1000, C=0.1)  # Increase max_iter if the model struggles to converge
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9948453608247423
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00       107
           1       0.99      1.00      0.99        87

    accuracy                           0.99       194
   macro avg       0.99      1.00      0.99       194
weighted avg       0.99      0.99      0.99       194



In [70]:
y_train_pred = model.predict(X_train)
accuracy_score(y_train, y_train_pred) # high training error -> overfitting

1.0

In [66]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)


[[106   1]
 [  0  87]]


In [67]:
from sklearn.metrics import precision_recall_curve
precision, recall, thresholds = precision_recall_curve(y_test, model.predict_proba(X_test)[:, 1])

print(precision, recall, thresholds)

[0.44845361 0.4507772  0.453125   0.45549738 0.45789474 0.46031746
 0.46276596 0.46524064 0.46774194 0.47027027 0.47282609 0.47540984
 0.47802198 0.48066298 0.48333333 0.48603352 0.48876404 0.49152542
 0.49431818 0.49714286 0.5        0.50289017 0.50581395 0.50877193
 0.51176471 0.5147929  0.51785714 0.52095808 0.52409639 0.52727273
 0.5304878  0.53374233 0.53703704 0.54037267 0.54375    0.54716981
 0.55063291 0.55414013 0.55769231 0.56129032 0.56493506 0.56862745
 0.57236842 0.57615894 0.58       0.58389262 0.58783784 0.59183673
 0.59589041 0.6        0.60416667 0.60839161 0.61267606 0.61702128
 0.62142857 0.62589928 0.63043478 0.6350365  0.63970588 0.64444444
 0.64925373 0.65413534 0.65909091 0.66412214 0.66923077 0.6744186
 0.6796875  0.68503937 0.69047619 0.696      0.7016129  0.70731707
 0.71311475 0.71900826 0.725      0.73109244 0.73728814 0.74358974
 0.75       0.75652174 0.76315789 0.7699115  0.77678571 0.78378378
 0.79090909 0.79816514 0.80555556 0.81308411 0.82075472 0.82857

In [68]:
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X, y, cv=stratified_kfold)
print("Stratified cross-validation scores:", scores)


Stratified cross-validation scores: [1.         0.98969072 0.98969072 1.         1.        ]


# mlp?

In [71]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Assuming you already have features_df and your target column in place
X = features_df.drop(columns=['Image_Name', 'Target'])
y = features_df['Target']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [72]:
# Initialize the model with a basic architecture (1 hidden layer, 10 neurons)
model = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000, random_state=42)

# Fit the model to the training data
model.fit(X_train, y_train)




In [73]:
# Predict on the training set
y_train_pred = model.predict(X_train)

# Calculate accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy:", train_accuracy)


Training Accuracy: 0.9381443298969072


In [74]:
# Predict on the test set
y_test_pred = model.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)


Test Accuracy: 0.9072164948453608


In [75]:
model = MLPClassifier(hidden_layer_sizes=(50, 30), max_iter=1000, learning_rate_init=0.001, activation='relu', random_state=42)

from sklearn.model_selection import cross_val_score

# Cross-validation score
cv_scores = cross_val_score(model, X, y, cv=5)
print("Cross-validation scores:", cv_scores)
print("Average cross-validation score:", cv_scores.mean())


Cross-validation scores: [0.93814433 0.97938144 0.97938144 1.         0.68041237]
Average cross-validation score: 0.9154639175257732
