In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score

data = pd.read_csv("glass.csv")

features = ['RI', 'Na', 'Mg', 'Al', 'Si', 'K', 'Ca', 'Ba', 'Fe']
results = []
for feature in features:
    X = data[[feature]]
    y = data['Type']
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train_scaled, y_train)
    
    y_prob = model.predict_proba(X_test_scaled)
    
    def evaluate_threshold(threshold, y_true, y_prob):
        y_pred = (y_prob[:, 1] >= threshold).astype(int)
        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='micro')
        recall = recall_score(y_true, y_pred, average='micro')
        return accuracy, precision, recall
    
    thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]
    
    threshold_list = []
    accuracy_list = []
    precision_list = []
    recall_list = []
    
    # Evaluate model performance for each threshold
    for threshold in thresholds:
        accuracy, precision, recall = evaluate_threshold(threshold, y_test, y_prob)
        threshold_list.append(threshold)
        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)
        
    results.append({
        'Feature': feature,
        'Threshold': threshold_list,
        'Accuracy': accuracy_list,
        'Precision': precision_list,
        'Recall': recall_list
    })
results = pd.DataFrame(results)
print(results)
# You can repeat the same analysis for other columns by changing the target variable 'y' and rerunning the code


  Feature                  Threshold  \
0      RI  [0.3, 0.4, 0.5, 0.6, 0.7]   
1      Na  [0.3, 0.4, 0.5, 0.6, 0.7]   
2      Mg  [0.3, 0.4, 0.5, 0.6, 0.7]   
3      Al  [0.3, 0.4, 0.5, 0.6, 0.7]   
4      Si  [0.3, 0.4, 0.5, 0.6, 0.7]   
5       K  [0.3, 0.4, 0.5, 0.6, 0.7]   
6      Ca  [0.3, 0.4, 0.5, 0.6, 0.7]   
7      Ba  [0.3, 0.4, 0.5, 0.6, 0.7]   
8      Fe  [0.3, 0.4, 0.5, 0.6, 0.7]   

                                            Accuracy  \
0  [0.2558139534883721, 0.046511627906976744, 0.0...   
1  [0.23255813953488372, 0.18604651162790697, 0.0...   
2  [0.2558139534883721, 0.023255813953488372, 0.0...   
3  [0.20930232558139536, 0.09302325581395349, 0.0...   
4           [0.2558139534883721, 0.0, 0.0, 0.0, 0.0]   
5          [0.23255813953488372, 0.0, 0.0, 0.0, 0.0]   
6  [0.2558139534883721, 0.046511627906976744, 0.0...   
7           [0.2558139534883721, 0.0, 0.0, 0.0, 0.0]   
8  [0.2558139534883721, 0.09302325581395349, 0.02...   

                                      

In [10]:
X_new = data.drop(columns=['Type'])
y = data['Type']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess numerical features by standardizing them
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_scaled, y_train)

# Predict probabilities for each class
y_prob = model.predict_proba(X_test_scaled)

# Function to evaluate model performance with different thresholds
def evaluate_threshold(threshold, y_true, y_prob):
    y_pred = (y_prob[:, 1] >= threshold).astype(int)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='micro')  # Set average to 'micro' for multiclass
    recall = recall_score(y_true, y_pred, average='micro')  # Set average to 'micro' for multiclass
    return accuracy, precision, recall

# Define thresholds to try
thresholds = [0.3, 0.4, 0.5, 0.6, 0.7]

# Initialize lists to store results
threshold_list = []
accuracy_list = []
precision_list = []
recall_list = []

# Evaluate model performance for each threshold
for threshold in thresholds:
    accuracy, precision, recall = evaluate_threshold(threshold, y_test, y_prob)
    threshold_list.append(threshold)
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)

# Create DataFrame from results
results = pd.DataFrame({
    'Threshold': threshold_list,
    'Accuracy': accuracy_list,
    'Precision': precision_list,
    'Recall': recall_list
})

print(results)

   Threshold  Accuracy  Precision    Recall
0        0.3  0.255814   0.255814  0.255814
1        0.4  0.093023   0.093023  0.093023
2        0.5  0.023256   0.023256  0.023256
3        0.6  0.000000   0.000000  0.000000
4        0.7  0.000000   0.000000  0.000000
