In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [2]:
def pca_selection(X, n_components):
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X)
    return X_pca, pca

def split_scalar(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

def cm_prediction(classifier, X_test, y_test):
    y_pred = classifier.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return classifier, Accuracy, report, cm

def logistic(X_train, y_train, X_test, y_test):
    classifier = LogisticRegression(random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    classifier = SVC(kernel='linear', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test):
    classifier = SVC(kernel='rbf', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def navie(X_train, y_train, X_test, y_test):
    classifier = GaussianNB()
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def knn(X_train, y_train, X_test, y_test):
    classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def decision_tree(X_train, y_train, X_test, y_test):
    classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)

def random_forest(X_train, y_train, X_test, y_test):
    classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    classifier.fit(X_train, y_train)
    return cm_prediction(classifier, X_test, y_test)


In [6]:

dataset = pd.read_csv("Wine.csv")


column_names = dataset.columns


print("Column names in the dataset:")
for name in column_names:
    print(name)


Column names in the dataset:
Alcohol
Malic_Acid
Ash
Ash_Alcanity
Magnesium
Total_Phenols
Flavanoids
Nonflavanoid_Phenols
Proanthocyanins
Color_Intensity
Hue
OD280
Proline
Customer_Segment


In [18]:
def main():
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
    
    # Read the dataset
    dataset = pd.read_csv("Wine.csv")
    
    # Split features and target variable
    X = dataset.drop('Customer_Segment', axis=1)
    y = dataset['Customer_Segment']
    
    # PCA selection
    X_pca, pca_model = pca_selection(X, n_components=6)

    # Split data into train and test sets
    X_train, X_test, y_train, y_test = split_scalar(X_pca, y)

    # Define classifiers
    classifiers = {
        "Logistic Regression": logistic,
        "SVM Linear": svm_linear,
        "SVM Non-Linear": svm_NL,
        "Naive Bayes": navie,
        "KNN": knn,
        "Decision Tree": decision_tree,
        "Random Forest": random_forest
    }

    # Perform classification
    results = {}
    for name, classifier in classifiers.items():
        _, accuracy, report, cm = classifier(X_train, y_train, X_test, y_test)
        results[name] = {"Accuracy": accuracy, "Classification Report": report, "Confusion Matrix": cm}

    return results

if __name__ == "__main__":
    results = main()
    for name, metrics in results.items():
        print(f"{name}:")
        print(f"Accuracy: {metrics['Accuracy']}")
        print(f"Classification Report:\n{metrics['Classification Report']}")
        print(f"Confusion Matrix:\n{metrics['Confusion Matrix']}\n")

Logistic Regression:
Accuracy: 0.9333333333333333
Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.88      0.93        16
           2       0.91      0.95      0.93        21
           3       0.89      1.00      0.94         8

    accuracy                           0.93        45
   macro avg       0.93      0.94      0.93        45
weighted avg       0.94      0.93      0.93        45

Confusion Matrix:
[[14  2  0]
 [ 0 20  1]
 [ 0  0  8]]

SVM Linear:
Accuracy: 0.9555555555555556
Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.94      0.97        16
           2       0.95      0.95      0.95        21
           3       0.89      1.00      0.94         8

    accuracy                           0.96        45
   macro avg       0.95      0.96      0.95        45
weighted avg       0.96      0.96      0.96        45

Confusion Matrix:
[[15  1  0]
 [ 0 20  1]
 [ 