In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the pumpkin-seed measurements. The workbook path is relative, so keep
# the file next to the notebook or replace it with a full path.
dataset_path = 'Pumpkin_Seeds_Dataset.xlsx'
df = pd.read_excel(dataset_path)
df.head()

Unnamed: 0,Area,Perimeter,Major_Axis_Length,Minor_Axis_Length,Convex_Area,Equiv_Diameter,Eccentricity,Solidity,Extent,Roundness,Aspect_Ration,Compactness,Class
0,56276,888.242,326.1485,220.2388,56831,267.6805,0.7376,0.9902,0.7453,0.8963,1.4809,0.8207,Çerçevelik
1,76631,1068.146,417.1932,234.2289,77280,312.3614,0.8275,0.9916,0.7151,0.844,1.7811,0.7487,Çerçevelik
2,71623,1082.987,435.8328,211.0457,72663,301.9822,0.8749,0.9857,0.74,0.7674,2.0651,0.6929,Çerçevelik
3,66458,992.051,381.5638,222.5322,67118,290.8899,0.8123,0.9902,0.7396,0.8486,1.7146,0.7624,Çerçevelik
4,66107,998.146,383.8883,220.4545,67117,290.1207,0.8187,0.985,0.6752,0.8338,1.7413,0.7557,Çerçevelik


In [3]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Area                 0
Perimeter            0
Major_Axis_Length    0
Minor_Axis_Length    0
Convex_Area          0
Equiv_Diameter       0
Eccentricity         0
Solidity             0
Extent               0
Roundness            0
Aspect_Ration        0
Compactness          0
Class                0
dtype: int64

In [4]:
# Replace the string class labels with integer codes. The fitted encoder stays
# available as `le`, so codes can be mapped back via le.inverse_transform.
le = LabelEncoder().fit(df['Class'])
df['Class'] = le.transform(df['Class'])
# 0 = Çerçevelik, 1 = Ürgüp Sivrisi

In [5]:
# Remove four shape descriptors from the frame, then preview what remains.
dropped_features = ['Convex_Area', 'Equiv_Diameter', 'Eccentricity', 'Minor_Axis_Length']
df = df.drop(dropped_features, axis=1)
df.head()

Unnamed: 0,Area,Perimeter,Major_Axis_Length,Solidity,Extent,Roundness,Aspect_Ration,Compactness,Class
0,56276,888.242,326.1485,0.9902,0.7453,0.8963,1.4809,0.8207,0
1,76631,1068.146,417.1932,0.9916,0.7151,0.844,1.7811,0.7487,0
2,71623,1082.987,435.8328,0.9857,0.74,0.7674,2.0651,0.6929,0
3,66458,992.051,381.5638,0.9902,0.7396,0.8486,1.7146,0.7624,0
4,66107,998.146,383.8883,0.985,0.6752,0.8338,1.7413,0.7557,0


In [6]:
# Rescale the three large-magnitude size features to the [0, 1] range; the
# remaining columns are left untouched.
# NOTE(review): the scaler is fitted on the whole frame before the train/test
# split performed later, so test rows influence the learned min/max. Consider
# fitting on the training rows only.
# NOTE(review): this cell overwrites df in place. Re-running it on the already
# scaled frame would fit a (near) identity scaler and overwrite scaler.pkl with
# it, so run it exactly once per fresh kernel.
columns_to_scale = ['Area', 'Perimeter', 'Major_Axis_Length']
scaler = MinMaxScaler()
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])

# Save the scaler for the app. The context manager closes the file handle
# deterministically, so the pickle is fully flushed to disk once the cell ends.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

In [7]:
# Separate the predictors from the encoded target column, then hold out 20%
# of the rows for evaluation (fixed seed so the split is repeatable).
target = 'Class'
x = df.drop(columns=[target])
y = df[target]

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=30,
)
print(x_train.shape, x_test.shape)

(2000, 8) (500, 8)


In [8]:
# Logistic regression with scikit-learn's default settings.
logistic_regression = LogisticRegression().fit(x_train, y_train)
y_pred = logistic_regression.predict(x_test)

# Hold-out accuracy and the per-class precision/recall/F1 report.
acc_lr, c_lr = accuracy_score(y_test, y_pred), classification_report(y_test, y_pred)

print("Accuracy Score: ", acc_lr)
print(c_lr)

Accuracy Score:  0.856
              precision    recall  f1-score   support

           0       0.82      0.90      0.86       247
           1       0.90      0.81      0.85       253

    accuracy                           0.86       500
   macro avg       0.86      0.86      0.86       500
weighted avg       0.86      0.86      0.86       500



In [9]:
# Random forest classifier. This is the model that gets pickled at the end of
# the notebook, so it is seeded: without random_state, bootstrap sampling and
# feature sub-sampling differ on every run, and neither the reported scores nor
# the saved model could be reproduced. The seed matches the train/test split.
random_forest = RandomForestClassifier(random_state=30)
random_forest.fit(x_train, y_train)
y_pred = random_forest.predict(x_test)

# Hold-out accuracy and the per-class precision/recall/F1 report.
acc_rf = accuracy_score(y_test, y_pred)
c_rf = classification_report(y_test, y_pred)

print("Accuracy Score: ", acc_rf)
print(c_rf)

Accuracy Score:  0.878
              precision    recall  f1-score   support

           0       0.85      0.92      0.88       247
           1       0.91      0.84      0.87       253

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



In [10]:
# Single decision tree. scikit-learn randomly permutes the candidate features
# at every split, so the tree (and its score) can change between runs unless
# random_state is fixed. The seed matches the train/test split.
decision_tree_model = DecisionTreeClassifier(random_state=30)
decision_tree_model.fit(x_train, y_train)
y_pred = decision_tree_model.predict(x_test)

# Hold-out accuracy and the per-class precision/recall/F1 report.
acc_dt = accuracy_score(y_test, y_pred)
c_dt = classification_report(y_test, y_pred)

print("Accuracy Score: ", acc_dt)
print(c_dt)

Accuracy Score:  0.83
              precision    recall  f1-score   support

           0       0.83      0.83      0.83       247
           1       0.83      0.83      0.83       253

    accuracy                           0.83       500
   macro avg       0.83      0.83      0.83       500
weighted avg       0.83      0.83      0.83       500



In [11]:
# Naive Bayes classifier.
# NOTE(review): MultinomialNB is designed for count-like features, whereas the
# inputs here are continuous shape measurements; GaussianNB may be the more
# appropriate variant — confirm the intent before relying on this score.
NB = MultinomialNB()
y_pred = NB.fit(x_train, y_train).predict(x_test)

# Hold-out accuracy and the per-class precision/recall/F1 report.
acc_nb = accuracy_score(y_test, y_pred)
c_nb = classification_report(y_test, y_pred)

print("Accuracy Score: ", acc_nb)
print(c_nb)

Accuracy Score:  0.714
              precision    recall  f1-score   support

           0       0.64      0.97      0.77       247
           1       0.94      0.46      0.62       253

    accuracy                           0.71       500
   macro avg       0.79      0.72      0.70       500
weighted avg       0.79      0.71      0.69       500



In [12]:
# Support vector classifier with scikit-learn's default settings.
support_vector = SVC()
y_pred = support_vector.fit(x_train, y_train).predict(x_test)

# Score the hold-out predictions.
c_svc = classification_report(y_test, y_pred)
acc_svc = accuracy_score(y_test, y_pred)

print("Accuracy Score: ", acc_svc)
print(c_svc)

Accuracy Score:  0.854
              precision    recall  f1-score   support

           0       0.81      0.93      0.86       247
           1       0.92      0.78      0.84       253

    accuracy                           0.85       500
   macro avg       0.86      0.85      0.85       500
weighted avg       0.86      0.85      0.85       500



In [13]:
# Gradient-boosted trees. random_state seeds the individual tree estimators
# (including the feature permutation at each split), so fixing it makes the
# reported score repeatable. The seed matches the train/test split.
GBC = GradientBoostingClassifier(random_state=30)
GBC.fit(x_train, y_train)
y_pred = GBC.predict(x_test)

# Hold-out accuracy and the per-class precision/recall/F1 report.
acc_gbc = accuracy_score(y_test, y_pred)
c_gbc = classification_report(y_test, y_pred)

print("Accuracy Score: ", acc_gbc)
print(c_gbc)

Accuracy Score:  0.876
              precision    recall  f1-score   support

           0       0.84      0.93      0.88       247
           1       0.92      0.83      0.87       253

    accuracy                           0.88       500
   macro avg       0.88      0.88      0.88       500
weighted avg       0.88      0.88      0.88       500



In [14]:
# Persist the trained random forest for the app. Opening the file in a `with`
# block guarantees the handle is closed (and the pickle fully flushed) before
# the success message is printed.
with open("model.pkl", "wb") as model_file:
    pickle.dump(random_forest, model_file)
print("Model saved successfully.")

Model saved successfully.
