In [4]:
# Load the uploaded Iris CSV and preview it before training the
# Logistic Regression and K-NN classifiers.
import pandas as pd

# Location of the uploaded dataset
file_path = 'Iris.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Cell output: the first 10 rows of the frame
df.head(n=10)


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
5,6,5.4,3.9,1.7,0.4,Iris-setosa
6,7,4.6,3.4,1.4,0.3,Iris-setosa
7,8,5.0,3.4,1.5,0.2,Iris-setosa
8,9,4.4,2.9,1.4,0.2,Iris-setosa
9,10,4.9,3.1,1.5,0.1,Iris-setosa


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Choose the feature columns explicitly and set up the label encoder.
# `df` is left untouched so this cell can be re-run safely: dropping 'Id' in
# place would raise a KeyError on a second run and would make the preview
# displayed by the loading cell stale.
FEATURE_COLUMNS = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
label_encoder = LabelEncoder()

# Step 2: Define features (X) and target (y)
X = df[FEATURE_COLUMNS]  # Features ('Id' is a row counter, not a measurement)
# Target as integer class codes; label_encoder.classes_ maps each code back
# to its species name.
y = pd.Series(
    label_encoder.fit_transform(df['Species']),
    index=df.index,
    name='Species',
)

# Step 3: Split the dataset into training and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Standardize the data (important for K-NN, which is distance based).
# The scaler is fitted on the training split only so that no test-set
# statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 5: Train Logistic Regression model
logreg = LogisticRegression(max_iter=200)
logreg.fit(X_train, y_train)

# Step 6: Train K-Nearest Neighbors (K-NN) model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Step 7: Make predictions with both models
y_pred_logreg = logreg.predict(X_test)
y_pred_knn = knn.predict(X_test)

# Step 8: Evaluate Logistic Regression model
logreg_accuracy = accuracy_score(y_test, y_pred_logreg)
logreg_report = classification_report(y_test, y_pred_logreg)

# Step 9: Evaluate K-NN model
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)

# Display the results neatly
print(f"Logistic Regression Accuracy: {logreg_accuracy:.2f}")
print("Logistic Regression Classification Report:")
print(logreg_report)

print(f"K-Nearest Neighbors (K-NN) Accuracy: {knn_accuracy:.2f}")
print("K-Nearest Neighbors (K-NN) Classification Report:")
print(knn_report)


Logistic Regression Accuracy: 1.00
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

K-Nearest Neighbors (K-NN) Accuracy: 1.00
K-Nearest Neighbors (K-NN) Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [6]:
# Load the uploaded Social Network Ads CSV and preview it before training the
# Logistic Regression and K-NN classifiers.
import pandas as pd

# Location of the uploaded dataset
file_path = 'Social_Network_Ads.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Cell output: the first 20 rows of the frame
df.head(n=20)


Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0
5,27,58000,0
6,27,84000,0
7,32,150000,1
8,25,33000,0
9,35,65000,0


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# --- Data preparation -------------------------------------------------------
# Two numeric predictors and the binary purchase flag.
X = df.loc[:, ['Age', 'EstimatedSalary']]
y = df['Purchased']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scale using statistics learned from the training split only, then apply the
# same transformation to both splits (scaling matters for distance-based K-NN).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# --- Logistic Regression: fit, predict, score -------------------------------
logreg = LogisticRegression(max_iter=200).fit(X_train, y_train)
y_pred_logreg = logreg.predict(X_test)
logreg_accuracy = accuracy_score(y_test, y_pred_logreg)
logreg_report = classification_report(y_test, y_pred_logreg)

# --- K-Nearest Neighbors (k=5): fit, predict, score -------------------------
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)

# --- Report both models -----------------------------------------------------
print(f"Logistic Regression Accuracy: {logreg_accuracy:.2f}")
print("Logistic Regression Classification Report:")
print(logreg_report)

print(f"K-Nearest Neighbors (K-NN) Accuracy: {knn_accuracy:.2f}")
print("K-Nearest Neighbors (K-NN) Classification Report:")
print(knn_report)


Logistic Regression Accuracy: 0.86
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.96      0.90        52
           1       0.90      0.68      0.78        28

    accuracy                           0.86        80
   macro avg       0.88      0.82      0.84        80
weighted avg       0.87      0.86      0.86        80

K-Nearest Neighbors (K-NN) Accuracy: 0.91
K-Nearest Neighbors (K-NN) Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        52
           1       0.86      0.89      0.88        28

    accuracy                           0.91        80
   macro avg       0.90      0.91      0.90        80
weighted avg       0.91      0.91      0.91        80

