In [4]:
# Step 1: Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

# Step 2: Load the classified dataset into a dataframe using pandas
# Assuming 'data.csv' is the name of your dataset file
df = pd.read_csv('Classified_Data.txt')

# Step 3: Check the data types of each feature(column) in the dataset
print("Data Types of Each Feature:")
print(df.dtypes)

# Step 4: Generate a summary of the dataset for min, max, stddev, quartile values for 25%,50%,75%,90%
print("\nSummary of the Dataset:")
print(df.describe(percentiles=[0.25, 0.50, 0.75, 0.90]))

# Step 5: List the names of columns/features in the dataset
print("\nNames of Columns/Features in the Dataset:")
print(df.columns)

# Step 6: Scale the features using StandardScaler and transform the data
scaler = StandardScaler()

# Check if 'target_column' exists in the DataFrame
if 'target_column' in df.columns:
    scaled_data = scaler.fit_transform(df.drop(columns=['target_column']))
    scaled_df = pd.DataFrame(scaled_data, columns=df.drop(columns=['target_column']).columns)
    # Display the scaled dataset
    print("\nScaled Data:")
    print(scaled_df.head())
else:
    print("'target_column' not found in the DataFrame.")



Data Types of Each Feature:
Unnamed: 0        int64
WTT             float64
PTI             float64
EQW             float64
SBI             float64
LQE             float64
QWG             float64
FDJ             float64
PJF             float64
HQE             float64
NXJ             float64
TARGET CLASS      int64
dtype: object

Summary of the Dataset:
        Unnamed: 0          WTT          PTI          EQW          SBI  \
count  1000.000000  1000.000000  1000.000000  1000.000000  1000.000000   
mean    499.500000     0.949682     1.114303     0.834127     0.682099   
std     288.819436     0.289635     0.257085     0.291554     0.229645   
min       0.000000     0.174412     0.441398     0.170924     0.045027   
25%     249.750000     0.742358     0.942071     0.615451     0.515010   
50%     499.500000     0.940475     1.118486     0.813264     0.676835   
75%     749.250000     1.163295     1.307904     1.028340     0.834317   
90%     899.100000     1.336612     1.441901     1.22

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Step 1: Load the classified dataset into a dataframe using pandas
df = pd.read_csv('Classified_Data.txt')  # Replace 'your_dataset.csv' with your dataset filename

# Step 2: Scale the features using StandardScaler
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.drop(columns=['TARGET CLASS']))
scaled_df = pd.DataFrame(scaled_data, columns=df.drop(columns=['TARGET CLASS']).columns)

# Step 3: Split the data into training and testing sets
X = scaled_df  # Features
y = df['TARGET CLASS']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Apply the KNN Classifier model
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # You can adjust the number of neighbors as needed

# Step 5: Fit the data to the Classifier Model
knn_classifier.fit(X_train, y_train)

print("Model training and fitting completed successfully.")


Model training and fitting completed successfully.


In [8]:
from sklearn.metrics import confusion_matrix, classification_report

# Step 1: Generate the confusion matrix
y_pred = knn_classifier.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Step 2: Generate the classification report
class_report = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(class_report)


Confusion Matrix:
[[92  6]
 [ 4 98]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.94      0.95        98
           1       0.94      0.96      0.95       102

    accuracy                           0.95       200
   macro avg       0.95      0.95      0.95       200
weighted avg       0.95      0.95      0.95       200

