# Data Set 1 - Loan Default Prediction Form

## Dataset provided in class

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

In [None]:
# Load the Loan Default Dataset
print("Loading Loan Default Dataset...")

# Load the dataset.
# A missing file is reported and then re-raised rather than calling exit():
# raising fails this cell (and stops "Run All") with a traceback while leaving
# the kernel alive, and avoids later cells failing on an undefined `df`.
try:
    df = pd.read_csv('Loan_Default.csv')
    print(f"Dataset loaded successfully with shape: {df.shape}")
except FileNotFoundError:
    print("Error: File 'Loan_Default.csv' not found.")
    raise

# Print initial class distribution (percentage of rows per 'Status' value)
print("\nInitial class distribution:")
class_dist = df['Status'].value_counts(normalize=True) * 100
print(class_dist)

Loading Loan Default Dataset...
Dataset loaded successfully with shape: (148670, 34)

Initial class distribution:
Status
0    75.355485
1    24.644515
Name: proportion, dtype: float64


In [None]:
# Select features
base_numerical_features = ['loan_amount', 'term', 'income', 'Credit_Score', 'dtir1']
base_categorical_features = ['Gender', 'loan_type', 'loan_purpose', 'Credit_Worthiness', 'occupancy_type']

# Keep only needed features.
# .copy() makes the subset an independent frame: the following cells assign
# new/imputed columns into `df`, which would otherwise be flagged by pandas as
# writing to a slice of the originally loaded frame (SettingWithCopyWarning).
df = df[base_numerical_features + base_categorical_features + ['Status']].copy()


In [None]:
# Fill gaps in the raw columns: medians for numeric fields, the most frequent
# value for categorical fields.
# NOTE(review): both imputers are fit on the full frame before the train/test
# split, so test rows contribute to the fill statistics - confirm this is acceptable.
print("\nHandling missing values...")
num_imputer = SimpleImputer(strategy='median')
cat_imputer = SimpleImputer(strategy='most_frequent')

for _imputer, _columns in (
    (num_imputer, base_numerical_features),
    (cat_imputer, base_categorical_features),
):
    df[_columns] = _imputer.fit_transform(df[_columns])



Handling missing values...


In [None]:
# Engineer affordability features from the raw loan columns.
print("\nCreating derived features...")

# The payment estimate uses the standard amortisation formula with a fixed
# 6% annual rate, compounded monthly over `term` periods.
# NOTE(review): a term of 0 makes the denominator 0 - confirm the data never contains it.
monthly_rate = 0.06 / 12
compound_growth = (1 + monthly_rate) ** df['term']
# Income is shifted by 1 in the ratios below (presumably to avoid dividing by a zero income).
income_plus_one = df['income'] + 1

df['loan_to_income'] = df['loan_amount'] / income_plus_one
df['monthly_payment'] = (df['loan_amount'] * monthly_rate * compound_growth) / (compound_growth - 1)
df['payment_to_income'] = (df['monthly_payment'] * 12) / income_plus_one
df['credit_score_scaled'] = df['Credit_Score'] / 850
# Monthly income left after existing debt (dtir1 is treated as a percentage) and the new payment.
df['disposable_income'] = (df['income'] / 12) * (1 - df['dtir1'] / 100) - df['monthly_payment']



Creating derived features...


In [None]:
# Bucket three continuous measures into ordinal risk levels 0 (lowest) to 3 (highest).
print("\nCalculating risk indicators...")

_NEG_INF, _POS_INF = -float('inf'), float('inf')

# (new column, source column, bin edges, label assigned to each bin)
# Credit score labels run in reverse because a higher score means lower risk.
# NOTE(review): the credit-score bins stop at 850, so any score above 850 falls
# outside every bin and is mapped to level 3 by fillna(3) - confirm that is intended.
risk_bin_specs = [
    ('high_dti', 'dtir1', [_NEG_INF, 36, 43, 50, _POS_INF], [0, 1, 2, 3]),
    ('high_loan_to_income', 'loan_to_income', [_NEG_INF, 2.5, 3, 4, _POS_INF], [0, 1, 2, 3]),
    ('credit_risk', 'Credit_Score', [_NEG_INF, 580, 640, 700, 850], [3, 2, 1, 0]),
]

for _target, _source, _edges, _levels in risk_bin_specs:
    _binned = pd.cut(df[_source], bins=_edges, labels=_levels)
    # Values that land in no bin come back as NaN and are treated as the highest level.
    df[_target] = _binned.fillna(3).astype(int)



Calculating risk indicators...


In [None]:
# Final numeric model inputs, grouped by origin. Order matters: the scaler and
# the prediction helper both index columns through this list.
# Note that 'term' is only used to derive monthly_payment and is not itself a model input.
_raw_numeric = ['loan_amount', 'income', 'Credit_Score', 'dtir1']
_derived_numeric = [
    'loan_to_income', 'payment_to_income', 'monthly_payment',
    'disposable_income', 'credit_score_scaled',
]
_risk_levels_numeric = ['high_dti', 'high_loan_to_income', 'credit_risk']

numerical_features = _raw_numeric + _derived_numeric + _risk_levels_numeric


In [None]:
# Assemble the modeling inputs: numeric columns first, then the raw categorical
# columns; 'Status' is the binary target.
feature_columns = numerical_features + base_categorical_features
y = df['Status']
X = df[feature_columns]


In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [None]:
# Standardise the numeric columns. The scaler learns its statistics from the
# training rows only and is then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train[numerical_features])

# Rebuild DataFrames so the original row index and column names are preserved.
X_train_scaled = pd.DataFrame(
    scaler.transform(X_train[numerical_features]),
    index=X_train.index,
    columns=numerical_features,
)
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test[numerical_features]),
    index=X_test.index,
    columns=numerical_features,
)


In [None]:
# Integer-encode each categorical column with its own LabelEncoder. The
# encoders are kept in le_dict so the prediction helper can reuse them.
# The encoded columns are appended unscaled next to the standardised numeric ones.
le_dict = {name: preprocessing.LabelEncoder() for name in base_categorical_features}

for feature, _encoder in le_dict.items():
    # Learn the label mapping from the training rows, then apply it to the test rows.
    X_train_scaled[feature] = _encoder.fit_transform(X_train[feature])
    X_test_scaled[feature] = _encoder.transform(X_test[feature])


In [None]:
# Find best k using cross-validation
# Each candidate k is scored with 5-fold cross-validated F1 on the training
# split; the first k with the highest mean score wins.
# NOTE(review): X_train_scaled was standardised with statistics from the whole
# training split, so the validation folds are not fully unseen by the scaler.
print("\nFinding optimal k...")
k_values = [2,3,4,5,7,9,13,15]
best_k = None
# Start below any attainable score so best_k is always assigned, even if every
# candidate scores 0 (a 0 sentinel would leave best_k as None and break the
# final model fit). This matches the sentinel used by the regression search.
best_score = float('-inf')

for k in k_values:
    model = KNeighborsClassifier(
        n_neighbors=k,
        weights='distance',
        metric='manhattan'
    )
    scores = cross_val_score(model, X_train_scaled, y_train, cv=5, scoring='f1')
    mean_score = scores.mean()
    print(f"k={k}, F1 Score={mean_score:.4f}")

    # Strict comparison keeps the earliest k on ties.
    if mean_score > best_score:
        best_score = mean_score
        best_k = k

print(f"\nBest k: {best_k} (F1 Score: {best_score:.4f})")



Finding optimal k...
k=2, F1 Score=0.4287
k=3, F1 Score=0.4326
k=4, F1 Score=0.4371
k=5, F1 Score=0.4316
k=7, F1 Score=0.4277
k=9, F1 Score=0.4203
k=13, F1 Score=0.4066
k=15, F1 Score=0.4004

Best k: 4 (F1 Score: 0.4371)


In [None]:
# Refit a KNN classifier on the full training split using the selected k and
# the same weighting/metric settings that were cross-validated.
print("\nTraining final model...")
final_knn_settings = {
    'n_neighbors': best_k,
    'weights': 'distance',
    'metric': 'manhattan',
}
final_model = KNeighborsClassifier(**final_knn_settings)
final_model.fit(X_train_scaled, y_train)



Training final model...


In [None]:
# Score the fitted classifier on the held-out rows.
print("\nEvaluating on test set...")
y_pred = final_model.predict(X_test_scaled)
# Class probabilities are kept for later inspection; they are not printed here.
y_prob = final_model.predict_proba(X_test_scaled)

test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nTest Set Evaluation:")
print("\nConfusion Matrix:")
print(test_confusion)
print("\nClassification Report:")
print(test_report)



Evaluating on test set...

Test Set Evaluation:

Confusion Matrix:
[[19577  2829]
 [ 4418  2910]]

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.87      0.84     22406
           1       0.51      0.40      0.45      7328

    accuracy                           0.76     29734
   macro avg       0.66      0.64      0.64     29734
weighted avg       0.74      0.76      0.75     29734



In [None]:
def predict_loan_default(input_data):
    """
    Predict loan default for a single applicant using the trained KNN model.

    Rebuilds the derived features and risk indicators with the same formulas
    as the training cells, standardises the numeric columns with the fitted
    ``scaler``, label-encodes the categorical columns with ``le_dict`` and
    queries ``final_model``. A trace of each step and a human-readable risk
    summary are printed along the way.

    Parameters
    ----------
    input_data : dict
        One applicant. Must contain 'loan_amount', 'term', 'income',
        'Credit_Score', 'dtir1' and every name in ``base_categorical_features``.

    Returns
    -------
    tuple
        ``(predicted_default, probabilities)`` where ``predicted_default`` is
        True when the probability of class 1 (default) exceeds 0.5, and
        ``probabilities`` is the ``predict_proba`` row
        ``[P(no default), P(default)]``.

    Raises
    ------
    KeyError
        If a required key is missing from ``input_data``.
    ValueError
        Re-raised from the label encoder when a categorical value cannot be
        transformed.
    """
    # Create a one-row DataFrame with the same raw columns as the training data
    input_df = pd.DataFrame([input_data])

    print("\nProcessing input data...")
    print("Original input:", input_data)

    # Derived features: fixed 6% annual rate, compounded monthly over `term` periods
    monthly_rate = 0.06 / 12
    growth = (1 + monthly_rate) ** input_df['term']
    input_df['loan_to_income'] = input_df['loan_amount'] / (input_df['income'] + 1)
    input_df['monthly_payment'] = (input_df['loan_amount'] * monthly_rate * growth) / (growth - 1)
    input_df['payment_to_income'] = (input_df['monthly_payment'] * 12) / (input_df['income'] + 1)
    input_df['credit_score_scaled'] = input_df['Credit_Score'] / 850
    input_df['disposable_income'] = (
        (input_df['income'] / 12) * (1 - input_df['dtir1'] / 100) - input_df['monthly_payment']
    )

    # Risk indicators: ordinal levels 0 (lowest) to 3 (highest); values that
    # fall in no bin become NaN and are mapped to level 3.
    input_df['high_dti'] = pd.cut(input_df['dtir1'],
                                 bins=[-float('inf'), 36, 43, 50, float('inf')],
                                 labels=[0, 1, 2, 3]).fillna(3).astype(int)
    input_df['high_loan_to_income'] = pd.cut(input_df['loan_to_income'],
                                            bins=[-float('inf'), 2.5, 3, 4, float('inf')],
                                            labels=[0, 1, 2, 3]).fillna(3).astype(int)
    # NOTE(review): the last bin ends at 850, so a score above 850 is mapped to
    # level 3 (highest risk) by fillna - the training cell uses the same bins;
    # confirm this is intended before changing either place.
    input_df['credit_risk'] = pd.cut(input_df['Credit_Score'],
                                    bins=[-float('inf'), 580, 640, 700, 850],
                                    labels=[3, 2, 1, 0]).fillna(3).astype(int)

    # Scale the numeric columns with the scaler fitted on the training split
    print("\nProcessing features...")
    numerical_input = input_df[numerical_features].copy()
    print("Numerical features before scaling:", numerical_input.iloc[0].to_dict())

    numerical_scaled = pd.DataFrame(
        scaler.transform(numerical_input),
        columns=numerical_features
    )
    print("Numerical features after scaling:", numerical_scaled.iloc[0].to_dict())

    # Append the label-encoded categorical columns in training order
    for feature in base_categorical_features:
        try:
            numerical_scaled[feature] = le_dict[feature].transform([input_df[feature].iloc[0]])
        except ValueError as e:
            print(f"Error processing {feature}: {e}")
            raise

    print("\nMaking prediction using model...")
    print(f"Model parameters: {final_model.get_params()}")

    # Only the probabilities are needed: the decision is derived from them,
    # so a separate predict() call (another neighbour search) is not made.
    probability = final_model.predict_proba(numerical_scaled)

    # probability[0][1] is the probability of default;
    # the applicant is flagged as "Default" when it exceeds 0.5.
    predicted_default = bool(probability[0][1] > 0.5)

    print(f"\nRaw prediction: {'Default' if predicted_default else 'No Default'}")
    print(f"Probability distribution: No Default: {probability[0][0]:.4f}, Default: {probability[0][1]:.4f}")

    # Get nearest neighbors for explanation
    distances, indices = final_model.kneighbors(numerical_scaled)
    print(f"\nNearest neighbor distances: {distances[0]}")

    print("\nFeature Analysis:")
    print("\nKey Financial Metrics:")
    print(f"Loan Amount: ${input_df['loan_amount'].iloc[0]:,.2f}")
    print(f"Annual Income: ${input_df['income'].iloc[0]:,.2f}")
    print(f"Monthly Payment: ${input_df['monthly_payment'].iloc[0]:,.2f}")
    print(f"Disposable Income: ${input_df['disposable_income'].iloc[0]:,.2f}")
    print(f"Credit Score: {input_df['Credit_Score'].iloc[0]}")

    print("\nRisk Ratios:")
    print(f"Loan-to-Income: {input_df['loan_to_income'].iloc[0]:.2f}x")
    print(f"Payment-to-Income: {input_df['payment_to_income'].iloc[0]*100:.1f}%")
    print(f"DTI Ratio: {input_df['dtir1'].iloc[0]:.1f}%")

    print("\nRisk Assessment:")
    dti_level = input_df['high_dti'].iloc[0]
    lti_level = input_df['high_loan_to_income'].iloc[0]
    credit_level = input_df['credit_risk'].iloc[0]

    risk_levels = ['Low', 'Moderate', 'High', 'Very High']
    print(f"DTI Risk: {risk_levels[dti_level]}")
    print(f"Loan-to-Income Risk: {risk_levels[lti_level]}")
    print(f"Credit Risk: {risk_levels[credit_level]}")

    # Three indicators with a maximum of 3 each -> rescale the sum to 0-100
    risk_score = ((dti_level + lti_level + credit_level) / 9 * 100)
    print(f"\nOverall Risk Score: {risk_score:.1f}/100")

    return predicted_default, probability[0]


In [None]:
def get_user_input():
    """
    Interactively collect one loan application from the console.

    Prompts for the numeric fields, derives the debt-to-income ratio from the
    applicant's existing monthly debt plus the estimated payment on the new
    loan (6% annual rate), then prompts for the categorical fields. Every
    prompt repeats until a valid answer is given.

    Validation rules:
      * numeric answers must be finite numbers and not negative;
      * the loan term must be at least 1 month and the income greater than
        zero, because both are divisors in the payment / DTI formulas;
      * the credit score must lie in 300-850.

    Returns
    -------
    dict
        Keys 'loan_amount', 'term', 'income', 'Credit_Score', 'dtir1'
        (percentage), 'Gender', 'loan_type', 'loan_purpose',
        'Credit_Worthiness' and 'occupancy_type'.
    """
    print("\n\nInteractive Loan Default Prediction")
    print("===================================")

    user_input = {}

    # Get numerical features
    print("\nEnter numerical values:")
    print("\nNote: These values significantly impact your loan application.")
    prompts = {
        'loan_amount': "Enter loan amount ($): ",
        'term': "Enter loan term (in months, e.g., 360 for 30 years): ",
        'income': "Enter annual income ($): ",
        'Credit_Score': "Enter credit score (300-850): ",
    }
    for feature, prompt in prompts.items():
        while True:
            try:
                value = float(input(prompt))
            except ValueError:
                print("Please enter a valid number")
                continue

            # float() accepts "nan" and "inf"; neither is a usable amount
            if not np.isfinite(value):
                print("Please enter a valid number")
                continue
            if feature == 'Credit_Score' and (value < 300 or value > 850):
                print("Credit Score should be between 300 and 850")
                continue
            if value < 0:
                print("Value cannot be negative")
                continue
            # A zero-length term zeroes the amortisation denominator below
            if feature == 'term' and value < 1:
                print("Loan term must be at least 1 month")
                continue
            # A zero income zeroes the DTI denominator below
            if feature == 'income' and value == 0:
                print("Annual income must be greater than zero")
                continue

            if feature == 'income':
                monthly_income = value / 12
                print(f"Monthly income: ${monthly_income:,.2f}")
            user_input[feature] = value
            break

    # Get monthly debt obligations
    print("\nMonthly Debt Obligations:")
    print("Please enter your current monthly debt payments (excluding the new loan):")
    while True:
        try:
            monthly_debt = float(input("Enter total monthly debt payments ($): "))
        except ValueError:
            print("Please enter a valid number")
            continue

        if not np.isfinite(monthly_debt):
            print("Please enter a valid number")
            continue
        if monthly_debt < 0:
            print("Monthly debt cannot be negative")
            continue

        # Calculate monthly income and DTI
        monthly_income = user_input['income'] / 12

        # Calculate estimated new monthly payment for the loan
        r = 0.06 / 12  # Monthly interest rate (6% annual)
        n = user_input['term']  # Number of months
        new_monthly_payment = (user_input['loan_amount'] * r * (1 + r)**n) / ((1 + r)**n - 1)

        # Calculate total monthly debt including new loan payment
        total_monthly_debt = monthly_debt + new_monthly_payment

        # Calculate DTI ratio (as a percentage)
        dti = (total_monthly_debt / monthly_income) * 100

        print(f"\nDTI Calculation Summary:")
        print(f"Current monthly debt: ${monthly_debt:,.2f}")
        print(f"Estimated new loan payment: ${new_monthly_payment:,.2f}")
        print(f"Total monthly debt: ${total_monthly_debt:,.2f}")
        print(f"Monthly income: ${monthly_income:,.2f}")
        print(f"Calculated DTI ratio: {dti:.1f}%")

        if dti > 100:
            print("\nWarning: DTI ratio exceeds 100%. This means monthly debts exceed monthly income.")
            proceed = input("Do you want to proceed with this DTI? (yes/no): ").lower()
            if proceed != 'yes':
                continue

        user_input['dtir1'] = dti
        break

    # Get categorical features
    # NOTE(review): the accepted codes below are hard-coded; any value the
    # training data never contained will be rejected later by the label
    # encoders - verify each list against the dataset's actual categories.
    print("\nEnter categorical values:")

    # Gender
    while True:
        gender = input("\nEnter Gender (Male/Female/Other): ").capitalize()
        if gender in ['Male', 'Female', 'Other']:
            user_input['Gender'] = gender
            break
        print("Invalid gender. Please enter Male, Female, or Other")

    # Loan Type
    print("\nLoan Types:")
    print("type1: Conventional Loan (traditional mortgage)")
    print("type2: FHA/VA/Special Program Loan")
    while True:
        loan_type = input("Enter loan type (type1/type2): ").lower()
        if loan_type in ['type1', 'type2']:
            user_input['loan_type'] = loan_type
            break
        print("Invalid loan type. Please enter type1 or type2")

    # Loan Purpose
    print("\nLoan Purpose:")
    print("p1: Home Purchase")
    print("p2: Refinancing")
    print("p3: Home Improvement")
    print("p4: Other")
    while True:
        loan_purpose = input("Enter loan purpose (p1/p2/p3/p4): ").lower()
        if loan_purpose in ['p1', 'p2', 'p3', 'p4']:
            user_input['loan_purpose'] = loan_purpose
            break
        print("Invalid loan purpose. Please enter p1, p2, p3, or p4")

    # Credit Worthiness
    print("\nCredit Worthiness Level:")
    print("l1: Prime (Good credit history)")
    print("l2: Subprime (Challenged credit history)")
    while True:
        credit_worthiness = input("Enter Credit Worthiness (l1/l2): ").lower()
        if credit_worthiness in ['l1', 'l2']:
            user_input['Credit_Worthiness'] = credit_worthiness
            break
        print("Invalid credit worthiness. Please enter l1 or l2")

    # Occupancy Type
    print("\nOccupancy Type:")
    print("pr: Primary Residence (You'll live there)")
    print("sr: Secondary Residence (Vacation/Second home)")
    print("ir: Investment Property (Rental/Investment)")
    while True:
        occupancy = input("Enter occupancy type (pr/sr/ir): ").lower()
        if occupancy in ['pr', 'sr', 'ir']:
            user_input['occupancy_type'] = occupancy
            break
        print("Invalid occupancy type. Please enter pr, sr, or ir")

    return user_input

In [None]:
def interactive_prediction():
    """
    Run the console loop: collect an application, predict, report, repeat.

    Each pass gathers input with ``get_user_input``, scores it with
    ``predict_loan_default`` and prints the decision, both class
    probabilities and a five-step risk label based on the default
    probability. The loop ends when the user answers anything other than
    'yes' to the repeat prompt, or when no interactive input is available.
    Other errors are reported and the loop starts over.
    """
    while True:
        try:
            # Get user input
            user_input = get_user_input()

            # Make prediction
            pred, prob = predict_loan_default(user_input)

            # Print results (prob[0] = no default, prob[1] = default)
            print("\nPrediction Results:")
            print("===================")
            print(f"Prediction: {'Default' if pred else 'No Default'}")
            print(f"Probability of Default: {prob[1]:.4f}")
            print(f"Probability of No Default: {prob[0]:.4f}")

            # Translate the default probability into a risk label
            if prob[1] > 0.8:
                print("\nRISK ASSESSMENT: Very High Risk of Default")
            elif prob[1] > 0.6:
                print("\nRISK ASSESSMENT: High Risk of Default")
            elif prob[1] > 0.4:
                print("\nRISK ASSESSMENT: Moderate Risk of Default")
            elif prob[1] > 0.2:
                print("\nRISK ASSESSMENT: Low Risk of Default")
            else:
                print("\nRISK ASSESSMENT: Very Low Risk of Default")

            # Ask if user wants to make another prediction
            again = input("\nWould you like to make another prediction? (yes/no): ").lower()
            if again != 'yes':
                break

        except (EOFError, NotImplementedError):
            # input() cannot be served: end of stdin, or a notebook run without
            # a frontend (IPython signals that with a NotImplementedError
            # subclass). Retrying would fail the same way forever, so stop.
            print("\nInput is not available; stopping the interactive session.")
            break
        except ValueError as e:
            print(f"\nError: {e}")
            print("Please try again with valid inputs")
        except Exception as e:
            print(f"\nAn unexpected error occurred: {e}")
            print("Please try again")


In [None]:
# Entry point: greet the user, then hand over to the interactive loop.
if __name__ == "__main__":
    welcome_banner = "\nWelcome to the Loan Default Prediction System"
    print(welcome_banner)
    interactive_prediction()



Welcome to the Loan Default Prediction System


Interactive Loan Default Prediction

Enter numerical values:

Note: These values significantly impact your loan application.
Monthly income: $4,000.00

Monthly Debt Obligations:
Please enter your current monthly debt payments (excluding the new loan):

DTI Calculation Summary:
Current monthly debt: $650.00
Estimated new loan payment: $1,332.25
Total monthly debt: $1,982.25
Monthly income: $4,000.00
Calculated DTI ratio: 49.6%

Enter categorical values:

Loan Types:
type1: Conventional Loan (traditional mortgage)
type2: FHA/VA/Special Program Loan

Loan Purpose:
p1: Home Purchase
p2: Refinancing
p3: Home Improvement
p4: Other

Credit Worthiness Level:
l1: Prime (Good credit history)
l2: Subprime (Challenged credit history)

Occupancy Type:
pr: Primary Residence (You'll live there)
sr: Secondary Residence (Vacation/Second home)
ir: Investment Property (Rental/Investment)

Processing input data...
Original input: {'loan_amount': 120000.0, '

## Loan Default Prediction Summary

## Overview
- **Model**: KNN for loan default prediction.
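- **Features**: Loan amount, income, credit score, DTI, and derived ratios (loan-to-income, payment-to-income, estimated monthly payment, disposable income), plus binned risk indicators and five label-encoded categorical fields (gender, loan type, loan purpose, credit worthiness, occupancy type).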

## Performance
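- **Accuracy**: Test-set accuracy is 0.76; for the default class, precision is 0.51, recall 0.40, and F1 0.45. For the sample case ($120k loan, $48k income, 710 credit score) the model predicts "No Default", with an estimated default probability of 49.02% (just under the 0.5 cutoff).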
- **Optimization**: Uses cross-validation for k.

## Uses
- **Risk**: Binary predictions and probability-based risk levels.
- **Support**: Identifies key prediction factors.

## Notes
- **Limits**: Needs quality data; struggles with large or complex datasets.
- **Potential**: Room for improvement with advanced methods.

## Takeaway
Simple, effective tool for loan risk assessment, ready for integration and refinement.

# Data Set 2 - IDS System Predictions
## Dataset:
https://archive.ics.uci.edu/dataset/715/lt+fs+id+intrusion+detection+in+wsns

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.impute import SimpleImputer

In [2]:
# Load the IDS Dataset
print("Loading IDS Dataset...")

# A missing file is reported and then re-raised rather than calling exit():
# raising fails this cell (and stops "Run All") with a traceback while leaving
# the kernel alive, and avoids later cells silently reusing a stale `df`.
try:
    df = pd.read_csv('data.csv')
    print(f"Dataset loaded successfully with shape: {df.shape}")
except FileNotFoundError:
    print("Error: File 'data.csv' not found.")
    raise

# Print initial information about the dataset
# (df.info() prints its own report and returns None, hence the trailing "None" in the output)
print("\nInitial dataset information:")
print(df.info())
print("\nSample of the data:")
print(df.head())


Loading IDS Dataset...
Dataset loaded successfully with shape: (182, 5)

Initial dataset information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182 entries, 0 to 181
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   Area                    182 non-null    int64
 1   Sensing Range           182 non-null    int64
 2   Transmission Range      182 non-null    int64
 3   Number of Sensor nodes  182 non-null    int64
 4   Number of Barriers      182 non-null    int64
dtypes: int64(5)
memory usage: 7.2 KB
None

Sample of the data:
   Area  Sensing Range  Transmission Range  Number of Sensor nodes  \
0  5000             15                  30                     100   
1  5000             16                  32                     112   
2  5000             17                  34                     124   
3  5000             18                  36                     136   
4  5000             19 

In [3]:
# Count the null entries in every column to confirm no imputation is needed.
print("\nMissing values in each column:")
missing_per_column = df.isnull().sum()
print(missing_per_column)



Missing values in each column:
Area                      0
Sensing Range             0
Transmission Range        0
Number of Sensor nodes    0
Number of Barriers        0
dtype: int64


In [4]:
# 'Number of Barriers' is the regression target; every other column is a feature.
target_column = 'Number of Barriers'
y = df[target_column]
X = df.drop(columns=[target_column])


In [5]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:
# Standardise all features: statistics come from the training rows only and
# are then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
# Choose k by 5-fold cross-validated R² on the training split.
print("\nFinding optimal k...")
k_values = [2, 3, 4, 5, 7, 9, 13, 15]
# Start below any attainable score so the first candidate always registers.
best_k, best_score = None, float('-inf')

for k in k_values:
    model = KNeighborsRegressor(n_neighbors=k, weights='uniform')
    scores = cross_val_score(model, X_train_scaled, y_train, scoring='r2', cv=5)
    mean_score = scores.mean()
    print(f"k={k}, R² Score={mean_score:.4f}")

    # Only a strictly better mean replaces the incumbent, so ties keep the earlier k.
    if not mean_score > best_score:
        continue
    best_k, best_score = k, mean_score

print(f"\nBest k: {best_k} (R² Score: {best_score:.4f})")



Finding optimal k...
k=2, R² Score=0.9688
k=3, R² Score=0.9639
k=4, R² Score=0.9623
k=5, R² Score=0.9662
k=7, R² Score=0.9690
k=9, R² Score=0.9623
k=13, R² Score=0.9456
k=15, R² Score=0.9347

Best k: 7 (R² Score: 0.9690)


In [8]:
# Refit a uniform-weight KNN regressor on the whole training split with the selected k.
print("\nTraining final model...")
final_knn_settings = {'n_neighbors': best_k, 'weights': 'uniform'}
final_model = KNeighborsRegressor(**final_knn_settings)
final_model.fit(X_train_scaled, y_train)



Training final model...


In [9]:
# Score the fitted regressor on the held-out rows.
print("\nEvaluating on test set...")
y_pred = final_model.predict(X_test_scaled)

test_r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
# RMSE is the square root of the MSE, expressed in the target's own units.
test_rmse = np.sqrt(test_mse)

print("\nTest Set Evaluation:")
print(f"R² Score: {test_r2:.4f}")
print(f"Mean Squared Error: {test_mse:.4f}")
print(f"Root Mean Squared Error: {test_rmse:.4f}")



Evaluating on test set...

Test Set Evaluation:
R² Score: 0.9631
Mean Squared Error: 156.1864
Root Mean Squared Error: 12.4975


In [10]:
def predict_barriers(input_data):
    """
    Predict the number of barriers for one deployment with the trained KNN model.

    Parameters
    ----------
    input_data : dict
        Maps each feature name used at fit time ('Area', 'Sensing Range',
        'Transmission Range', 'Number of Sensor nodes') to a numeric value.
        Key order does not matter.

    Returns
    -------
    The model's prediction for the single input row (first element of the
    array returned by ``final_model.predict``).

    Raises
    ------
    KeyError
        If a feature the scaler was fitted on is missing from ``input_data``.
    """
    # Create a one-row DataFrame with the input data
    input_df = pd.DataFrame([input_data])

    # The scaler was fitted on a DataFrame and expects the same columns in the
    # same order. Reorder to the fit-time layout so the result does not depend
    # on the caller's dict ordering. The attribute is absent when the scaler
    # was fitted without column names, in which case the frame is used as is.
    fitted_columns = getattr(scaler, 'feature_names_in_', None)
    if fitted_columns is not None:
        input_df = input_df[list(fitted_columns)]

    # Scale the features
    input_scaled = scaler.transform(input_df)

    # Make prediction
    prediction = final_model.predict(input_scaled)

    return prediction[0]


In [11]:
def _ask_positive_number(prompt, not_positive_message, advise=None):
    """
    Prompt until the user enters a finite number greater than zero.

    Non-numeric text and the special floats 'nan' / 'inf' (which float()
    accepts) are rejected with "Please enter a valid number"; zero or negative
    values are rejected with ``not_positive_message``. When ``advise`` is
    given it is called with the accepted value so the caller can print
    non-blocking warnings.
    """
    while True:
        try:
            value = float(input(prompt))
        except ValueError:
            print("Please enter a valid number")
            continue
        if not np.isfinite(value):
            print("Please enter a valid number")
            continue
        if value <= 0:
            print(not_positive_message)
            continue
        if advise is not None:
            advise(value)
        return value


def get_user_input():
    """
    Interactively collect the four deployment parameters from the console.

    Each value must be a finite positive number; the prompt repeats until it
    is. Values outside the suggested ranges only produce a warning and are
    still accepted.

    Returns
    -------
    dict
        Keys 'Area', 'Sensing Range', 'Transmission Range' and
        'Number of Sensor nodes', each mapped to a float.
    """
    print("\nIntrusion Detection System (IDS) Configuration Parameters")
    print("======================================================")
    print("\nThis system will help you determine the optimal number of security barriers")
    print("needed for your wireless sensor network based on your deployment parameters.")
    print("\nPlease provide the following information:")

    user_input = {}

    def warn_area(value):
        if value < 1000:
            print("Warning: Area seems small. This might affect sensor coverage.")
        elif value > 10000:
            print("Warning: Large area. May require significant number of barriers.")

    def warn_sensing_range(value):
        if value < 10:
            print("Warning: Small sensing range may create coverage gaps.")
        elif value > 50:
            print("Warning: Large sensing range may impact sensor battery life.")

    def warn_transmission_range(value):
        # Compared against the sensing range entered in the previous step
        if value <= user_input['Sensing Range']:
            print("Warning: Transmission range should typically be larger than sensing range.")

    def warn_node_count(value):
        # Rule of thumb used by the prompts: one node per 100 square meters
        recommended_min = user_input['Area'] / 100
        if value < recommended_min:
            print(f"Warning: Number of nodes may be too low for the area.")
            print(f"Recommended minimum: {recommended_min:.0f} nodes for your area.")

    # Area input
    print("\n1. Area Configuration:")
    print("   The total area of your deployment zone in square meters.")
    print("   Typical range: 1000-10000 square meters")
    print("   Larger areas typically require more barriers for effective coverage.")
    user_input['Area'] = _ask_positive_number(
        "\nEnter Area (in square meters): ", "Area must be positive", warn_area)

    # Sensing Range input
    print("\n2. Sensing Range Configuration:")
    print("   The radius within which each sensor can detect intrusions.")
    print("   Typical range: 10-50 meters")
    print("   Larger sensing ranges provide better coverage but may consume more power.")
    user_input['Sensing Range'] = _ask_positive_number(
        "\nEnter Sensing Range (in meters): ", "Sensing Range must be positive", warn_sensing_range)

    # Transmission Range input
    print("\n3. Transmission Range Configuration:")
    print("   The maximum distance at which sensors can communicate with each other.")
    print("   Typical range: 20-100 meters")
    print("   Should be greater than sensing range for reliable communication.")
    user_input['Transmission Range'] = _ask_positive_number(
        "\nEnter Transmission Range (in meters): ", "Transmission Range must be positive",
        warn_transmission_range)

    # Number of Sensor nodes input
    print("\n4. Number of Sensor Nodes:")
    print("   Total number of sensor devices to be deployed in the network.")
    print("   More nodes generally provide better coverage but increase cost.")
    print("   Recommended: At least 1 node per 100 square meters for basic coverage.")
    user_input['Number of Sensor nodes'] = _ask_positive_number(
        "\nEnter Number of Sensor nodes: ", "Number of nodes must be positive", warn_node_count)

    return user_input


In [12]:
def interactive_prediction():
    """
    Run the console loop: collect a configuration, predict, report, repeat.

    Each pass gathers the deployment parameters with ``get_user_input``,
    predicts the barrier count with ``predict_barriers`` and prints a report:
    the entered parameters, a theoretical coverage estimate (sum of the
    per-node sensing discs relative to the area), the prediction and a few
    rule-based recommendations. The loop ends when the user answers anything
    other than 'yes' to the repeat prompt, or when no interactive input is
    available. Other errors are reported and the loop starts over.
    """
    while True:
        try:
            # Get user input
            user_input = get_user_input()

            # Make prediction
            predicted_barriers = predict_barriers(user_input)

            # Print results
            print("\nIDS Configuration Analysis")
            print("========================")

            # Coverage Analysis
            area = user_input['Area']
            sensing_range = user_input['Sensing Range']
            nodes = user_input['Number of Sensor nodes']
            transmission_range = user_input['Transmission Range']

            # Calculate key metrics. Each node is modelled as a disc of radius
            # sensing_range; overlap between discs is ignored, so the ratio
            # can exceed 100%.
            theoretical_coverage_per_node = np.pi * sensing_range**2
            total_theoretical_coverage = theoretical_coverage_per_node * nodes
            coverage_ratio = (total_theoretical_coverage / area) * 100

            print("\n1. Deployment Parameters:")
            print(f"   • Area Coverage: {area:,.0f} square meters")
            print(f"   • Sensing Range: {sensing_range:.1f} meters")
            print(f"   • Transmission Range: {transmission_range:.1f} meters")
            print(f"   • Number of Sensor Nodes: {nodes:.0f}")

            print("\n2. Coverage Analysis:")
            print(f"   • Coverage per node: {theoretical_coverage_per_node:.1f} square meters")
            print(f"   • Total theoretical coverage: {total_theoretical_coverage:,.1f} square meters")
            print(f"   • Coverage ratio: {coverage_ratio:.1f}%")

            if coverage_ratio < 100:
                print("   ⚠ Warning: Potential coverage gaps in the network")
            elif coverage_ratio > 200:
                print("   ℹ Note: High sensor overlap, might be over-provisioned")

            print("\n3. Barrier Prediction:")
            print(f"   • Predicted Number of Barriers: {predicted_barriers:.0f}")
            print("\n   What this means:")
            print(f"   • The model suggests deploying {predicted_barriers:.0f} security barriers")
            print("   • Each barrier represents a line of defense against intrusion")
            print("   • More barriers generally mean:")
            print("     - Higher security level")
            print("     - Better intrusion detection probability")
            print("     - More redundancy in case of node failures")

            print("\n4. Network Recommendations:")
            if transmission_range < 2 * sensing_range:
                print("   ⚠ Consider increasing transmission range for better connectivity")
            if coverage_ratio < 90:
                print("   ⚠ Consider adding more sensor nodes to improve coverage")
            if predicted_barriers < 10:
                print("   ⚠ Low number of barriers might indicate security vulnerabilities")
            elif predicted_barriers > 100:
                print("   ℹ High number of barriers might indicate over-provisioning")

            # Ask if user wants to make another prediction
            print("\n==============================================")
            again = input("Would you like to analyze another configuration? (yes/no): ").lower()
            if again != 'yes':
                print("\nThank you for using the IDS Configuration Analyzer!")
                break
        except (EOFError, NotImplementedError):
            # input() cannot be served: end of stdin, or a notebook run without
            # a frontend (IPython signals that with a NotImplementedError
            # subclass). Retrying would fail the same way forever, so stop.
            print("\nInput is not available; stopping the interactive session.")
            break
        except Exception as e:
            print(f"\nAn error occurred: {e}")
            print("Please try again")


In [None]:
# Entry point: print the welcome banner with the cross-validation summary,
# then hand over to the interactive loop.
if __name__ == "__main__":
    banner_lines = [
        "\nWelcome to the IDS Configuration Analyzer",
        "=======================================",
        "This system helps you determine the optimal number of security barriers",
        "needed for your Intrusion Detection System based on your deployment parameters.",
        "\nModel Performance Summary:",
        f"Best k value: {best_k}",
        f"Cross-validation R² Score: {best_score:.4f}",
        "\nStarting configuration analysis...",
    ]
    for banner_line in banner_lines:
        print(banner_line)
    interactive_prediction()


Welcome to the IDS Configuration Analyzer
This system helps you determine the optimal number of security barriers
needed for your Intrusion Detection System based on your deployment parameters.

Model Performance Summary:
Best k value: 7
Cross-validation R² Score: 0.9690

Starting configuration analysis...

Intrusion Detection System (IDS) Configuration Parameters

This system will help you determine the optimal number of security barriers
needed for your wireless sensor network based on your deployment parameters.

Please provide the following information:

1. Area Configuration:
   The total area of your deployment zone in square meters.
   Typical range: 1000-10000 square meters
   Larger areas typically require more barriers for effective coverage.

Enter Area (in square meters): 1200

2. Sensing Range Configuration:
   The radius within which each sensor can detect intrusions.
   Typical range: 10-50 meters
   Larger sensing ranges provide better coverage but may consume more pow

## IDS System Configuration Summary

## Overview
- **Purpose**: Evaluate sensor node deployment for intrusion detection.
- **Key Parameters**:
  - **Area**: 5,000 sqm
  - **Nodes**: 8 (below recommended minimum of 50)
  - **Sensing Range**: 12.0 m
  - **Transmission Range**: 25.0 m

## Coverage Analysis
- **Coverage per Node**: 452.4 sqm
- **Total Coverage**: 3,619.1 sqm (72.4% of area)
- **Issue**: Potential coverage gaps due to insufficient nodes.

## Barrier Prediction
- **Predicted Barriers**: 29
- **Implications**:
  - Suggests 29 security barriers as lines of defense.
  - Benefits include:
    - Higher security level
    - Better intrusion detection probability
    - More redundancy against node failures

## Recommendations
- **Action Needed**: Increase sensor nodes to enhance coverage and security.

## Takeaway
With only 8 nodes, coverage is limited to 72.4%, risking security gaps. Adding nodes is recommended for a more robust IDS deployment.