In [None]:
import sys
import os
import pandas as pd
from networksecurity.pipeline.prediction_pipeline import PredictionPipeline, NetworkData

## 1. Single Prediction Example

In [None]:
# One hand-written sample: each key is a NetworkData keyword argument and each
# value is the encoded feature value passed for it.
sample_features = {
    "having_IP_Address": -1,
    "URL_Length": 1,
    "Shortining_Service": 1,
    "having_At_Symbol": 1,
    "double_slash_redirecting": -1,
    "Prefix_Suffix": -1,
    "having_Sub_Domain": -1,
    "SSLfinal_State": -1,
    "Domain_registeration_length": -1,
    "Favicon": 1,
    "port": 1,
    "HTTPS_token": -1,
    "Request_URL": 1,
    "URL_of_Anchor": -1,
    "Links_in_tags": 1,
    "SFH": -1,
    "Submitting_to_email": -1,
    "Abnormal_URL": -1,
    "Redirect": 0,
    "on_mouseover": 1,
    "RightClick": 1,
    "popUpWidnow": 1,
    "Iframe": 1,
    "age_of_domain": -1,
    "DNSRecord": -1,
    "web_traffic": -1,
    "Page_Rank": -1,
    "Google_Index": 1,
    "Links_pointing_to_page": 1,
    "Statistical_report": -1,
}
network_data = NetworkData(**sample_features)

# Convert the sample into the frame that the prediction cell passes to predict().
df = network_data.get_data_as_dataframe()
print("Input Data:")
print(df)

In [None]:
# Run the prediction pipeline on the frame built from the sample NetworkData.
prediction_pipeline = PredictionPipeline()
prediction = prediction_pipeline.predict(df)

# Turn the first predicted value into a human-readable verdict.
# NOTE(review): this treats a predicted value of 1 as "phishing" and anything
# else as "legitimate" - confirm against the label encoding used in training.
if prediction[0] == 1:
    result = "Phishing Website"
else:
    result = "Legitimate Website"

print(f"\nPrediction: {result}")
print(f"Prediction Value: {prediction[0]}")

## 2. Batch Prediction from CSV

In [None]:
# Batch prediction template (disabled). To use it, replace
# 'path_to_test_file.csv' with the path to a real CSV and uncomment the lines
# below. It reuses `prediction_pipeline` from the single-prediction cell, so
# run that cell first.
# NOTE(review): the CSV presumably needs the same feature columns as the
# single-prediction sample - confirm against PredictionPipeline.predict.
#
# Load test data (if available)
# test_df = pd.read_csv('path_to_test_file.csv')
# predictions = prediction_pipeline.predict(test_df)
# test_df['Prediction'] = predictions
# test_df['Result'] = test_df['Prediction'].apply(lambda x: 'Phishing' if x == 1 else 'Legitimate')
# print(test_df[['Prediction', 'Result']].head())

## 3. Model Performance Metrics

In [None]:
# Load and print the evaluation report stored under the newest run folder in
# `Artifacts/`. Every failure mode (no folder, no runs, no report, empty or
# malformed report) prints a message instead of raising or staying silent.
import yaml

# Root folder expected to contain one sub-directory per training run.
artifacts_dir = 'Artifacts'

# isdir (not exists) so a stray *file* named 'Artifacts' is not passed to listdir.
if not os.path.isdir(artifacts_dir):
    print("No artifacts found. Train the model first using: python main.py")
else:
    # Only sub-directories count as runs; loose files are ignored.
    timestamps = [
        d for d in os.listdir(artifacts_dir)
        if os.path.isdir(os.path.join(artifacts_dir, d))
    ]
    if not timestamps:
        # Previously this case printed nothing at all.
        print("No artifacts found. Train the model first using: python main.py")
    else:
        # NOTE(review): picks the lexicographically greatest folder name; this is
        # the newest run only if the names sort chronologically - confirm the
        # timestamp format used by the training pipeline.
        latest = sorted(timestamps)[-1]
        report_path = os.path.join(artifacts_dir, latest, 'model_evaluation', 'report.yaml')

        if not os.path.isfile(report_path):
            print("No evaluation report found. Train the model first using: python main.py")
        else:
            with open(report_path, 'r', encoding='utf-8') as f:
                metrics = yaml.safe_load(f)

            # safe_load returns None for an empty file and may return a list or
            # scalar for other documents; only a mapping can be iterated as metrics.
            if not isinstance(metrics, dict) or not metrics:
                print(f"Evaluation report at {report_path} is empty or not a mapping.")
            else:
                print("Model Evaluation Metrics:")
                for key, value in metrics.items():
                    if isinstance(value, (int, float)):
                        print(f"{key}: {value:.4f}")
                    else:
                        # Strings, nested mappings, etc. cannot take a float format spec.
                        print(f"{key}: {value}")

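## 4. Feature Importance (if using Random Forest)

The cell below only lists the feature names in input order; it does not compute importances itself.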

In [None]:
# Names of the model's input features, in the same order as the keyword
# arguments used to build the sample NetworkData earlier in the notebook.
feature_names = [
    'having_IP_Address',
    'URL_Length',
    'Shortining_Service',
    'having_At_Symbol',
    'double_slash_redirecting',
    'Prefix_Suffix',
    'having_Sub_Domain',
    'SSLfinal_State',
    'Domain_registeration_length',
    'Favicon',
    'port',
    'HTTPS_token',
    'Request_URL',
    'URL_of_Anchor',
    'Links_in_tags',
    'SFH',
    'Submitting_to_email',
    'Abnormal_URL',
    'Redirect',
    'on_mouseover',
    'RightClick',
    'popUpWidnow',
    'Iframe',
    'age_of_domain',
    'DNSRecord',
    'web_traffic',
    'Page_Rank',
    'Google_Index',
    'Links_pointing_to_page',
    'Statistical_report',
]

# Quick sanity check on how many features are listed.
print(f"Total features: {len(feature_names)}")