# API-Driven Concept Learning System  
## Email Security & Server Load Decision Support

This project implements a dataset-driven concept learning system using
Find-S and Candidate Elimination principles (Machine Learning – Unit 1).

The system:
- Learns decision rules from structured datasets (CSV-style)
- Accepts API-style JSON inputs at runtime
- Produces explainable, professional decision outputs

Domains covered:
1. Email Spam Classification
2. Server Load Decision Support (for Load Balancers)


In [9]:
import pandas as pd

# Labelled e-mail examples: five categorical attributes followed by the class
# label ("Spam" or "Legitimate"). Row order is significant because later cells
# iterate over the frame in order.
email_data = pd.DataFrame(
    columns=[
        "url_present", "sender_reputation", "keyword_score",
        "capital_ratio", "attachment_type", "label",
    ],
    data=[
        # --- Spam examples ---
        ("present", "unknown", "high", "high", "executable", "Spam"),
        ("present", "unknown", "medium", "high", "executable", "Spam"),
        ("present", "unknown", "high", "medium", "safe", "Spam"),
        ("present", "unknown", "medium", "medium", "safe", "Spam"),
        ("present", "unknown", "high", "high", "safe", "Spam"),
        ("present", "unknown", "medium", "high", "none", "Spam"),
        ("present", "unknown", "high", "medium", "none", "Spam"),
        ("present", "unknown", "medium", "medium", "executable", "Spam"),
        # --- Legitimate examples ---
        ("absent", "trusted", "low", "low", "none", "Legitimate"),
        ("absent", "trusted", "low", "medium", "none", "Legitimate"),
        ("present", "trusted", "low", "low", "safe", "Legitimate"),
        ("absent", "trusted", "medium", "low", "none", "Legitimate"),
        ("present", "trusted", "medium", "low", "safe", "Legitimate"),
        ("absent", "trusted", "low", "low", "safe", "Legitimate"),
        ("absent", "trusted", "medium", "medium", "none", "Legitimate"),
        # --- Spam examples ---
        ("present", "unknown", "high", "high", "safe", "Spam"),
        ("present", "unknown", "medium", "high", "safe", "Spam"),
        ("present", "unknown", "high", "medium", "executable", "Spam"),
        ("present", "unknown", "medium", "medium", "none", "Spam"),
        # --- Legitimate examples ---
        ("absent", "unknown", "low", "low", "none", "Legitimate"),
        ("absent", "unknown", "medium", "low", "none", "Legitimate"),
        ("present", "trusted", "low", "medium", "safe", "Legitimate"),
        ("absent", "trusted", "medium", "low", "safe", "Legitimate"),
        ("absent", "trusted", "low", "medium", "none", "Legitimate"),
        # --- Mixed tail: three Spam, then two Legitimate ---
        ("present", "unknown", "high", "high", "executable", "Spam"),
        ("present", "unknown", "medium", "high", "executable", "Spam"),
        ("present", "unknown", "high", "medium", "safe", "Spam"),
        ("absent", "trusted", "low", "low", "none", "Legitimate"),
        ("present", "trusted", "low", "low", "safe", "Legitimate"),
    ],
)

# Preview the first rows together with the overall (rows, columns) shape.
(email_data.head(), email_data.shape)


(  url_present sender_reputation keyword_score capital_ratio attachment_type  \
 0     present           unknown          high          high      executable   
 1     present           unknown        medium          high      executable   
 2     present           unknown          high        medium            safe   
 3     present           unknown        medium        medium            safe   
 4     present           unknown          high          high            safe   
 
   label  
 0  Spam  
 1  Spam  
 2  Spam  
 3  Spam  
 4  Spam  ,
 (29, 6))

In [10]:
# Convert every DataFrame row into an (attribute_list, label) pair, where the
# label is the last column and the attributes are all columns before it.
#
# Plain tuples from itertuples() are sliced positionally. The previous
# `row[-1]` performed integer-position lookup on a label-indexed Series, which
# pandas has deprecated (it emits a FutureWarning), and iterrows() builds a
# Series per row, which is slower than necessary.
training_data = [
    (list(record[:-1]), record[-1])
    for record in email_data.itertuples(index=False, name=None)
]

training_data[:5]


  label = row[-1]


[(['present', 'unknown', 'high', 'high', 'executable'], 'Spam'),
 (['present', 'unknown', 'medium', 'high', 'executable'], 'Spam'),
 (['present', 'unknown', 'high', 'medium', 'safe'], 'Spam'),
 (['present', 'unknown', 'medium', 'medium', 'safe'], 'Spam'),
 (['present', 'unknown', 'high', 'high', 'safe'], 'Spam')]

In [11]:
def find_s(training_data, positive_label="Spam"):
    """Learn the maximally specific hypothesis with the Find-S algorithm.

    Only examples whose label equals ``positive_label`` influence the result.
    The first positive example is copied verbatim; each later positive example
    generalises any disagreeing attribute position to the wildcard ``'?'``.

    Parameters
    ----------
    training_data : sequence of (attributes, label) pairs
        ``attributes`` is a sequence of attribute values. All examples are
        expected to have the same number of attributes.
    positive_label : optional
        Label that marks a positive example. Defaults to ``"Spam"``.

    Returns
    -------
    list
        One entry per attribute: the value shared by all positive examples,
        or ``'?'`` where they disagree. If there are no positive examples,
        the result is ``['0'] * n_attributes`` (the "matches nothing"
        hypothesis).

    Raises
    ------
    IndexError
        If ``training_data`` is empty.
    """
    # Attribute count is taken from the first example; an empty data set
    # raises IndexError here.
    num_attributes = len(training_data[0][0])

    # `None` means "no positive example seen yet". Using a separate marker
    # instead of the string '0' keeps a genuine attribute value of '0' from
    # being mistaken for an unset slot and silently overwritten.
    hypothesis = None

    for attributes, label in training_data:
        if label != positive_label:
            continue  # Find-S ignores negative examples

        if hypothesis is None:
            hypothesis = list(attributes)
            continue

        for i, value in enumerate(attributes):
            if hypothesis[i] != value:
                hypothesis[i] = '?'

    if hypothesis is None:
        return ['0'] * num_attributes
    return hypothesis


In [12]:
# Run Find-S over the prepared (attributes, label) pairs and show the result.
learned_hypothesis = find_s(training_data=training_data)
learned_hypothesis


['present', 'unknown', '?', '?', '?']

In [13]:
# Example API-style payload describing one incoming e-mail; the keys mirror
# the attribute columns of the training data.
email_api_input = dict(
    url_present="present",
    sender_reputation="unknown",
    keyword_score="high",
    capital_ratio="high",
    attachment_type="executable",
)


In [14]:
def extract_features(api_json):
    """Turn an API-style mapping into an ordered attribute list.

    The values are returned in the fixed order url_present,
    sender_reputation, keyword_score, capital_ratio, attachment_type.
    Keys other than these five are ignored; a missing key raises
    ``KeyError``.
    """
    ordered_fields = (
        "url_present",
        "sender_reputation",
        "keyword_score",
        "capital_ratio",
        "attachment_type",
    )
    return [api_json[field] for field in ordered_fields]

# Flatten the example payload into the attribute order used for training.
api_features = extract_features(api_json=email_api_input)
api_features


['present', 'unknown', 'high', 'high', 'executable']

In [15]:
def classify(instance, hypothesis):
    """Label an instance by matching it against a learned hypothesis.

    Every hypothesis position that is not the wildcard ``'?'`` must equal the
    value at the same position in ``instance``. Returns ``"Spam"`` when all
    such positions match and ``"Legitimate"`` at the first mismatch.
    """
    for position, constraint in enumerate(hypothesis):
        if constraint == '?':
            # Wildcard: any value is accepted, so the instance is not read.
            continue
        if constraint != instance[position]:
            return "Legitimate"
    return "Spam"


In [16]:
# Check the API instance against the hypothesis learned by Find-S.
decision = classify(instance=api_features, hypothesis=learned_hypothesis)
decision


'Spam'

In [17]:
def p_output(decision, hypothesis):
    """Print a fixed-format decision report to standard output.

    ``decision`` equal to ``'Spam'`` is reported as a flagged e-mail; any
    other value is reported as legitimate. ``hypothesis`` is printed using
    its default string form.
    """
    if decision == 'Spam':
        verdict = 'EMAIL FLAGGED AS SPAM'
    else:
        verdict = 'EMAIL MARKED AS LEGITIMATE'

    report_lines = [
        "EMAIL SECURITY DECISION REPORT",
        "-" * 35,
        f"Final Decision      : {verdict}",
        "Decision Method     : Concept Learning (Find-S Algorithm)",
        "Learning Source     : Dataset-driven training",
        "Model Explainability: Hypothesis-based reasoning",
        f"Learned Hypothesis  : {hypothesis}",
    ]
    for report_line in report_lines:
        print(report_line)

# Render the report for the classified API instance.
p_output(decision=decision, hypothesis=learned_hypothesis)


EMAIL SECURITY DECISION REPORT
-----------------------------------
Final Decision      : EMAIL FLAGGED AS SPAM
Decision Method     : Concept Learning (Find-S Algorithm)
Learning Source     : Dataset-driven training
Model Explainability: Hypothesis-based reasoning
Learned Hypothesis  : ['present', 'unknown', '?', '?', '?']
