In [1]:
import pandas as pd
from collections import Counter

In [2]:
# Load the training table; the path is relative to the notebook's working directory.
df = pd.read_csv('classification.csv')
# Last expression of the cell, so the notebook renders a preview of the first rows.
df.head()

Unnamed: 0,Color,Engine Type,Top Speed,Aerodynamics,Team
0,Red,Hybrid,Fast,Good,Red Bull
1,Blue,Electric,Medium,Excellent,Mercedes
2,Blue,Hybrid,Fast,Fair,Red Bull
3,Red,Hybrid,Medium,Good,Red Bull
4,Blue,Electric,Fast,Fair,Mercedes


In [3]:
def class_probs(df, target):
    """Estimate the prior probability of every class label.

    Args:
        df: DataFrame holding the training rows.
        target: Name of the column that contains the class labels.

    Returns:
        A ``(counts, priors)`` tuple: ``counts`` is a ``Counter`` mapping each
        label to its number of rows, ``priors`` is a dict mapping each label
        to ``count / len(df)``.
    """
    n_rows = len(df)
    counts = Counter(df[target])

    priors = {}
    for label, n_label in counts.items():
        priors[label] = n_label / n_rows

    return counts, priors

def feature_probs(df, feature, target, alpha=0.0):
    """Estimate P(feature value | class) for one feature column.

    Feature values are converted with ``str`` before counting, so the keys of
    the inner dictionaries are always strings.

    Args:
        df: DataFrame holding the training rows.
        feature: Name of the feature column to tabulate.
        target: Name of the column that contains the class labels.
        alpha: Additive (Laplace) smoothing strength. With the default ``0``
            the result is the plain relative frequency and only values that
            actually occur within a class get an entry. With ``alpha > 0``
            every value seen anywhere in the column gets an entry for every
            class, computed as ``(count + alpha) / (n_class + alpha * n_values)``,
            so a value unseen for a class no longer forces its score to zero.

    Returns:
        ``{class_label: {str(feature_value): probability}}``.

    Raises:
        ValueError: If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    values = df[feature].astype(str)
    # Distinct values of the whole column, first-seen order; only needed for smoothing.
    vocabulary = list(dict.fromkeys(values))

    feature_dict = {}
    for class_ in df[target].unique():
        class_values = values[df[target] == class_]
        counts = Counter(class_values)
        n_class = len(class_values)

        if alpha > 0:
            denom = n_class + alpha * len(vocabulary)
            # Counter returns 0 for values that never occur in this class.
            feature_dict[class_] = {
                value: (counts[value] + alpha) / denom for value in vocabulary
            }
        else:
            feature_dict[class_] = {
                value: count / n_class for value, count in counts.items()
            }

    return feature_dict

def calc_probs(instance, feat_probs, class_probs):
    """Compute the unnormalised Naive Bayes score of every class for one instance.

    Args:
        instance: Sequence of feature values, in the same order as the tables
            in ``feat_probs``.
        feat_probs: List with one table per feature, each shaped
            ``{class_label: {feature_value: probability}}``.
        class_probs: Dict mapping each class label to its prior probability.

    Returns:
        Dict mapping each class label to ``prior * product of likelihoods``.
        A value with no entry for a class contributes a factor of 0.
    """
    inst_probs = {}
    for class_, class_prob in class_probs.items():
        probs = class_prob
        for i, feature_val in enumerate(instance):
            table = feat_probs[i][class_]
            if feature_val in table:
                likelihood = table[feature_val]
            else:
                # The tables built by feature_probs are keyed by str(value), so a
                # non-string value (e.g. an int taken from a DataFrame row) must be
                # looked up by its string form instead of being treated as unseen.
                likelihood = table.get(str(feature_val), 0)
            probs *= likelihood
        inst_probs[class_] = probs
    return inst_probs

In [4]:
# The class label is taken to be the last column of the frame.
target = df.columns[-1]
class_counts, class_prob = class_probs(df, target)

# One likelihood table per non-target column, kept in column order so that
# position i of the list lines up with the i-th value of an instance.
feature_probs_list = [
    feature_probs(df, feature, target)
    for feature in df.columns
    if feature != target
]

In [6]:
# Query instance: one value per feature, in the same order as the feature tables.
prediction = "Red,Electric,Fast,Good".split(",")
val = calc_probs(prediction, feature_probs_list, class_prob)

# The predicted class is the one with the largest score.
final_class = max(val, key=val.get)

for label, score in val.items():
    print(f"{label}: {score}")

Red Bull: 0.036000000000000004
Mercedes: 0.0


In [7]:
# Report the class selected as the highest-scoring one for the query instance.
print(f"The final prediction for the given input is: {final_class}")

The final prediction for the given input is: Red Bull


## DA_6

In [2]:
import pandas as pd
from collections import Counter
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

# Sample dataset as provided (inline copy of the rows).
# NOTE(review): `data` is not referenced by any code visible in this cell; the
# frame that is actually used is loaded from classification.csv below. Confirm
# the two agree, or build the frame from `data` so the cell is self-contained.
data = {
    'Color': ['Red', 'Blue', 'Blue', 'Red', 'Blue', 'Red', 'Blue', 'Red'],
    'Engine Type': ['Hybrid', 'Electric', 'Hybrid', 'Hybrid', 'Electric', 'Hybrid', 'Electric', 'Hybrid'],
    'Top Speed': ['Fast', 'Medium', 'Fast', 'Medium', 'Fast', 'Medium', 'Fast', 'Fast'],
    'Aerodynamics': ['Good', 'Excellent', 'Fair', 'Good', 'Fair', 'Excellent', 'Good', 'Excellent'],
    'Team': ['Red Bull', 'Mercedes', 'Red Bull', 'Red Bull', 'Mercedes', 'Mercedes', 'Red Bull', 'Red Bull']
}
# Working frame for everything below; the path is relative to the working directory.
df = pd.read_csv('classification.csv')

def class_probs(df, target):
    """Estimate the prior probability of every class label.

    Args:
        df: DataFrame holding the training rows.
        target: Name of the column that contains the class labels.

    Returns:
        A ``(counts, priors)`` tuple: ``counts`` is a ``Counter`` mapping each
        label to its number of rows, ``priors`` is a dict mapping each label
        to ``count / len(df)``.
    """
    n_rows = len(df)
    counts = Counter(df[target])

    priors = {}
    for label, n_label in counts.items():
        priors[label] = n_label / n_rows

    return counts, priors

def feature_probs(df, feature, target, alpha=0.0):
    """Estimate P(feature value | class) for one feature column.

    Feature values are converted with ``str`` before counting, so the keys of
    the inner dictionaries are always strings.

    Args:
        df: DataFrame holding the training rows.
        feature: Name of the feature column to tabulate.
        target: Name of the column that contains the class labels.
        alpha: Additive (Laplace) smoothing strength. With the default ``0``
            the result is the plain relative frequency and only values that
            actually occur within a class get an entry. With ``alpha > 0``
            every value seen anywhere in the column gets an entry for every
            class, computed as ``(count + alpha) / (n_class + alpha * n_values)``,
            so a value unseen for a class no longer forces its score to zero.

    Returns:
        ``{class_label: {str(feature_value): probability}}``.

    Raises:
        ValueError: If ``alpha`` is negative.
    """
    if alpha < 0:
        raise ValueError("alpha must be non-negative")

    values = df[feature].astype(str)
    # Distinct values of the whole column, first-seen order; only needed for smoothing.
    vocabulary = list(dict.fromkeys(values))

    feature_dict = {}
    for class_ in df[target].unique():
        class_values = values[df[target] == class_]
        counts = Counter(class_values)
        n_class = len(class_values)

        if alpha > 0:
            denom = n_class + alpha * len(vocabulary)
            # Counter returns 0 for values that never occur in this class.
            feature_dict[class_] = {
                value: (counts[value] + alpha) / denom for value in vocabulary
            }
        else:
            feature_dict[class_] = {
                value: count / n_class for value, count in counts.items()
            }
    return feature_dict

def calc_probs(instance, feat_probs, class_probs):
    """Compute the unnormalised Naive Bayes score of every class for one instance.

    Args:
        instance: Sequence of feature values, in the same order as the tables
            in ``feat_probs``.
        feat_probs: List with one table per feature, each shaped
            ``{class_label: {feature_value: probability}}``.
        class_probs: Dict mapping each class label to its prior probability.

    Returns:
        Dict mapping each class label to ``prior * product of likelihoods``.
        A value with no entry for a class contributes a factor of 0.
    """
    inst_probs = {}
    for class_, class_prob in class_probs.items():
        probs = class_prob
        for i, feature_val in enumerate(instance):
            table = feat_probs[i][class_]
            if feature_val in table:
                likelihood = table[feature_val]
            else:
                # The tables built by feature_probs are keyed by str(value), so a
                # non-string value (e.g. an int taken from a DataFrame row) must be
                # looked up by its string form instead of being treated as unseen.
                likelihood = table.get(str(feature_val), 0)
            probs *= likelihood
        inst_probs[class_] = probs
    return inst_probs

# The class label is taken to be the last column of the frame.
target = df.columns[-1]
class_counts, class_prob = class_probs(df, target)
print("Class probabilities:", class_prob)

# One likelihood table per feature column (everything before the last column),
# kept in column order so that position i lines up with the i-th instance value.
feature_probs_list = [
    feature_probs(df, feature, target) for feature in df.columns[:-1]
]

def predict(instance):
    """Return the highest-scoring class label for one instance.

    Uses the module-level ``feature_probs_list`` and ``class_prob`` tables.

    Args:
        instance: Iterable of feature values, ordered like ``feature_probs_list``.

    Returns:
        The class label whose score from ``calc_probs`` is largest; on a tie,
        the label that comes first in ``class_prob``.
    """
    scores = calc_probs(list(instance), feature_probs_list, class_prob)
    return max(scores, key=scores.get)

# Score every row of the frame; the features are all values except the last (the label).
df['predicted_team'] = df.apply(lambda row: predict(row[:-1]), axis=1)
print("Predictions for each row:\n", df[['Team', 'predicted_team']])

# Binarise true and predicted labels for the metric functions:
# 'Red Bull' is the positive class (1), anything else is 0.
for source_col, binary_col in (('Team', 'actual'), ('predicted_team', 'predicted')):
    df[binary_col] = (df[source_col] == 'Red Bull').astype('int64')

def evaluate_classification(df):
    """Print binary-classification metrics for the labels stored in ``df``.

    Args:
        df: DataFrame with 0/1 columns ``'actual'`` (ground truth) and
            ``'predicted'`` (model output), where 1 is the positive class.

    Returns:
        None. Accuracy, precision, recall, F1, specificity and ROC AUC are
        printed with two decimals; a metric that is undefined for the given
        data is printed as ``nan``.
    """
    actual = df['actual']
    predicted = df['predicted']

    accuracy = accuracy_score(actual, predicted)
    precision = precision_score(actual, predicted)
    recall = recall_score(actual, predicted)
    f1 = f1_score(actual, predicted)

    # Pin the label order so the matrix is always 2x2. Without `labels`, data
    # that contains a single class yields a 1x1 matrix and the four-way
    # unpacking below raises.
    tn, fp, fn, tp = confusion_matrix(actual, predicted, labels=[0, 1]).ravel()

    # Specificity is undefined when there are no actual negatives.
    negatives = tn + fp
    specificity = tn / negatives if negatives else float('nan')

    # ROC AUC needs both classes in the ground truth. Note it is computed from
    # the hard 0/1 predictions here, not from class scores.
    if len(set(actual)) > 1:
        roc_auc = roc_auc_score(actual, predicted)
    else:
        roc_auc = float('nan')

    print("\nEvaluation Metrics:")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall (Sensitivity): {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print(f"Specificity: {specificity:.2f}")
    print(f"ROC AUC Score: {roc_auc:.2f}")

# Report metrics for the rows scored above. These are the same rows the
# probability tables were built from, so the figures are in-sample.
evaluate_classification(df)

Class probabilities: {'Red Bull': 0.625, 'Mercedes': 0.375}
Predictions for each row:
        Team predicted_team
0  Red Bull       Red Bull
1  Mercedes       Mercedes
2  Red Bull       Red Bull
3  Red Bull       Red Bull
4  Mercedes       Mercedes
5  Mercedes       Mercedes
6  Red Bull       Red Bull
7  Red Bull       Red Bull

Evaluation Metrics:
Accuracy: 1.00
Precision: 1.00
Recall (Sensitivity): 1.00
F1 Score: 1.00
Specificity: 1.00
ROC AUC Score: 1.00
