In [1]:
class Node:
    """One node of the decision tree.

    Attributes:
        data: The rows handled by this node.
        attributes: The attribute names still available at this node.
        target: The target value passed in by the caller; stored as given.
        depth: Depth of this node; ``0`` unless the caller says otherwise.
        split_attr: Attribute this node splits on; ``None`` until assigned.
        children: Child nodes; starts as a fresh empty list per instance.
    """

    def __init__(self, data, attributes, target, depth=0):
        # Values supplied by the caller are stored unchanged.
        self.data, self.attributes, self.target, self.depth = (
            data,
            attributes,
            target,
            depth,
        )
        # Populated later, once a split has been chosen for this node.
        self.split_attr, self.children = None, []

def read_csv_data(file_path):
    """Load a comma-separated file into a list of row dictionaries.

    The first non-blank line is the header. Every later non-blank line
    becomes one dict mapping ``"index"`` to the row's 1-based position (an
    ``int``) and each header name to the matching cell (a ``str``). Cells
    are paired with header names by position, so a row that is shorter or
    longer than the header is cut to the shorter of the two.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of dicts, one per data row, in file order. The list is empty
        when the file contains no header or no data rows.
    """
    # Local import keeps the function self-contained.
    import csv

    with open(file_path, "r") as f:
        # Strip surrounding whitespace per line and drop blank lines, so a
        # trailing newline or an empty line does not turn into a bogus row.
        stripped = (line.strip() for line in f)
        # csv.reader understands quoted cells (e.g. "Paris, FR"), which a
        # plain split(",") would cut in two.
        rows = list(csv.reader(line for line in stripped if line))

    if not rows:
        return []

    keys = ["index"] + rows[0]
    return [dict(zip(keys, [i] + cells)) for i, cells in enumerate(rows[1:], 1)]

def write_csv_data(file_path, header, data):
    """Write rows to a comma-separated file, header line first.

    Args:
        file_path: Path of the file to create or overwrite.
        header: Column names; they form the first line and select, in
            order, which key of each row is written.
        data: Iterable of mappings. Each one must contain every name in
            ``header``; values are converted with ``str``.

    Raises:
        KeyError: If a row lacks one of the header names.
    """
    # Local import keeps the function self-contained.
    import csv

    with open(file_path, "w") as f:
        # csv.writer quotes cells that contain commas, quotes or newlines;
        # joining with "," would silently corrupt such rows. The "\n"
        # terminator keeps the line endings the plain-text version produced.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(header)
        writer.writerows([str(row[h]) for h in header] for row in data)

def sort_data(data, attr):
    """Return the rows of ``data`` ordered by their ``attr`` value.

    When every value in the column is a number, or text that parses as
    one, the rows are ordered numerically, so ``"200"`` comes before
    ``"1000"``. Blank cells are allowed in such a column and are placed
    first. If any value is non-blank and not numeric, the column is compared
    as-is, which for text means ordinary string ordering. The sort is stable
    and the input is not modified.

    Args:
        data: Iterable of mappings that all contain ``attr``.
        attr: Key whose value decides the order.

    Returns:
        A new list with the same row objects in sorted order.

    Raises:
        KeyError: If a row does not contain ``attr``.
    """
    # Local import keeps the function self-contained.
    import math

    rows = list(data)
    keys = []
    for row in rows:
        value = row[attr]
        if isinstance(value, str) and not value.strip():
            # Blank cell: rank it ahead of every number, which is where
            # plain string ordering would put it as well.
            keys.append((0, 0.0))
            continue
        try:
            number = float(value)
        except (TypeError, ValueError):
            number = math.nan
        if math.isnan(number):
            # A non-numeric value means this is not a numeric column, so
            # compare the raw values directly.
            return sorted(rows, key=lambda row: row[attr])
        keys.append((1, number))

    # Pairing each key with its position keeps ties in input order and
    # avoids ever comparing the row dicts themselves.
    ranked = sorted(zip(keys, range(len(rows))))
    return [rows[position] for _, position in ranked]

def gini_index(data, attr, target):
    """Placeholder for a per-attribute Gini index; not implemented yet.

    The three arguments are accepted and ignored, and the call returns
    ``None``. A one-argument ``gini_index`` defined later in this file
    rebinds the name, so once the whole file has run this version is no
    longer the one reached through the name ``gini_index``.
    """
    return None

def build_tree(node, max_depth):
    """Grow the subtree below ``node`` in place, recursively.

    Nothing is done when ``stopping_criterion(node, max_depth)`` is truthy.
    Otherwise the result of ``best_split`` is stored on ``node.split_attr``
    and one child is created, appended and expanded for each item produced
    by ``split_data``.

    NOTE(review): ``stopping_criterion``, ``best_split`` and ``split_data``
    are not defined in the visible part of this file; confirm they exist.

    Args:
        node: Node to expand; its ``split_attr`` and ``children`` are
            modified.
        max_depth: Passed through unchanged to ``stopping_criterion``.
    """
    if not stopping_criterion(node, max_depth):
        chosen = best_split(node.data, node.attributes, node.target)
        node.split_attr = chosen
        next_depth = node.depth + 1

        for subset in split_data(node.data, chosen):
            # Each child gets its own copy of the attribute list with the
            # attribute just used taken out.
            remaining = list(node.attributes)
            remaining.remove(chosen)

            branch = Node(subset, remaining, node.target, next_depth)
            node.children.append(branch)
            build_tree(branch, max_depth)

# Define a function to prune the decision tree (optional).

def classify(tree, test_instance):
    """Placeholder for classifying one instance with the tree.

    Not implemented yet: both arguments are ignored and ``None`` is
    returned.
    """
    return None

# Attributes that get their own sorted copy of the data set below.
attributes = [
    "Married",
    "Dependents",
    "Education",
    "Self_Employed",
    "Applicant_Income",
    "Coapplicant_Income",
    "Loan_Amount",
    "Credit_History",
]
file_path = "loan_train_em.csv"

data = read_csv_data(file_path)
# Column order for the output files: "index" followed by every key of the
# first row except its first one.
header = ["index", *list(data[0])[1:]]

# Write one file per attribute, each holding all rows ordered by it.
for attr in attributes:
    sorted_file_path = f"sorted_{attr}.csv"
    sorted_attr_data = sort_data(data, attr)
    write_csv_data(sorted_file_path, header, sorted_attr_data)
    
def count_credit_status(data):
    """Count how many rows carry each ``"Credit_Status"`` label.

    Args:
        data: Iterable of mappings that each have a ``"Credit_Status"``
            entry equal to ``"Y"`` or ``"N"``.

    Returns:
        A dict with exactly the keys ``"Y"`` and ``"N"`` and their counts.

    Raises:
        KeyError: If a row has no ``"Credit_Status"`` entry or its value is
            neither ``"Y"`` nor ``"N"``.
    """
    tally = dict.fromkeys(("Y", "N"), 0)
    for record in data:
        label = record["Credit_Status"]
        # Plain indexing on purpose: an unknown label must raise.
        tally[label] = tally[label] + 1
    return tally

def gini_index(data):
    """Return the Gini impurity of the ``"Credit_Status"`` labels in ``data``.

    The value is ``1 - sum(p ** 2)`` over the share ``p`` of each label as
    counted by ``count_credit_status``.

    Args:
        data: Rows to measure, in the form ``count_credit_status`` accepts.

    Returns:
        The impurity as a number; ``0.0`` when there are no rows, since an
        empty group has no label mix to measure.
    """
    # Cheap short-circuit for an empty sequence; without it the division
    # below would raise ZeroDivisionError.
    if not data:
        return 0.0

    counts = count_credit_status(data)
    total = sum(counts.values())
    # Same guard for inputs that are truthy yet yield no rows, such as an
    # exhausted iterator.
    if total == 0:
        return 0.0
    return 1 - sum((count / total) ** 2 for count in counts.values())

def gini_index_for_attribute(data, attr):
    """Average the weighted Gini impurity over every cut of the sorted rows.

    The rows are ordered with ``sort_data(data, attr)``. For each position
    that divides them into a non-empty left part and a non-empty right
    part, the size-weighted mean of ``gini_index`` of the two parts is
    computed; the result is the mean of those values over all positions.

    Args:
        data: Rows to evaluate.
        attr: Attribute whose ordering defines the cut positions.

    Returns:
        The averaged weighted impurity, or ``0.0`` when there are fewer
        than two rows and therefore no position to cut at.
    """
    rows = list(data)
    row_count = len(rows)
    # With zero or one row there is nothing to split, and the final division
    # by ``row_count - 1`` would be a division by zero.
    if row_count < 2:
        return 0.0

    sorted_data = sort_data(rows, attr)
    total_gini = 0
    for cut in range(1, row_count):
        left_data = sorted_data[:cut]
        right_data = sorted_data[cut:]
        left_gini = gini_index(left_data)
        right_gini = gini_index(right_data)
        total_gini += (len(left_data) * left_gini + len(right_data) * right_gini) / row_count
    return total_gini / (row_count - 1)

def is_continuous_attribute(attr):
    """Tell whether ``attr`` is one of the attributes listed as continuous.

    Args:
        attr: Attribute name to look up.

    Returns:
        ``True`` for ``"Applicant_Income"``, ``"Coapplicant_Income"`` and
        ``"Loan_Amount"``; ``False`` for anything else.
    """
    return attr in (
        "Applicant_Income",
        "Coapplicant_Income",
        "Loan_Amount",
    )

# Print one line per attribute. Attributes listed as continuous are not
# evaluated here; they are reported with a fixed 0.
for attr in attributes:
    gini = 0 if is_continuous_attribute(attr) else gini_index_for_attribute(data, attr)
    print(f"Gini index for {attr}: {gini}")

Gini index for Married: 0.4589101605927139
Gini index for Dependents: 0.45851901615514196
Gini index for Education: 0.46011432316894735
Gini index for Self_Employed: 0.4575959957796785
Gini index for Applicant_Income: 0
Gini index for Coapplicant_Income: 0
Gini index for Loan_Amount: 0
Gini index for Credit_History: 0.42423361264420323
