<div align="center">
    <h1>Projet Algorithmique - Decision Tree </h1>
    <h2>Ikram IDDOUCH </h2>
    
[![Python](https://img.shields.io/badge/python-blue.svg)](https://shields.io/)
[![VisualStudioCode](https://img.shields.io/badge/VisualStudioCode-green.svg)](https://shields.io/)
</div>


<h1>Première partie : Apprentissage </h1>

In [1]:
from collections import Counter
import math
import pandas as pd
from sklearn.metrics import confusion_matrix


In [2]:
def entropy(data):
    """Return the Shannon entropy (base 2) of the labels in ``data``.

    Parameters
    ----------
    data : sized iterable of hashable values
        Observed class labels (a list, a pandas Series, ...).

    Returns
    -------
    int or float
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of every
        distinct label. ``0`` is returned when ``data`` is empty.
    """
    frequencies = Counter(data)
    size = len(data)
    result = 0
    # An empty Counter produces no iterations, so the result stays 0 and
    # the division by ``size`` is never evaluated for empty input.
    for occurrences in frequencies.values():
        p = occurrences / size
        result -= p * math.log2(p)
    return result


In [3]:
def gain(data, attribute, target):
    """Return the information gain obtained by splitting on ``attribute``.

    Parameters
    ----------
    data : pandas.DataFrame
        Examples; must contain the ``attribute`` and ``target`` columns.
    attribute : str
        Column whose distinct values define the split.
    target : str
        Column holding the class labels.

    Returns
    -------
    int or float
        Entropy of the target column minus the size-weighted entropy of the
        target within each subset sharing one value of ``attribute``.
    """
    n_rows = len(data)
    labels = data[target]
    column = data[attribute]

    weighted_entropy = 0
    for value in column.unique():
        # Labels of the rows whose attribute equals the current value.
        branch_labels = labels[column == value]
        weighted_entropy += (len(branch_labels) / n_rows) * entropy(branch_labels)

    return entropy(labels) - weighted_entropy

In [4]:
def best_attribute(data, target, non_target):
    """Return the attribute of ``non_target`` with the highest information gain.

    Parameters
    ----------
    data : pandas.DataFrame
        Examples passed through to ``gain``.
    target : str
        Name of the class-label column.
    non_target : iterable of str
        Candidate attribute names.

    Returns
    -------
    str
        The candidate with the largest gain; on ties the first one
        encountered wins.

    Raises
    ------
    ValueError
        If ``non_target`` is empty (raised by ``max``).
    """
    scores = {candidate: gain(data, candidate, target) for candidate in non_target}
    return max(scores, key=scores.get)

In [5]:
def id3(data, target, non_target):
    """Build a decision tree with the ID3 algorithm.

    Parameters
    ----------
    data : pandas.DataFrame
        Training examples; must contain ``target`` and every column listed
        in ``non_target``.
    target : str
        Name of the class-label column.
    non_target : list of str
        Attributes still available for splitting. The list is NOT modified.

    Returns
    -------
    dict
        Either a leaf ``{target: label}`` or an internal node
        ``{'Attribute': name, 'Neighbor': {value: subtree, ...}}``.
        A leaf built from an empty subset (or from a target column with no
        non-null value) carries ``None`` as its label.
    """
    # Empty subset: return a leaf shaped like every other leaf so that
    # print_tree / traverse_tree can consume it (a bare set cannot be indexed).
    if len(data) == 0:
        return {target: None}

    # No attribute left to split on: fall back to the majority label.
    # Series.mode() returns a Series (sorted, possibly several values on a
    # tie), so take its first element to store a scalar in the leaf.
    if not non_target:
        modes = data[target].mode()
        return {target: modes.iloc[0] if len(modes) else None}

    # Pure subset: every example carries the same label.
    if len(data[target].unique()) == 1:
        return {target: data[target].iloc[0]}

    best = best_attribute(data, target, non_target)
    # Build a new list instead of removing in place, so the caller's list
    # (and the lists seen by sibling branches) stay untouched.
    remaining = [attribute for attribute in non_target if attribute != best]

    node = {'Attribute': best, 'Neighbor': {}}

    for value in data[best].unique():
        subdata = data[data[best] == value].drop(columns=best)
        node['Neighbor'][value] = id3(subdata, target, remaining)

    return node

In [6]:
def print_tree(tree, target,indent=''):
    """Print a tree produced by ``id3`` as an indented outline.

    Parameters
    ----------
    tree : dict
        A leaf ``{target: label}`` or an internal node with the keys
        ``'Attribute'`` and ``'Neighbor'``.
    target : str
        Name of the class-label key that identifies a leaf.
    indent : str, optional
        Prefix written before each line; grows by three spaces per level.
    """
    if target not in tree:
        # Internal node: show the splitting attribute, then every branch.
        print(indent + 'Attribute:', tree['Attribute'])
        deeper = indent + '   '
        for branch_value, subtree in tree['Neighbor'].items():
            print(indent + '--', branch_value)
            print_tree(subtree, target, deeper)
        return

    # Leaf: show the predicted label.
    print(indent + target + ':', tree[target])

In [7]:
# Learn a decision tree on the golf dataset: every column except 'play'
# is offered to id3 as a candidate attribute, then the tree is printed.
golf = pd.read_csv('données/golf.csv')
target_golf = 'play'
non_target_golf = golf.columns.tolist()
non_target_golf.remove(target_golf)
tree_golf = id3(golf, target_golf, non_target_golf)

print_tree(tree_golf, target_golf)

Attribute: outlook
-- sunny
   Attribute: humidity
   -- high
      play: no
   -- normal
      play: yes
-- overcast
   play: yes
-- rain
   Attribute: wind
   -- False
      play: yes
   -- True
      play: no


In [8]:
# '?' cells are converted to pd.NA so that dropna() discards every row
# containing one; the index is renumbered afterwards.
df = pd.read_csv('données/soybean-app.csv').replace('?', pd.NA)
soybean = df.dropna().reset_index(drop=True)

# Every column except 'severity' is a candidate attribute for the tree.
target_soybean = 'severity'
non_target_soybean = soybean.columns.tolist()
non_target_soybean.remove(target_soybean)
tree_soybean = id3(soybean, target_soybean, non_target_soybean)

print_tree(tree_soybean, target_soybean)

Attribute: class 		
-- diaporthe-stem-canker
   Attribute: date
   -- august
      Attribute: hail
      -- yes
         severity: severe
      -- no
         severity: pot-severe
   -- october
      severity: pot-severe
   -- september
      severity: pot-severe
   -- july
      Attribute: seed-tmt
      -- fungicide
         severity: severe
      -- none
         severity: pot-severe
-- rhizoctonia-root-rot
   Attribute: seed-tmt
   -- none
      Attribute: crop-hist
      -- same-lst-sev-yrs
         severity: severe
      -- same-lst-yr
         severity: severe
      -- same-lst-two-yrs
         Attribute: germination
         -- 80-89
            severity: pot-severe
         -- lt-80
            severity: severe
      -- diff-lst-year
         Attribute: date
         -- august
            severity: severe
         -- may
            severity: pot-severe
   -- fungicide
      severity: pot-severe
-- phytophthora-rot
   Attribute: date
   -- april
      Attribute: seed-tmt
     

<h1>Deuxième partie : Prédiction </h1>

In [9]:
def predict(tree, data, target):
    """Predict a label for every row of ``data`` using ``tree``.

    Parameters
    ----------
    tree : dict
        Decision tree in the format produced by ``id3``.
    data : pandas.DataFrame
        Examples to classify, one per row.
    target : str
        Name of the class-label key stored in the tree's leaves.

    Returns
    -------
    list
        One prediction per row, in row order.
    """
    return [traverse_tree(tree, target, row) for _, row in data.iterrows()]

def traverse_tree(node,target, example):
    """Follow ``example`` down the tree and return the label of the leaf reached.

    Parameters
    ----------
    node : dict
        Root of the (sub)tree: a leaf ``{target: label}`` or an internal
        node with the keys ``'Attribute'`` and ``'Neighbor'``.
    target : str
        Name of the class-label key that identifies a leaf.
    example : mapping
        Attribute values of the example (a pandas row or a dict), indexed
        by attribute name.

    Returns
    -------
    object
        The label stored in the leaf.

    Raises
    ------
    KeyError
        If the example holds a value for which the current node has no branch.
    """
    current = node
    # Descend until a leaf (a dict containing the target key) is reached.
    while target not in current:
        split_on = current['Attribute']
        branch = example[split_on]
        current = current['Neighbor'][branch]
    return current[target]


In [10]:
# Reload the training set and rebuild the tree so this cell does not depend
# on variables left behind by an earlier cell.
golf_app = pd.read_csv('données/golf.csv')
target_golf = 'play'
non_target_golf = list(golf_app.keys())
non_target_golf.remove(target_golf)
# Train on the frame loaded just above (golf_app), not on a frame defined
# in a previous cell.
tree_golf = id3(golf_app, target_golf, non_target_golf)


# Confusion matrix on the training data itself.
predictions_train = predict(tree_golf, golf_app,target_golf)
confusion_matrix_train = confusion_matrix(golf_app[target_golf], predictions_train)
print("Matrice de confusion d'apprentissage:")
print(confusion_matrix_train)

# Confusion matrix on the separate prediction file.
# NOTE(review): the recorded output for this cell is a 3x3 matrix whose only
# non-zero row is the first one, i.e. the 'play' column of golf_pred.csv
# holds a label that the tree never predicts - presumably a placeholder for
# unknown outcomes; confirm before interpreting this matrix.
golf_pred = pd.read_csv('données/golf_pred.csv')
predictions_test = predict(tree_golf, golf_pred,target_golf)

confusion_matrix_test = confusion_matrix(golf_pred[target_golf], predictions_test)
print("Matrice de confusion de prédiction:")
print(confusion_matrix_test)


Matrice de confusion d'apprentissage:
[[5 0]
 [0 9]]
Matrice de confusion de prédiction:
[[0 2 2]
 [0 0 0]
 [0 0 0]]
