### Probability Table - Implementation

In [196]:
import pandas as pd

# Location of the training table, relative to the notebook's working directory.
csv_path = r'data/probability_table_data.csv'

# Read the whole CSV into a DataFrame; the trailing expression displays
# its (rows, columns) shape as the cell output.
data = pd.read_csv(filepath_or_buffer=csv_path)
data.shape

(12, 4)

In [197]:
# Number of training rows; used as the denominator of the class priors.
total = data.shape[0]

# Tallies keyed as '<value>_s' / '<value>_n'; starts out empty.
data_fractions = dict()

# How many rows carry each decision label.
sim_count = int((data['Decisao'] == 'sim').sum())
nao_count = int((data['Decisao'] == 'nao').sum())

In [198]:
# Count, for every attribute value, how many rows fall under each decision.
# Keys are '<value>_s' for 'sim' rows and '<value>_n' for 'nao' rows; the
# stored numbers are raw counts (they are divided by the class totals later).
#
# The dictionary is rebuilt from scratch here so that re-running this cell
# does not add the same rows a second time.
data_fractions = {}

# Rows whose decision is neither 'sim' nor 'nao' are ignored.
suffix_by_decision = {'sim': '_s', 'nao': '_n'}

for col in data.columns:
    if col not in ('Clima', 'Temperatura', 'Umidade'):
        continue
    # Walk the attribute column and the decision column side by side instead
    # of materialising every row with iterrows().
    for value, decision in zip(data[col], data['Decisao']):
        suffix = suffix_by_decision.get(decision)
        if suffix is None:
            continue
        key = f'{value}{suffix}'
        data_fractions[key] = data_fractions.get(key, 0) + 1

In [199]:
# The heart of the algorithm: score a new case against both decisions.
def prob(new_case, data_fractions):
    """Estimate the 'sim' / 'nao' probabilities of a new case, in percent.

    Each class score starts at the class prior (class count / total rows) and
    is multiplied by count(value, class) / count(class) for every attribute
    value of the case. The two scores are then normalised to add up to 100.

    Args:
        new_case: mapping of attribute name -> observed value. Only the
            values are used; each is looked up as '<value>_s' / '<value>_n'.
        data_fractions: mapping of '<value>_s' / '<value>_n' -> number of
            training rows with that value and that decision.

    Returns:
        Tuple (p_s, p_n) of percentages, or (0, 0) when there are no training
        rows or both scores end up at zero.

    Note:
        Reads the module-level globals ``total``, ``sim_count`` and
        ``nao_count``.
    """
    # Without training rows the priors are undefined (division by zero).
    if total == 0:
        return 0, 0

    p_s = sim_count / total
    p_n = nao_count / total

    for value in new_case.values():
        count_s = max(data_fractions.get(f'{value}_s', 0), 0)
        count_n = max(data_fractions.get(f'{value}_n', 0), 0)

        # A value never observed under either decision carries no evidence,
        # so it is skipped rather than zeroing both scores.
        if count_s == 0 and count_n == 0:
            continue

        # A value observed under only one decision must contribute a zero
        # likelihood for the other one; skipping the factor would silently
        # treat it as likelihood 1 and inflate that class.
        if sim_count:
            p_s *= count_s / sim_count
        if nao_count:
            p_n *= count_n / nao_count

    p_sum = p_s + p_n
    if p_sum == 0:
        return 0, 0

    return (p_s / p_sum) * 100, (p_n / p_sum) * 100

In [200]:
# Sample to classify: one observed value per attribute.
# Alternative sample: Clima='chuva', Temperatura='ameno', Umidade='alta'
new_case = dict(
    Clima='sol',
    Temperatura='quente',
    Umidade='alta',
)

In [201]:
# Score the sample and report both class probabilities as percentages.
p_s, p_n = prob(new_case=new_case, data_fractions=data_fractions)
print(f'Prob Sim: {p_s:.2f}%\nProb Nao: {p_n:.2f}%')

Prob Sim: 88.89%
Prob Nao: 11.11%
