In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import networkx as nx

In [None]:
import seaborn as sns

**Load Data Set**

In [None]:
# Read the tennis dataset from a path relative to the notebook's working directory.
data = pd.read_csv('./dataset/tennis.csv')
# Echo the frame as the cell output.
data

In [None]:
# List the column labels of the loaded frame.
data.columns

In [None]:
# Print per-column dtype and non-null counts.
data.info()

In [None]:
# Distinct values of the outlook column.
data['outlook'].unique()

In [None]:
# Number of rows per outlook value.
data['outlook'].value_counts()

In [None]:
# Number of rows per play outcome.
data['play'].value_counts()

# Outlook vs Play

In [None]:
# Group the two columns of interest by outlook.
group = data.loc[:, ['outlook', 'play']].groupby(by=['outlook'])

In [None]:
# Count the non-null `play` entries within each outlook group.
group.count()

In [None]:
# Cross-tabulate counts: one row per play outcome, one column per outlook value.
table = pd.crosstab(index=data['play'], columns=data['outlook'])
table

In [None]:
# Append the margins: a 'total' row (sum over play outcomes) and then a
# 'total' column (sum over outlook values). Explicit labels are summed so the
# cell yields the same result when it is re-run.
table.loc['total'] = table.loc[['no', 'yes']].sum()
table['total'] = table[['overcast', 'rainy', 'sunny']].sum(axis=1)

# Pivot Table

In [None]:
# Show the count table including the 'total' row and column added above.
table

# Contingency Table

In [None]:
# Turn counts into probabilities by dividing every cell by the grand total
# (bottom-right cell of the count table). Reading the divisor from the table
# instead of hard-coding 14 keeps the cell correct if the dataset changes size.
prob = table.div(table.loc['total', 'total'])
prob

##  Marginal Probability:
- $P(overcast)$
- $P(rainy)$
- $P(sunny)$

In [None]:
# Marginal P(overcast): the 'total' row of the 'overcast' column.
prob.loc['total', 'overcast']

In [None]:
# Marginal probability of each outlook value, read from the 'total' row.
p_o = prob.loc['total', 'overcast']  # P(overcast)
p_r = prob.loc['total', 'rainy']     # P(rainy)
p_s = prob.loc['total', 'sunny']     # P(sunny)


In [None]:
# Report the three outlook marginals, rounded to three decimals.
print('Probability of overcast: P(overcast) = %0.3f' % p_o)
print('Probability of rainy: P(rainy) = %0.3f' % p_r)
print('Probability of sunny: P(sunny) = %0.3f' % p_s)

##  Marginal Probability:
- $P(no)$
- $P(yes)$

In [None]:
# Marginal probability of each play outcome, read from the 'total' column.
p_no = prob.loc['no', 'total']    # P(no)
p_yes = prob.loc['yes', 'total']  # P(yes)

In [None]:
# Report the two play marginals, rounded to three decimals.
print('Probability of No: P(no) = %0.3f' % p_no)
print('Probability of Yes: P(yes) = %0.3f' % p_yes)

# Conditional Probability

# $P(A|B) = \frac{P(A \text{ and } B)}{P(B)}$
- $P(Sunny | Yes)$
- $P(Sunny | No)$
- $P(Overcast | Yes)$
- $P(Overcast | No)$
- $P(Rainy | Yes)$
- $P(Rainy | No)$


>  Working on how to compute conditional probability

In [None]:
# Show the probability table for reference while working out the lookups below.
prob

In [None]:
# Column labels of the probability table (the outlook values plus 'total').
prob.columns

In [None]:
# Row labels of the probability table.
prob.index

In [None]:
prob.loc['yes', 'overcast']  # joint probability P(overcast and yes)

In [None]:
prob.loc['yes', 'total']  # marginal probability P(yes)

In [None]:
prob.loc['total', 'overcast']  # marginal probability P(overcast)

In [None]:
prob # whole table; e.g. P(rainy and yes) is the cell at row 'yes', column 'rainy'

In [None]:
def margin(A):
    """Return the marginal probability of label ``A`` from the global ``prob`` table.

    ``A`` is looked up as a column label first and as a row label second, which
    is the same precedence the original try/except gave. The marginal is the
    last entry of the matching column or row (the 'total' cell).

    Parameters
    ----------
    A : hashable
        A column label or a row label of ``prob``.

    Returns
    -------
    scalar
        The last element of the selected column or row.

    Raises
    ------
    KeyError
        If ``A`` is neither a column nor a row label of ``prob``.
    """
    # Explicit membership tests replace the bare ``except:``, which hid every
    # kind of error, and ``.iloc[-1]`` replaces ``[-1]``: integer keys on a
    # label-indexed Series are deprecated as positional lookups in pandas.
    # The 'in try' / 'in except' debug prints were removed.
    if A in prob.columns:
        return prob[A].iloc[-1]
    if A in prob.index:
        return prob.loc[A].iloc[-1]
    raise KeyError(A)

In [None]:
# Quick check of the helper: 'yes' is a row label, so this yields P(yes).
margin('yes')

> 

In [None]:
def jointprob(A,B,table):
    """Look up the joint probability P(A and B) in a contingency table.

    Parameters
    ----------
    A : hashable
        Column label of ``table``.
    B : hashable
        Row label of ``table``.
    table : pandas.DataFrame
        Table whose cell at row ``B``, column ``A`` is returned.

    Returns
    -------
    scalar
        The value stored at ``table.loc[B, A]``.
    """
    return table.loc[B, A]

def marginprob(B,table):
    """Return the marginal probability P(B) from a contingency table.

    ``B`` is resolved as a row label first and as a column label second (the
    same precedence as the original try/except). The marginal is the last
    entry of the matching row or column, i.e. the 'total' cell.

    Parameters
    ----------
    B : hashable
        A row label or a column label of ``table``.
    table : pandas.DataFrame
        Table whose last row and last column hold the totals.

    Returns
    -------
    scalar
        The last element of the selected row or column.

    Raises
    ------
    KeyError
        If ``B`` is neither a row nor a column label of ``table``.
    """
    # Explicit membership tests replace the bare ``except:`` so unrelated
    # errors are no longer swallowed. ``.iloc[-1]`` is used because ``[-1]``
    # on a label-indexed Series is a deprecated positional fallback in pandas.
    if B in table.index:
        return table.loc[B].iloc[-1]
    if B in table.columns:
        return table[B].iloc[-1]
    raise KeyError(B)

def conditional(event,given,table):
    """Return P(event | given) = P(event and given) / P(given).

    ``event`` and ``given`` are passed to ``jointprob`` as its column and row
    labels respectively. The denominator is ``marginprob(given, table)``.
    """
    joint = jointprob(event, given, table)
    evidence = marginprob(given, table)
    return joint / evidence

In [None]:
# Show the probability table that the conditional probabilities below are read from.
prob

In [None]:
# P(sunny | yes)
p_sunny_given_yes = conditional(event='sunny', given='yes', table=prob)
print('Probability of sunny given yes: P(sunny|yes) = %0.3f' % p_sunny_given_yes)

In [None]:
# P(sunny | no)
p_sunny_given_no = conditional(event='sunny', given='no', table=prob)
print('Probability of sunny given no: P(sunny|no) = %0.3f' % p_sunny_given_no)

In [None]:
# P(overcast | yes)
p_overcast_given_yes = conditional(event='overcast', given='yes', table=prob)
print('Probability of overcast given yes: P(overcast|yes) = %0.3f' % p_overcast_given_yes)

In [None]:
# P(overcast | no)
p_overcast_given_no = conditional(event='overcast', given='no', table=prob)
print('Probability of overcast given no: P(overcast|no) = %0.3f' % p_overcast_given_no)

In [None]:
# P(rainy | yes)
p_rainy_given_yes = conditional(event='rainy', given='yes', table=prob)
print('Probability of rainy given yes: P(rainy|yes) = %0.3f' % p_rainy_given_yes)

In [None]:
# P(rainy | no)
p_rainy_given_no = conditional(event='rainy', given='no', table=prob)
print('Probability of rainy given no: P(rainy|no) = %0.3f' % p_rainy_given_no)

# Probability Tree

In [None]:
# Name carried by the column axis of the probability table.
prob.columns.name

In [None]:
# Preview the label of the 'yes' branch. `ind1` is only assigned in a later
# cell, so evaluating the bare name here raises NameError on a fresh kernel
# (Restart & Run All). The same string is built directly instead.
'P(%s)=%0.3f' % ('yes', marginprob('yes', prob))

In [None]:
# Root label of the tree: the name of the row axis of the probability table.
start = prob.index.name

# First-level branches: marginal probability of each play outcome.
ind1, ind2 = (
    'P(%s)=%0.3f' % (outcome, marginprob(outcome, prob))
    for outcome in ('yes', 'no')
)

# Leaves under the 'yes' branch: P(outlook | yes).
event11, event12, event13 = (
    '%s=%0.3f' % (weather, conditional(weather, 'yes', prob))
    for weather in ('sunny', 'overcast', 'rainy')
)

# Leaves under the 'no' branch: P(outlook | no).
event21, event22, event23 = (
    '%s=%0.3f' % (weather, conditional(weather, 'no', prob))
    for weather in ('sunny', 'overcast', 'rainy')
)

In [None]:
# Edge list of the probability tree: the root fans out to the two outcome
# nodes, and each outcome node fans out to its three outlook leaves.
drawData = {
    'from': [start] + [ind1] * 3 + [start] + [ind2] * 3,
    'to': [ind1, event11, event12, event13, ind2, event21, event22, event23],
}
draw = pd.DataFrame(data=drawData)
draw

In [None]:
fig = plt.figure(figsize=(10,5))

# Build a directed graph so every edge points from parent to child in the
# tree. Without `create_using`, from_pandas_edgelist returns an undirected
# Graph, which contradicts the intent of drawing arrows.
G = nx.from_pandas_edgelist(draw, 'from', 'to', create_using=nx.DiGraph())

# Draw the tree. The misspelled `linewitdh` keyword was removed: current
# networkx validates drawing keywords and raises ValueError on unknown ones,
# while older releases silently ignored it, so dropping it keeps the picture
# those releases produced.
nx.draw(G, with_labels=True, node_size=2000, alpha=0.8, arrows=True, cmap=plt.cm.Set1)


#  Classification Report

In [None]:
# Show the probability table again before applying Bayes' theorem to it.
prob

# Bayes Theorem
## $P(A | B) = \frac{P(A) * P(B|A)}{P(B)}$
#### Example:
### $P(yes | rainy) = \frac{P(yes) * P(rainy|yes)}{P(rainy)}$

### $= \frac{P(yes) * P(rainy|yes)}{P(yes) * P(rainy|yes) + P(no) * P(rainy|no)}$

In [None]:
def bayes(event,given,table):
    """Apply Bayes' theorem to a contingency table of probabilities.

        P(event | given) = P(event) * P(given | event) / P(given)

    ``event`` and ``given`` are labels of ``table``. The two marginals come
    from ``marginprob`` and the likelihood from ``conditional``.
    """
    prior = marginprob(event, table)              # P(event)
    likelihood = conditional(given, event, table)  # P(given | event)
    evidence = marginprob(given, table)           # P(given)

    return prior * likelihood / evidence

In [None]:
# Posterior P(play | outlook) for both play outcomes, one outlook value per line.
p_yes_sunny, p_no_sunny = (bayes(answer, 'sunny', prob) for answer in ('yes', 'no'))
p_yes_overcast, p_no_overcast = (bayes(answer, 'overcast', prob) for answer in ('yes', 'no'))
p_yes_rainy, p_no_rainy = (bayes(answer, 'rainy', prob) for answer in ('yes', 'no'))


# Testing on the training data

In [None]:
def testGreater(yes,no):
    if yes >= no: return 'yes'
    else: return 'no'

In [None]:
# Predict play for every row from its outlook alone. Any value other than
# 'sunny' or 'overcast' is scored with the rainy posteriors, as in the
# original if/elif/else chain.
pred = [
    testGreater(p_yes_sunny, p_no_sunny) if outlook == 'sunny'
    else testGreater(p_yes_overcast, p_no_overcast) if outlook == 'overcast'
    else testGreater(p_yes_rainy, p_no_rainy)
    for outlook in data['outlook']
]

In [None]:
# Attach the predictions as a new column (this mutates `data` in place).
# pd.Series(pred) gets a default 0..n-1 index and pandas aligns on index when
# assigning, so this presumably relies on `data` still having the default
# index from read_csv. TODO: confirm if `data` is ever re-indexed.
data['outlook_play'] = pd.Series(pred)
# Show the first row to confirm the new column exists.
data.head(1)

!pip install pandas_ml

In [None]:
from pandas_ml import ConfusionMatrix

In [None]:
# Actual and predicted labels as NumPy string arrays.
y_true = np.array(data['play'].tolist())
y_pred = np.array(data['outlook_play'].tolist())

# Boolean views ('yes' -> True) of both arrays, shown as the cell output.
y_true == 'yes', y_pred == 'yes'

In [None]:
# Confusion matrix over boolean labels, with True standing for 'yes'.
cm = ConfusionMatrix(y_true == 'yes', y_pred == 'yes')

In [None]:
# Show the confusion matrix.
cm

In [None]:
# print_stats() writes the report itself and returns None, so wrapping it in
# print() appended a stray "None" line to the output.
cm.print_stats()

# Great Job !!!