# Import Libraries and Load Data

In [7]:
import pandas as pd
import os
# Define the file path
df = pd.read_csv('tennis.csv')
# Strip any leading or trailing spaces from column names
df.columns = df.columns.str.strip()
# Display the first few rows and check columns
print("First few rows of the DataFrame:")
print(df)
print("\nColumn names:")
print(df.columns)

First few rows of the DataFrame:
     Outlook Play
0      Rainy  yes
1      Sunny  yes
2   Overcast  yes
3   Overcast  yes
4      Sunny   no
5      Rainy  yes
6      Sunny  yes
7   Overcast  yes
8      Rainy   no
9      Sunny   no
10     Sunny  yes
11     Rainy   no
12  Overcast  yes
13  Overcast  yes

Column names:
Index(['Outlook', 'Play'], dtype='object')


# Verify Columns and Display the Frequency Table

In [8]:
# Check if the columns 'Outlook' and 'Play' exist
if 'Outlook' in df.columns and 'Play' in df.columns:
    # Calculate Frequency Table
    frequency_table = pd.crosstab(df['Outlook'], df['Play'])
    frequency_table.columns = ['no', 'yes']
    frequency_table.loc['total'] = frequency_table.sum()
    frequency_table['total'] = frequency_table.sum(axis=1)
    
    print("\nFrequency Table:")
    print(frequency_table)
else:
    print("The required columns 'Outlook' and 'Play' are missing in the DataFrame.")



Frequency Table:
          no  yes  total
Outlook                 
Overcast   0    5      5
Rainy      2    2      4
Sunny      2    3      5
total      4   10     14


# Calculate and Display the Likelihood Table

In [9]:
if 'Outlook' in df.columns and 'Play' in df.columns:
    # Calculate Likelihood Table
    likelihood_table = pd.crosstab(df['Outlook'], df['Play'], normalize='all')
    likelihood_table = likelihood_table.rename_axis('Outlook').reset_index()
    likelihood_table['total'] = likelihood_table['yes'] + likelihood_table['no']
    likelihood_table['pro'] = likelihood_table['yes'] / likelihood_table['total']
    
    # Calculate totals for 'no' and 'yes'
    total_yes = likelihood_table['yes'].sum() / likelihood_table['total'].sum()
    total_no = likelihood_table['no'].sum() / likelihood_table['total'].sum()
    total_row = pd.DataFrame([['total', total_no, total_yes]],
                             columns=['Outlook', 'no', 'yes'])
    
    # Append the totals row
    likelihood_table = pd.concat([likelihood_table, total_row], ignore_index=True)
    
    # Drop the 'pro' column
    likelihood_table = likelihood_table.drop(columns=['pro'])
    
    # Convert values to numeric where necessary
    likelihood_table[['no', 'yes']] = likelihood_table[['no', 'yes']].apply(pd.to_numeric)
    
    # Format the output to match required specifications
    likelihood_table = likelihood_table.set_index('Outlook')
    likelihood_table = likelihood_table.round(2)
    
    print("\nLikelihood Table:")
    print(likelihood_table)
else:
    print("The required columns 'Outlook' and 'Play' are missing in the DataFrame.")



Likelihood Table:
            no   yes  total
Outlook                    
Overcast  0.00  0.36   0.36
Rainy     0.14  0.14   0.29
Sunny     0.14  0.21   0.36
total     0.29  0.71    NaN


# Apply Bayes' Theorem and Make Prediction

In [10]:
if 'Outlook' in df.columns and 'Play' in df.columns:
    # Apply Bayes' Theorem for Prediction
    prior_play_yes = df['Play'].value_counts(normalize=True).get('yes', 0)
    prior_play_no = df['Play'].value_counts(normalize=True).get('no', 0)
    
    P_sunny_given_play_yes = likelihood_table.loc['Sunny', 'yes']
    P_sunny_given_play_no = likelihood_table.loc['Sunny', 'no']
    
    posterior_play_yes = P_sunny_given_play_yes * prior_play_yes
    posterior_play_no = P_sunny_given_play_no * prior_play_no
    
    total = posterior_play_yes + posterior_play_no
    posterior_play_yes /= total
    posterior_play_no /= total
    
    print(f'\nP(Play=yes | Outlook=Sunny) = {posterior_play_yes:.2f}')
    print(f'P(Play=no | Outlook=Sunny) = {posterior_play_no:.2f}')
    
    # Print the Prediction Result
    if posterior_play_yes > posterior_play_no:
        print('If the weather is sunny, the player should play.')
    else:
        print('If the weather is sunny, the player should not play.')
else:
    print("The required columns 'Outlook' and 'Play' are missing in the DataFrame.")


P(Play=yes | Outlook=Sunny) = 0.79
P(Play=no | Outlook=Sunny) = 0.21
If the weather is sunny, the player should play.
