In [1]:
import pandas as pd
import numpy as np
from fractions import Fraction 

In [2]:
df = pd.read_excel('p.1.xls')

In [3]:
df.head()

Unnamed: 0,age,prescription,astigmatic,tear_rate,lenses
0,young,myope,yes,normal,hard
1,young,myope,no,normal,soft
2,young,hypermetrope,yes,reduced,none
3,young,hypermetrope,no,normal,soft
4,young,hypermetrope,no,reduced,none


In [4]:
cls=df['lenses']

In [5]:
data=df.drop('lenses',axis = 1)

In [6]:
data

Unnamed: 0,age,prescription,astigmatic,tear_rate
0,young,myope,yes,normal
1,young,myope,no,normal
2,young,hypermetrope,yes,reduced
3,young,hypermetrope,no,normal
4,young,hypermetrope,no,reduced
5,presbyopic,myope,yes,reduced
6,presbyopic,myope,yes,normal
7,presbyopic,hypermetrope,yes,reduced
8,presbyopic,hypermetrope,yes,normal
9,presbyopic,hypermetrope,no,normal


In [7]:
min_error = 100
ideal_variable = None
result = dict()
response = list()


for i in data:
    
    result[str(i)] = dict()
 
    join_data = pd.DataFrame({"variable":data[i], "label":cls})
    
    cross_table = pd.crosstab(join_data.variable, join_data.label)
            
    summary = cross_table.idxmax(axis=1)
    
    result[str(i)] = dict(summary)
        
    matrix_values = cross_table.values
        
    sum_row = matrix_values.sum(axis=1)
        
    nominator_sum = 0

    denominator_sum = 0

    for k in range(len(sum_row)):
        
        denominator = sum_row[k]
     
        nominator = sum_row[k] - np.max(matrix_values[k])
                                                                              
        nominator_sum  += nominator
                                                                                                    
        denominator_sum += denominator
    
    error = Fraction(nominator_sum,denominator_sum)
            
    if error < min_error:
        
        min_error = error
        
        ideal_variable = i
        
    result_feature = {"variable": str(i), "error":error, "rules": result[str(i)] }
        
    response.append(result_feature)
    

    
    
    
    
    

In [8]:
print("ideal variable: ",ideal_variable)

print("error", min_error)

ideal variable:  astigmatic
error 5/16


In [9]:
result

{'age': {'Pre-presbyopic': 'none', 'presbyopic': 'none', 'young': 'none'},
 'prescription': {'hypermetrope': 'none', 'myope': 'hard'},
 'astigmatic': {'no': 'soft', 'yes': 'none'},
 'tear_rate': {'normal': 'soft', 'reduced': 'none'}}

In [10]:
response

[{'variable': 'age',
  'error': Fraction(1, 2),
  'rules': {'Pre-presbyopic': 'none', 'presbyopic': 'none', 'young': 'none'}},
 {'variable': 'prescription',
  'error': Fraction(7, 16),
  'rules': {'hypermetrope': 'none', 'myope': 'hard'}},
 {'variable': 'astigmatic',
  'error': Fraction(5, 16),
  'rules': {'no': 'soft', 'yes': 'none'}},
 {'variable': 'tear_rate',
  'error': Fraction(5, 16),
  'rules': {'normal': 'soft', 'reduced': 'none'}}]

# 1R algorithm

In [11]:
model = {'variable': ideal_variable,
         'predictor': response[2]['rules']}
print(model)

{'variable': 'astigmatic', 'predictor': {'no': 'soft', 'yes': 'none'}}


In [12]:
def predict(X_test, model):
    variable = model['variable']
    predictor = model['predictor']
    y_predicted = np.array([predictor[sample] for sample in data[variable]])
    return y_predicted

In [13]:
model

{'variable': 'astigmatic', 'predictor': {'no': 'soft', 'yes': 'none'}}

In [14]:
y_predicted = predict(data, model)
print(y_predicted)

['none' 'soft' 'none' 'soft' 'soft' 'none' 'none' 'none' 'none' 'soft'
 'soft' 'none' 'none' 'soft' 'none' 'soft']


In [15]:
# Compute the accuracy by taking the mean of the amounts that y_predicted is equal to y_test
accuracy = np.mean(y_predicted == cls) * 100
print("The test accuracy is {:.1f}%".format(accuracy))

The test accuracy is 68.8%


In [16]:
from sklearn.metrics import classification_report
print(classification_report(cls, y_predicted))

              precision    recall  f1-score   support

        hard       0.00      0.00      0.00         3
        none       0.67      0.75      0.71         8
        soft       0.71      1.00      0.83         5

   micro avg       0.69      0.69      0.69        16
   macro avg       0.46      0.58      0.51        16
weighted avg       0.56      0.69      0.61        16



  'precision', 'predicted', average, warn_for)


# 0R algorithm

In [17]:
zeroModel=cls.describe().top

In [18]:

zero_accuracy=cls.describe().freq/cls.describe()['count']

print("The test accuracy is {:.1f}%".format(zero_accuracy))

The test accuracy is 0.5%
