In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
#
# This is how to get <lines> from the text file downloaded from
#  https://archive.ics.uci.edu/ml/datasets/Iris
# with open('iris.data') as f:
#     lines = f.readlines()
# f.close()
#


# Step 1: (some preprocessing)
# 
# <lines> is a 1-dim list, which looks like:
#   ['5.1,3.5,1.4,0.2,Iris-setosa\n', '4.9,3.0,1.4,0.2,Iris-setosa\n', '4.7,3.2,1.3,0.2,Iris-setosa\n', ...]
# For each entry, the string is of the form 'sepal_length, sepal_width, petal_length, petal_width, class\n'
# 
# Similar to what you did in the midterm question
#
# Convert <lines> into a dataframe and store it in a variable named <data>
#
# Split the dataset into training and testing according to the split_ratio.
# 
# return of the function should be of the order X_train, X_test, Y_train, Y_test
def data_prep(lines, split_ratio):
    """Parse raw Iris records into features/labels and split them in order.

    Args:
        lines: Iterable of strings, each of the form
            'sepal_length,sepal_width,petal_length,petal_width,class',
            optionally ending in a newline. Blank entries are skipped.
            The argument itself is not modified.
        split_ratio: Passed to ``train_test_split`` as ``train_size``.

    Returns:
        ``X_train, X_test, Y_train, Y_test``. The X frames hold the four
        measurements converted to float; the Y frames are single-column
        ('class') DataFrames. Rows keep their input order (no shuffling).
    """
    feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

    # Build new row lists instead of overwriting the caller's entries, so the
    # same <lines> can be handed to data_prep again. Stripping each field drops
    # the trailing newline that would otherwise stay glued to the class label.
    rows = [
        [field.strip() for field in line.split(",")]
        for line in lines
        if line.strip()
    ]
    df = pd.DataFrame(rows, columns=feature_names + ['class'])
    X = df[feature_names].astype(float)
    Y = df[['class']]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, train_size=split_ratio, shuffle=False
    )
    return X_train, X_test, Y_train, Y_test

# Step 2: (fit with decision tree model)
#
# Use built-in decision tree classifier in sklearn to fit a decision tree model 
# using the training set from the previous step
# Just call DecisionTreeClassifier() with default setting, this means Gini Impurity
# is calculated when deciding the node features.
# 
# Predict on the test set
#
# Important note: Please fix the random_state to be 1 in order to pass the test
def DT_with_prediction(lines, split_ratio):
    """Fit a default-criterion decision tree and print its test accuracy.

    Args:
        lines: Raw Iris records, forwarded unchanged to ``data_prep``.
        split_ratio: Training fraction, forwarded unchanged to ``data_prep``.

    Prints ``Accuracy: <value>`` with six decimals; nothing is returned.
    """
    # Step 1: parse the records and split them into train/test parts.
    train_X, test_X, train_Y, test_Y = data_prep(lines, split_ratio)

    # Default settings (Gini impurity); random_state is pinned to 1 so the
    # fitted tree is reproducible.
    classifier = DecisionTreeClassifier(random_state=1)
    classifier.fit(train_X, train_Y)

    # Mean accuracy of the fitted tree on the held-out rows.
    acc = classifier.score(test_X, test_Y)

    # Optional visualization (needs sklearn.tree.export_graphviz and pydotplus):
    #   dot = export_graphviz(<your decision tree model object>, out_file=None,
    #                         feature_names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'])
    #   pydotplus.graph_from_dot_data(dot).write_png('mydecisiontree.png')

    print(f"Accuracy: {acc:.6f}")

# Step 3:
# 
# Do almost the same as step 2
# Try using 'entropy' as criterion instead of default Gini this time.
#
# Important note: Please fix the random_state to be 1 in order to pass the test
def DT_with_prediction_2(lines, split_ratio):
    """Fit an entropy-criterion decision tree and print its test accuracy.

    Args:
        lines: Raw Iris records, forwarded unchanged to ``data_prep``.
        split_ratio: Training fraction, forwarded unchanged to ``data_prep``.

    Prints ``Accuracy: <value>`` with six decimals; nothing is returned.
    """
    # Step 1: parse the records and split them into train/test parts.
    train_X, test_X, train_Y, test_Y = data_prep(lines, split_ratio)

    # Same setup as the Gini variant except for the split criterion;
    # random_state is pinned to 1 so the fitted tree is reproducible.
    classifier = DecisionTreeClassifier(criterion="entropy", random_state=1)
    classifier.fit(train_X, train_Y)

    # Mean accuracy of the fitted tree on the held-out rows.
    acc = classifier.score(test_X, test_Y)
    print(f"Accuracy: {acc:.6f}")
def test1(lines):
    """Print the head of each data_prep output for a 0.8 training ratio."""
    # data_prep returns X_train, X_test, Y_train, Y_test; print them in that order.
    for part in data_prep(lines, split_ratio=0.8):
        print(part.head())
    
def test2(lines):
    """Print the head of each data_prep output for a 0.9 training ratio."""
    # data_prep returns X_train, X_test, Y_train, Y_test; print them in that order.
    for part in data_prep(lines, split_ratio=0.9):
        print(part.head())
    
def test3(lines):
    """Print the head of each data_prep output for a 0.5 training ratio."""
    # data_prep returns X_train, X_test, Y_train, Y_test; print them in that order.
    for part in data_prep(lines, split_ratio=0.5):
        print(part.head())
      
def test4(lines):
    """Run the default-criterion tree with a 0.6 training ratio."""
    DT_with_prediction(lines, split_ratio=0.6)

def test5(lines):
    """Run the default-criterion tree with a 0.7 training ratio."""
    DT_with_prediction(lines, split_ratio=0.7)
        
def test6(lines):
    """Run the default-criterion tree with a 0.8 training ratio."""
    DT_with_prediction(lines, split_ratio=0.8)
        
def test7(lines):
    """Run the default-criterion tree with a 0.9 training ratio."""
    DT_with_prediction(lines, split_ratio=0.9)

def test8(lines):
    """Run the entropy-criterion tree with a 0.6 training ratio."""
    DT_with_prediction_2(lines, split_ratio=0.6)
    
def test9(lines):
    """Run the entropy-criterion tree with a 0.7 training ratio."""
    DT_with_prediction_2(lines, split_ratio=0.7)
    
def test10(lines):
    """Run the entropy-criterion tree with a 0.8 training ratio."""
    DT_with_prediction_2(lines, split_ratio=0.8)
        
def test11(lines):
    """Run the entropy-criterion tree with a 0.9 training ratio."""
    DT_with_prediction_2(lines, split_ratio=0.9)
    
if __name__ == '__main__':
    # First stdin line: the suffix appended to 'test' to pick the function to run.
    test_id = input()
    # Second stdin line: the records, written as a bracketed list of
    # single-quoted strings (presumably the printed form of <lines>).
    raw_records = input()

    records = []
    for token in raw_records.strip('][').split("'"):
        # Splitting on the quote character leaves empty pieces and ", "
        # separators between the records; skip those.
        if token.strip(' ') in ('', ','):
            continue
        # NOTE: strip('\\n') removes any run of backslash / 'n' characters at
        # either end (a character set, not a suffix), which discards the
        # literal "\n" text ending each record.
        records.append(token.strip('\\n'))

    # Look up the selected test function by name and run it on the records.
    globals()['test' + test_id](records)