<a href="https://colab.research.google.com/github/goldss97/opensw_project2/blob/main/opensw_proj2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
%%writefile opensw_proj2.py
#PLEASE WRITE THE GITHUB URL BELOW!
#
import pandas as pd
import sys
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

def load_dataset(dataset_path):
	return pd.read_csv(dataset_path)

def dataset_stat(dataset_df):
  result=dataset_df.groupby("target").size()
  return dataset_df.shape[1]-1,result[0],result[1]

def split_dataset(dataset_df, testset_size):
  
  X = dataset_df.drop(columns="target",axis=1)
  y = dataset_df["target"]
  X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=testset_size,random_state=2)
  return X_train,X_test,y_train,y_test

def decision_tree_train_test(x_train, x_test, y_train, y_test):
  dt_cls=DecisionTreeClassifier()
  dt_cls.fit(x_train,y_train)
  return accuracy_score(y_test,dt_cls.predict(x_test)),precision_score(y_test,dt_cls.predict(x_test)),recall_score(y_test,dt_cls.predict(x_test))

def random_forest_train_test(x_train, x_test, y_train, y_test):
  rf_cls=RandomForestClassifier()
  rf_cls.fit(x_train,y_train)
  return accuracy_score(rf_cls.predict(x_test),y_test),precision_score(rf_cls.predict(x_test),y_test),recall_score(rf_cls.predict(x_test),y_test)

def svm_train_test(x_train, x_test, y_train, y_test):
  svm_pipe=make_pipeline(
      StandardScaler(),
      SVC()
  )
  svm_pipe.fit(x_train,y_train)
  return accuracy_score(y_test,svm_pipe.predict(x_test)),precision_score(y_test,svm_pipe.predict(x_test)),recall_score(y_test,svm_pipe.predict(x_test))

def print_performances(acc, prec, recall):
	#Do not modify this function!
	print ("Accuracy: ", acc)
	print ("Precision: ", prec)
	print ("Recall: ", recall)

if __name__ == '__main__':
	#Do not modify the main script!
	data_path = sys.argv[1]
	data_df = load_dataset(data_path)

	n_feats, n_class0, n_class1 = dataset_stat(data_df)
	print ("Number of features: ", n_feats)
	print ("Number of class 0 data entries: ", n_class0)
	print ("Number of class 1 data entries: ", n_class1)

	print ("\nSplitting the dataset with the test size of ", float(sys.argv[2]))
	x_train, x_test, y_train, y_test = split_dataset(data_df, float(sys.argv[2]))

	acc, prec, recall = decision_tree_train_test(x_train, x_test, y_train, y_test)
	print ("\nDecision Tree Performances")
	print_performances(acc, prec, recall)

	acc, prec, recall = random_forest_train_test(x_train, x_test, y_train, y_test)
	print ("\nRandom Forest Performances")
	print_performances(acc, prec, recall)

	acc, prec, recall = svm_train_test(x_train, x_test, y_train, y_test)
	print ("\nSVM Performances")
	print_performances(acc, prec, recall)

Writing opensw_proj2.py


In [45]:
%%python opensw_proj2.py /content/drive/MyDrive/opensw_project2/heart.csv 0.4



Number of features:  13
Number of class 0 data entries:  499
Number of class 1 data entries:  526

Splitting the dataset with the test size of  0.4

Decision Tree Performances
Accuracy:  0.9463414634146341
Precision:  0.9619565217391305
Recall:  0.921875

Random Forest Performances
Accuracy:  0.9609756097560975
Precision:  0.953125
Recall:  0.9631578947368421

SVM Performances
Accuracy:  0.9048780487804878
Precision:  0.8963730569948186
Recall:  0.9010416666666666
