<a href="https://colab.research.google.com/github/gnryu/DataScience_TermProject/blob/main/modeling_evaluation_func.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Linear Regression**

In [5]:
# input: whole dataset, X value, y value
# return: score of the fitted model on the held-out test split
def Linear_Regression(df, X, y):
    """Fit a single-feature linear regression and report it on a held-out split.

    Args:
        df: Dataset indexable by column label (e.g. a pandas DataFrame).
        X: Label of the single column used as the predictor.
        y: Label of the column used as the response.

    Returns:
        The value of ``model.score`` on the 30% test split (for
        ``LinearRegression`` this is the coefficient of determination R^2).

    Side effects:
        Prints the score, coefficient and intercept, and shows a plot of the
        predicted values over the test inputs.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split
    import matplotlib.pyplot as plt

    # Estimators expect a 2-D feature matrix, so the single column is
    # reshaped to (n_samples, 1).
    X = df[X].values.reshape(-1, 1)
    y = df[y].values

    # Split data (fixed seed keeps the split reproducible between runs)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=10
    )

    # create model & train model
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)

    # scorer
    score = model.score(X_test, y_test)

    # print & draw
    print("Score: " + str(score))
    print("coef_ : " + str(model.coef_))
    print("intercept_ : " + str(model.intercept_))

    plt.plot(X_test, y_predict)
    plt.show()

    # Hand the score back so callers do not have to parse stdout.
    return score

## **KNN**

In [2]:
# modeling
# input: whole dataset, target feature, scaling/encoding method
# return: best accurate k, test set performance of KNN model of that k
def KNN(data, target, scale_encode_method):
    """Grid-search ``n_neighbors`` for a KNN classifier and score the winner.

    Args:
        data: Dataset containing both the features and the target column.
        target: Label of the target column.
        scale_encode_method: Callable applied to the feature frame (``data``
            without ``target``); its return value is used as the feature
            matrix.

    Returns:
        A tuple ``(best_params, test_score)`` where ``best_params`` is the
        ``best_params_`` dict of the grid search (key ``'n_neighbors'``) and
        ``test_score`` is the score of that model on the 20% test split.
    """
    # numpy is imported locally, like the other dependencies of this
    # function, so the function does not rely on a notebook-level `np`.
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import GridSearchCV

    # create a dataframe with all training data except the target feature
    X = data.drop(columns=[target])

    X = scale_encode_method(X)  # scale the data
    y = data[target].values  # separate target feature

    # split dataset into train and test data; stratify keeps the class
    # proportions of y the same in both parts
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1, stratify=y
    )

    # create a dictionary of all values we want to test for n_neighbors (1..24)
    param_grid = {'n_neighbors': np.arange(1, 25)}

    # use GridSearch with 5-fold cross validation to test all values
    knn_gscv = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)

    # fit model to data
    knn_gscv.fit(X_train, y_train)

    # With the default refit=True, the search has already refit the best
    # configuration on the whole training split, so no second manual fit
    # is needed to evaluate it on the test split.
    best_knn = knn_gscv.best_estimator_

    return knn_gscv.best_params_, best_knn.score(X_test, y_test)

In [3]:
# evaluate
# input: whole dataset, target feature, scaling/encoding method, k obtained through KNN()
# return: performance in each bundle of k-fold cross validation
def KNN_eval(data, target, scale_encode_method, knn_best_k):
    """Cross-validate a KNN classifier that uses a previously selected k.

    Args:
        data: Dataset containing both the features and the target column.
        target: Label of the target column.
        scale_encode_method: Callable applied to the feature frame (``data``
            without ``target``); its return value is used as the feature
            matrix.
        knn_best_k: Mapping holding the chosen neighbour count under the
            key ``'n_neighbors'``.

    Returns:
        The scores produced by ``cross_val_score`` with ``cv=5``, one per fold.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier

    # features: everything but the target column, passed through the
    # caller-supplied scaling/encoding step
    features = scale_encode_method(data.drop(columns=[target]))
    # labels: the target column as a plain array
    labels = data[target].values

    neighbour_count = knn_best_k.get('n_neighbors')
    classifier = KNeighborsClassifier(n_neighbors=neighbour_count)

    return cross_val_score(classifier, features, labels, cv=5)

## **Decision Tree Classifier**

In [4]:
# input: whole dataset (must contain a 'Severity' column, used as the target)
# return: decision tree's test score and train score
def decision_tree(data):
    """Train a decision tree to predict ``'Severity'`` and report its scores.

    Args:
        data: pandas DataFrame with a ``'Severity'`` column; every other
            column is used as a feature.

    Returns:
        A tuple ``(test_score, train_score)`` with the classifier's score on
        the 30% test split and on the training split.

    Side effects:
        Prints both scores rounded to two decimals.
    """
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.model_selection import train_test_split

    # data: drop the target by name rather than by position, so the label
    # can never end up among the features regardless of column order
    # (drop returns a new frame; the caller's data is not modified)
    X = data.drop(columns=['Severity']).values
    # target
    y = data['Severity'].values

    # split data train,test (stratified on the target, fixed seed)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=1
    )
    # use decisiontree
    tree_model = DecisionTreeClassifier()
    # fit to model
    tree_model.fit(X_train, y_train)

    # check score
    train_score = tree_model.score(X_train, y_train)
    test_score = tree_model.score(X_test, y_test)
    print("Train Set Score:{:.2f}".format(train_score))
    print("test Set Score:{:.2f}".format(test_score))

    # Return the scores in the order promised by the header comment.
    return test_score, train_score