Presented here is one possible solution to the Kickstarter project on KATE.

In [None]:
from sklearn import neighbors


def preprocess(df):
    """Split the dataset into a training set and an evaluation set.

    The DataFrame contains the feature columns, the target column
    ``state`` and a flag column ``evaluation_set``. Rows flagged in
    ``evaluation_set`` are the observations whose target is unknown and
    that must be predicted for KATE; every other row is used for training.

    Only the columns ``deadline``, ``created_at`` and ``launched_at`` are
    kept as features.

    Rows are selected with a boolean mask (position-based) rather than by
    dropping index labels, so a DataFrame whose index contains duplicate
    labels is still split correctly.

    :param df: the dataset
    :type df: pd.DataFrame
    :return: X, y, X_eval -- the training features (pd.DataFrame), the
        training target (pd.Series) and the evaluation features
        (pd.DataFrame)
    """
    feature_columns = ["deadline", "created_at", "launched_at"]

    # Cast to bool so that `~` is a logical negation even when the flag
    # column is stored with object dtype.
    is_eval = df["evaluation_set"].astype(bool)

    # Training data: every row that is NOT part of the evaluation set.
    X = df.loc[~is_eval, feature_columns]
    y = df.loc[~is_eval, "state"]

    # Evaluation data: the rows KATE will score the predictions on.
    X_eval = df.loc[is_eval, feature_columns]

    return X, y, X_eval


def train(X, y, n_neighbors=101):
    """Trains a new k-nearest-neighbours classifier on X and y and returns it.

    The requested number of neighbours is capped at the number of training
    rows: a model fitted with more neighbours than samples cannot be
    queried, so a small training set would otherwise fail at prediction
    time.

    :param X: your processed training data
    :type X: pd.DataFrame
    :param y: your processed label y
    :type y: pd.DataFrame with one column or pd.Series
    :param n_neighbors: upper bound on the number of neighbours used for
        voting (defaults to 101, the value this solution was written with)
    :type n_neighbors: int
    :return: a trained model
    """
    # Never ask for more neighbours than there are samples, and never
    # fewer than one (empty input is still rejected by fit itself).
    k = max(1, min(n_neighbors, len(X)))

    # Selecting options for the algorithm; n_jobs=-1 uses all CPU cores.
    clf = neighbors.KNeighborsClassifier(n_jobs=-1, n_neighbors=k)

    # fit() returns the fitted estimator itself.
    return clf.fit(X, y)


def predict(model, X_eval):
    """Generate predictions for a processed dataset with a trained model.

    On KATE, ``X_eval`` is the evaluation set built by the "preprocess"
    function. When testing locally, any sample test set processed the
    same way can be passed instead.

    The result is whatever ``model.predict`` returns for ``X_eval``;
    it is handed back unchanged.

    :param model: your trained model (must expose a ``predict`` method)
    :param X_eval: a processed test set (on KATE it will be X_eval)
    :return: y_pred, your predictions
    """
    return model.predict(X_eval)