In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit

# Load the training and test tables from the competition input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../input/train.csv', '../input/test.csv')
)

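# Earlier inline version of the feature/label extraction, kept for reference only;
# it is superseded by encode() below.
#x_train = train.drop(['species', 'id'], axis=1).values
#le = LabelEncoder().fit(train['species'])
#y_train = le.transform(train['species'])
#x_test = test.drop(['id'], axis=1).values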
    
def encode(train, test):
    """Split the raw frames into model inputs, integer labels and submission metadata.

    Parameters
    ----------
    train : DataFrame
        Must contain the columns ``species`` and ``id``; every other column
        is returned as a feature.
    test : DataFrame
        Must contain the column ``id``; every other column is returned as a
        feature.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, test_ids, classes)`` where ``x_train`` and
        ``x_test`` are the frames with the non-feature columns dropped,
        ``y_train`` holds the label-encoded ``species`` values, ``test_ids`` is
        the ``id`` column of ``test`` and ``classes`` is the list of original
        species labels in encoder order.
    """
    # Learn the species vocabulary and encode the training labels in one step.
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(train['species'])

    # Kept for building the submission: one column per class, one row per test id.
    classes = list(label_encoder.classes_)
    test_ids = test['id']

    # Everything except the label / identifier columns is a feature.
    x_train = train.drop(['species', 'id'], axis='columns')
    x_test = test.drop(['id'], axis='columns')

    return x_train, y_train, x_test, test_ids, classes

# Pull features, encoded labels and submission metadata out of the raw frames.
x_train, y_train, x_test, test_ids, classes = encode(train, test)

# Standardise the features: the scaler is fitted on the training set only and
# the same fitted transform is then applied to the test set.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
#Logistic Regression with Grid Search CV
        
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Seed passed to the estimator's random_state.
seed = 42

# Base multinomial logistic regression model. The C and tol values given here
# are only starting values: both are overridden by the grid search below.
# NOTE(review): warm_start=True presumably has no effect under GridSearchCV,
# which fits fresh clones of the estimator -- confirm and consider dropping it.
log_reg = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1,
          class_weight=None, random_state=seed, solver='lbfgs', max_iter=100, multi_class='multinomial', verbose=1, warm_start=True, n_jobs=-1)

# Tune "C" (the inverse of the regularization strength) and "tol" (the
# tolerance of the stopping criterion) with a 10-fold cross-validated grid
# search scored by negative log loss.
# refit is passed as the boolean True: the previous string 'True' only worked
# because a non-empty string is truthy, while a string value for refit is
# meant to name a scorer in multi-metric searches.
classifier = GridSearchCV(
    log_reg,
    param_grid={
        'C': [0.1, 0.5, 1, 10, 50, 100, 500, 1000, 2000],
        'tol': [0.001, 0.005, 0.0001],
    },
    scoring='neg_log_loss',
    refit=True,
    n_jobs=1,
    cv=10,
)

# Fit model.
classifier.fit(x_train, y_train)

# Make predictions for the test data: hard labels and per-class probabilities.
y = classifier.predict(x_test)
y_prob = classifier.predict_proba(x_test)

# Build the submission: an 'id' column followed by one probability column per class.
# (The former bare `submission.reset_index()` call was removed: its result was
# discarded, so it never changed `submission`.)
submission = pd.DataFrame(y_prob, columns=classes)
submission.insert(0, 'id', test_ids)
submission.to_csv('submission.csv', index=False)