In [None]:
# Update sklearn to prevent version mismatches
!pip install sklearn --upgrade

In [None]:
# install joblib. This will be used to save your model. 
# Restart your kernel after installing 
!pip install joblib

In [None]:
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Read the dataset, then discard every column that is entirely null,
# followed by every row that still contains a missing value.
# NOTE(review): ".csv" looks like an unfilled placeholder path — confirm the real file name.
df = (
    pd.read_csv(".csv")
    .dropna(axis='columns', how='all')
    .dropna()
)
# Preview the first rows of the cleaned frame.
df.head()

In [None]:
# Set features. This will also be used as your X values.
# A list inside the indexing brackets selects columns and keeps X as a
# two-dimensional DataFrame (the original `df([...])` tried to *call* the
# DataFrame, which raises a TypeError).
# NOTE(review): 'features' looks like a placeholder column name — replace it
# with the actual feature column(s) of the dataset.
X = df[['features']]
X.head()

In [None]:
# Target values: every row of the 'label' column.
y = df.loc[:, 'label']
# Preview the first few targets.
y.head()

In [None]:
from sklearn.model_selection import train_test_split

# Partition features and targets into train/test subsets; the fixed
# random_state keeps the split reproducible between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42)

# Preview the training features.
X_train.head()

In [None]:

from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training features only; the same fitted scaler
# is then applied to the training split and the test split, in that order.
X_scaler = MinMaxScaler()
X_scaler.fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline model: logistic regression with default settings, trained on the
# scaled training split.
classifier = LogisticRegression().fit(X_train_scaled, y_train)

# Report classifier.score on the training split, then on the test split.
print(
    f"Training Data Score: {classifier.score(X_train_scaled, y_train)}",
    f"Testing Data Score: {classifier.score(X_test_scaled, y_test)}",
    sep="\n",
)

In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid searched over the baseline `classifier`.
# The default LogisticRegression solver ('lbfgs') does not support the 'l1'
# penalty, so every 'l1' candidate would fail to fit. The solver is therefore
# pinned (as a single-value grid entry) to 'saga', which handles both 'l1'
# and 'l2'.
param_grid = {
    'C': [1, 5, 10, 50],
    'penalty': ["l1", "l2"],
    'solver': ['saga'],
    # saga commonly needs more than the default 100 iterations to converge;
    # raise this further if convergence warnings still appear.
    'max_iter': [1000],
    # saga is stochastic; fix the seed so the search is reproducible.
    'random_state': [42],
}
grid = GridSearchCV(classifier, param_grid, verbose=3)

# Train the model with GridSearch (cross-validated fit of every candidate).
grid.fit(X_train_scaled, y_train)

In [None]:
# The model is trained with GridSearch in the previous cell (`grid.fit`); this cell intentionally contains no code.

In [None]:
# Show the best parameter combination found by the grid search and, on the
# next line, the score that combination achieved.
print(grid.best_params_, grid.best_score_, sep="\n")

In [None]:
# Predict on the scaled test split with the tuned grid-search model.
# The original cell predicted with the untuned baseline `classifier`, which
# is inconsistent with the rest of the notebook: `grid` is what gets tuned
# above and saved below. With the default refit=True, GridSearchCV.predict
# delegates to the best estimator found during the search.
predictions = grid.predict(X_test_scaled)
predictions

In [None]:
# Put the predicted labels next to the true test labels for comparison.
predictions_df = pd.DataFrame(dict(prediction=predictions, actual=y_test))
predictions_df

In [None]:
# Persist the fitted grid-search object to disk with joblib.
# Adjust the file name below if the submission requires your own name in it,
# and be sure to turn the saved file in to BCS.
# If `import joblib` fails, try running the install command from a
# terminal/git-bash.
import joblib

filename = 'logistic_regression.sav'
joblib.dump(value=grid, filename=filename)