# Predicting whether an order should be sent to a technical approver

## Part 1: Load and examine the data

In [None]:
# File name of the CSV holding the order data; it is read from the current
# directory (as f'./{dataset}') when the DataFrame is loaded.
dataset = 'orders_with_predicted_value.csv'

In [None]:
import pandas as pd
from time import sleep
from sklearn.model_selection import train_test_split

In [None]:
# Resolve the CSV path relative to the current directory, load it into a
# DataFrame, and preview the first rows.
csv_path = f'./{dataset}'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Report how many rows were loaded, then how often each distinct value
# occurs in the first column of the dataset.
row_count = df.shape[0]
first_column = df.columns[0]
print(f'Number of rows in dataset: {row_count}')
print(df[first_column].value_counts())

## Part 2: Get the data into the right shape

In [None]:
# Expand the DataFrame's categorical columns into indicator (dummy) columns
# so every feature is numeric, then preview the result.
encoded_data = pd.get_dummies(data=df)
encoded_data.head()

In [None]:
# Absolute correlation of every encoded column with the target column.
target_corr = encoded_data.corr()['tech_approval_required']
corrs = target_corr.abs()

# Keep only the columns whose absolute correlation with the target exceeds
# 0.1 (the target correlates perfectly with itself, so it is retained too).
is_relevant = corrs > .1
columns = corrs[is_relevant].index
corrs = corrs.filter(items=columns)
corrs

In [None]:
# Restrict the encoded data to the columns selected by the correlation
# filter, then preview the reduced frame.
encoded_data = encoded_data.loc[:, columns]
encoded_data.head()

## Part 3: Create training, validation and test data sets

In [None]:
# Hold out 30% of the rows for evaluation and keep the remaining 70% for
# training; the fixed random_state makes the split reproducible.
train_df, val_and_test_data = train_test_split(
    encoded_data,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Display the (rows, columns) shape of the training split.
train_df.shape

In [None]:
# Display the (rows, columns) shape of the held-out split.
val_and_test_data.shape

## Part 4: Train the model

In [None]:
import warnings

from sklearn.linear_model import LogisticRegression

# NOTE(review): this silences *every* warning for the rest of the session,
# including any raised while fitting the model -- consider narrowing it to a
# specific warning category.
warnings.simplefilter("ignore")

# Logistic-regression classifier using the L-BFGS solver.
classifier = LogisticRegression(solver="lbfgs")

In [None]:
# Split the training frame into a feature matrix (every column except the
# target) and the target vector, both as plain arrays.
is_feature = train_df.columns != "tech_approval_required"
input_to_classifier = train_df.loc[:, is_feature].values
output = train_df["tech_approval_required"].values

In [None]:
# Display the shape of the training feature matrix (rows, feature columns).
input_to_classifier.shape

In [None]:
# Display the shape of the training target vector.
output.shape

In [None]:
# Fit the classifier on the training features and their target labels.
classifier.fit(X=input_to_classifier, y=output)

## Part 5: Test the model

In [None]:
# Score the held-out split with the fitted classifier.
# The feature columns are derived from train_df so the held-out matrix has
# exactly the same columns, in the same order, as the matrix used for fitting.
feature_columns = train_df.columns[train_df.columns != "tech_approval_required"]
test_input = val_and_test_data[feature_columns].values
predictions = classifier.predict(test_input)

# Attach the predictions with assign(), which returns a new DataFrame, rather
# than writing a column into the frame produced by train_test_split; in-place
# column assignment on that frame can raise pandas' SettingWithCopyWarning.
val_and_test_data = val_and_test_data.assign(prediction=predictions)

In [None]:
# Preview the held-out rows, now including the "prediction" column.
val_and_test_data.head()

In [None]:
# Accuracy on the held-out split: the fraction of rows whose prediction
# equals the actual target value.
is_correct = val_and_test_data['prediction'] == val_and_test_data['tech_approval_required']
is_correct.mean()