In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ForeSight2020
## Table of Contents <a name = 'toc'></a>
1. [Background/Abstract](#abstract)
1. [Analysis of Data Sources](#datasources)
    1. [BPCS Archived_SO](#bpcs_archived_so)
    2. [BPCS Current_SO](#bpcs_current_so)
    2. [PlantStar](#plantstar)
    3. [SmartSolve NCs](#smartsolve_ncs)
2. [Combined Data Analysis](#combinedanalysis)
4. [Predicting Quality Issues](#predicition)


## Abstract <a name = 'abstract'></a>
Available data sources for components, tools, presses and non-conformances are explored. Machine learning algorithms are then applied in an attempt to predict the likelihood of a quality issue occurring, based on the components, tools and press used during production.

## Analysis of Data Sources <a name = 'datasources'></a>

Purpose: analyze each data source individually before combining. 

#### BPCS Archived_SO <a name = 'bpcs_archived_so'></a> 
About: 
* Shop orders that have been completed
* Pulled from the "connecting to db.ipynb" notebook
* Does not contain Press loc/machine number/tool
* arch_so is helpful for data analysis
* arch_so_encoded is helpful for machine learning



In [3]:
# Load the archived shop orders: one frame for analysis and a companion
# encoded frame, each read from its own CSV under ./clean_data.
arch_so = pd.read_csv('./clean_data/archived_so_clean.csv')
arch_so_encoded = pd.read_csv('./clean_data/archived_so_encoded.csv')

# Parse the Date column as YYYYMMDD so min/max below compare real dates.
arch_so['Date'] = pd.to_datetime(arch_so['Date'], format='%Y%m%d')

# Summary banner followed by the date range and lot-number cardinality.
banner_edge = '*' * 10
print(banner_edge + ' Archived_SO Summary Statistics ' + banner_edge)

first_record_date = arch_so['Date'].min()
print('Date of first record = {}'.format(first_record_date))

latest_record_date = arch_so['Date'].max()
print('Date of most recent record = {}'.format(latest_record_date))

# dropna=False keeps a missing lot number counted as its own value,
# matching the length of .unique().
lot_number_count = arch_so['Lot Number'].nunique(dropna=False)
print('Number of unique lot numbers = {}'.format(lot_number_count))

# Preview the first five rows as the cell's rich output.
arch_so.head(5)

********** Archived_SO Summary Statistics **********
Date of first record = 2020-01-01 00:00:00
Date of most recent record = 2021-12-30 00:00:00
Number of unique lot numbers = 4069


Unnamed: 0.1,Unnamed: 0,Lot Number,Product,Date,Shop Order,requested_qty,packaging,resin,colorant,Formula Sheet,...,lid component cap,lid component washer,container,TRAY,fg,lid component cowl,bracket key,cannula,tip cap,NC
0,1,19191259,R8930LA,2020-04-01,191259,6864.0,,,,,...,,,,,,,,,,0
1,2,19D27563X,4502570,2020-12-01,188358,335000.0,EXP5013,,,DC00056758,...,,,,,,,,,,0
2,3,19E02463X,4502570,2020-12-01,188400,309000.0,EXP5013,,,DC00056758,...,,,,,,,,,,1
3,4,19E02563X,4502570,2020-12-01,188401,48000.0,5542735,0301001,,DC00056758,...,,,,,,,,,,1
4,5,19E188488,R7046N,2020-02-12,188488,18885.0,,RM004,,FS10002847,...,,,,,,,,,,0


In [17]:
# Display the encoded archived shop-order frame loaded from
# ./clean_data/archived_so_encoded.csv (bare expression -> rich notebook output).
# NOTE(review): the saved output shows NaN in the encoded columns for the trailing
# rows while the leading rows hold 0.0 -- confirm whether that is intended.
arch_so_encoded

Unnamed: 0,Shop Order,Lot Number,requested_qty,047,065,1181200777,1522SA,1523SA,1525SA,31139747,...,R8534,R8535,R8550,R9806,8507SA.1,R7624.1,R1594,4500906,scaled_requested_qty,NC
0,191259,19191259,6864.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.235876,0
1,188358,19D27563X,335000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.512027,0
2,188400,19E02463X,309000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.618557,1
3,188401,19E02563X,48000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.649485,1
4,188488,19E188488,18885.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.648969,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4064,198819,21J198819,1890.0,,,,,,,,...,,,,,,,,,0.064948,0
4065,198832,21J198832,185.0,,,,,,,,...,,,,,,,,,0.006357,0
4066,198834,21J198834,1044.0,,,,,,,,...,,,,,,,,,0.035876,0
4067,198864,21J198864,5700.0,,,,,,,,...,,,,,,,,,0.195876,0
