In [2]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression


### Load the dataset from an Excel workbook and clean it up. Drop every row that contains an NA cell to prevent issues later in the code. Then preprocess all of the work order descriptions by converting them into word-count vectors.

In [3]:
# Read the work-order export and discard every row that has at least one
# missing cell, so the text columns used below contain no NaN values.
df = pd.read_excel('A32 WO List.xlsx').dropna(axis='index', how='any')

# Bag-of-words encoding: learn the vocabulary from the work order
# descriptions and build the document-term count matrix in a single pass.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(
    raw_documents=df['Work Order Description (Work Order)']
)

### Use Singular Value Decomposition to reduce the number of dimensions of the data

In [4]:
# Project the sparse count matrix onto 50 latent components.
# TruncatedSVD uses a randomized solver by default, so the seed is fixed to
# make the reduced representation identical on every Restart & Run All.
svd = TruncatedSVD(n_components=50, random_state=42)
X_reduced = svd.fit_transform(X)

### Cluster the work orders into however many clusters you want. To limit the scope of this problem-solving tool, we will use 10 clusters.

In [8]:
# Group the reduced work-order vectors into 10 clusters.
# random_state fixes the centroid initialisation so cluster ids and sizes are
# reproducible between runs; n_init is spelled out because its default value
# differs between scikit-learn releases.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_reduced)

### Train a classifier that maps each work order description (the issue identified) to the note recorded for that work order.

In [6]:
# Fit a logistic-regression model on the full (non-reduced) count matrix.
# Features: bag-of-words vectors of the work order descriptions.
# Labels:   the free-text note attached to each work order.
classifier = LogisticRegression()
classifier.fit(X=X, y=df['Note (Work Order-Note)'])

LogisticRegression()

### Extract the top 10 repeated issues and their proposed solutions by looping through the clusters and counting the number of work orders in each one. Then print the most common problem in each cluster and the proposed solution for it.

In [13]:
# Report on every cluster the fitted KMeans model produced. The loop bound is
# taken from the model itself so it cannot drift out of sync with n_clusters.
for i in range(kmeans.n_clusters):
    # Boolean selector for the rows assigned to cluster i.
    mask = (clusters == i)
    cluster_df = df[mask]

    count = len(cluster_df)
    if count == 0:
        # An empty cluster has no mode; mode()[0] would raise, so skip it.
        continue

    # The most frequently repeated description stands in for the cluster.
    most_common = cluster_df['Work Order Description (Work Order)'].mode()[0]

    # Encode that description with the already-fitted vocabulary and ask the
    # classifier which note it associates with it.
    X_common = vectorizer.transform([most_common])
    solution = classifier.predict(X_common)[0]

    print('Cluster {}: {} work orders'.format(i + 1, count))
    print('Most common problem: {}'.format(most_common))
    print('Proposed solution: {}\n'.format(solution))

Cluster 1: 67 work orders
Most common problem: Need new home switch on the 6th position cross slide-1063--
Proposed solution: 02/14/13 7259 REPLACED SWITCH AND CORD SEAL.

Cluster 2: 109 work orders
Most common problem: Machine will not reset-acme nocam--
Proposed solution: 12/17/12 8949 MACHINE RESETS OKAY, BUT AS SOON AS YOU HOME UNITS F7 MACHINE FAULTS. JUMPED 180-72 NO FAULTS ALL UNITS HOME OKAY. JUMPED 180-A FAULTS WHEN UNITS ARE HOMED. JUMPED 72-A, NO FAULTS, 72-A IS ZX-1 MAIN SLIDE. CHECKED SWITCHES AT INPUT/OUTPUT BOARD, THEY SEEM TO BE FINE. FAULT STRING ON PRINTS IS 72-180 STARTING WITH ZX1. LOOK AT ACTUAL WIRING OF DRIVES, IT'S 72B-180. NOT SURE WHAT CRWD AND CRF1 ARE, COULD NOT LOCATE THEM ON THE PRINTS. OBVIOUSLY THEY ARE CONTROL RELAYS BUT FOR WHAT, NO CLUE, AND BECAUSE PRINTS DO NOT SHOW THEM I'M NOT SURE IF THEY ARE PART OF DRIVE STRING OR NOT, OR IF DRIVE STRING ACTUALLY STARTS AT 72B AND NOT 72?? NO DRIVE IN STOCK TO REPLACE ZX1 WITH, WHICH I BELIEVE THE FAULT CONTACT