In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Train and evaluate a TF-IDF + linear SVM text classifier that predicts an
# item's 'Category' from its free-text 'Item Name'.

# Single seed shared by the split and the classifier so a re-run reproduces
# the same report.
RANDOM_STATE = 42

# Load data into a Pandas DataFrame
df = pd.read_csv('ML_ML_ML.xlsx - Sheet1.csv', encoding='ISO-8859-1', sep=',')

# Remove rows missing either the text feature or the label: a row without a
# label cannot be used for supervised training or evaluation.
df = df.dropna(subset=['Item Name', 'Category']).reset_index(drop=True)

# Normalise the labels. Spellings that differ only by surrounding whitespace or
# letter case (e.g. 'CAbin & Electrical' vs 'Cabin & Electrical') are collapsed
# onto the most frequent spelling so they are not learned as separate classes.
df['Category'] = df['Category'].astype(str).str.strip()
category_key = df['Category'].str.casefold()
canonical_label = df.groupby(category_key)['Category'].agg(
    lambda labels: labels.value_counts().idxmax()
)
df['Category'] = category_key.map(canonical_label)

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    df['Item Name'], df['Category'], test_size=0.2, random_state=RANDOM_STATE
)

# Define a pipeline to vectorize the text data and train a linear support vector machine (SVM)
text_clf = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LinearSVC(random_state=RANDOM_STATE)),
])

# Train the model on the training data
text_clf.fit(X_train, y_train)

# Evaluate the model on the test data.
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# number the default produces) without emitting an UndefinedMetricWarning.
predictions = text_clf.predict(X_test)
print(classification_report(y_test, predictions, zero_division=0))





                    precision    recall  f1-score   support

        Attachment       0.95      0.75      0.84        53
CAbin & Electrical       1.00      1.00      1.00         1
Cabin & Electrical       0.87      0.89      0.88       229
           Charges       0.33      0.08      0.13        12
      Engine Parts       0.81      0.85      0.83       415
           Filters       0.95      0.97      0.96       969
               GET       0.96      0.95      0.95      1059
          Hardware       0.00      0.00      0.00        15
        Hydraulics       0.88      0.89      0.89       984
             Liner       0.83      0.90      0.86        21
        Lubricants       0.90      0.92      0.91       420
            Others       0.80      0.73      0.76       864
     Pins & Bushes       0.86      0.90      0.88       463
      Rock Breaker       0.92      0.94      0.93       245
             Seals       0.86      0.89      0.87       384
     Undercarriage       0.91      0.92

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib

# Train and evaluate a TF-IDF + random forest text classifier that predicts an
# item's 'Category' from its 'Item Name', persist it with joblib, then use the
# reloaded model to label the items in a second CSV and write the result out.

# Single seed shared by the split and the forest; without it the forest is
# built differently on every run and the report is not reproducible.
RANDOM_STATE = 42

# Load data into a Pandas DataFrame.
# Relative path, matching how the input file below is located, instead of a
# hard-coded absolute '/content/' directory.
df = pd.read_csv('ML_ML_ML.xlsx - Sheet1.csv', encoding='ISO-8859-1', sep=',')

# Remove rows missing either the text feature or the label: a row without a
# label cannot be used for supervised training or evaluation.
df = df.dropna(subset=['Item Name', 'Category']).reset_index(drop=True)

# Normalise the labels. Spellings that differ only by surrounding whitespace or
# letter case (e.g. 'CAbin & Electrical' vs 'Cabin & Electrical') are collapsed
# onto the most frequent spelling so they are not learned as separate classes.
df['Category'] = df['Category'].astype(str).str.strip()
category_key = df['Category'].str.casefold()
canonical_label = df.groupby(category_key)['Category'].agg(
    lambda labels: labels.value_counts().idxmax()
)
df['Category'] = category_key.map(canonical_label)

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    df['Item Name'], df['Category'], test_size=0.2, random_state=RANDOM_STATE
)

# Define a pipeline to vectorize the text data and train a random forest classifier
text_clf = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', RandomForestClassifier(random_state=RANDOM_STATE)),
])

# Train the model on the training data
text_clf.fit(X_train, y_train)

# Evaluate the model on the test data.
# zero_division=0 reports 0.0 for classes that are never predicted (the same
# number the default produces) without emitting an UndefinedMetricWarning.
predictions = text_clf.predict(X_test)
print(classification_report(y_test, predictions, zero_division=0))

# Save the model to a file
joblib.dump(text_clf, 'text_clf.joblib')

# Load the saved model from a file (round-trip check of the artefact just
# written; joblib files are pickles, so only load ones you produced yourself).
loaded_text_clf = joblib.load('text_clf.joblib')

# Load the items to classify
df_2 = pd.read_csv('ML_ML_ML_Input File.xlsx - Sheet1.csv')

# Only rows that actually carry an item name can be classified. Blank cells are
# read as NaN and would make the vectorizer fail, so they are skipped here and
# left without a prediction; astype(str) guards against purely numeric names.
has_name = df_2['Item Name Input'].notna()
new_inputs = df_2.loc[has_name, 'Item Name Input'].astype(str).tolist()

# Use the loaded model to make predictions on the new inputs
# (predict() rejects an empty batch, so fall back to an empty result).
if new_inputs:
    new_predictions = loaded_text_clf.predict(new_inputs)
else:
    new_predictions = np.array([], dtype=object)

# Print the predictions
print(new_predictions)

# Attach the predictions to the rows they were computed for. The explicit index
# keeps every prediction on its own row even when some rows were skipped.
my_series = pd.Series(new_predictions, index=df_2.index[has_name], dtype=object)
df_2['Predicted Value'] = my_series

# Print the resulting DataFrame
print(df_2)

df_2.to_csv('Extracted_ML_Category.csv')




                    precision    recall  f1-score   support

        Attachment       0.89      0.77      0.83        53
CAbin & Electrical       1.00      1.00      1.00         1
Cabin & Electrical       0.91      0.89      0.90       229
           Charges       0.50      0.17      0.25        12
      Engine Parts       0.87      0.90      0.88       415
           Filters       0.95      0.97      0.96       969
               GET       0.95      0.97      0.96      1059
          Hardware       0.25      0.07      0.11        15
        Hydraulics       0.89      0.91      0.90       984
             Liner       0.95      0.90      0.93        21
        Lubricants       0.92      0.93      0.92       420
            Others       0.83      0.79      0.81       864
     Pins & Bushes       0.89      0.92      0.91       463
      Rock Breaker       0.92      0.93      0.93       245
             Seals       0.88      0.90      0.89       384
     Undercarriage       0.95      0.91

In [None]:
#Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Train and evaluate a TF-IDF + Multinomial Naive Bayes text classifier that
# predicts an item's 'Category' from its free-text 'Item Name'.

# Load data into a Pandas DataFrame
df = pd.read_csv('ML_ML_ML.xlsx - Sheet1.csv', encoding='ISO-8859-1', sep=',')

# Remove rows missing either the text feature or the label: a row without a
# label cannot be used for supervised training or evaluation.
df = df.dropna(subset=['Item Name', 'Category']).reset_index(drop=True)

# Normalise the labels. Spellings that differ only by surrounding whitespace or
# letter case (e.g. 'CAbin & Electrical' vs 'Cabin & Electrical') are collapsed
# onto the most frequent spelling so they are not learned as separate classes.
df['Category'] = df['Category'].astype(str).str.strip()
category_key = df['Category'].str.casefold()
canonical_label = df.groupby(category_key)['Category'].agg(
    lambda labels: labels.value_counts().idxmax()
)
df['Category'] = category_key.map(canonical_label)

# Split data into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    df['Item Name'], df['Category'], test_size=0.2, random_state=42
)

# Define a pipeline to vectorize the text data and train a Multinomial Naive Bayes classifier
text_clf = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', MultinomialNB()),
])

# Train the model on the training data
text_clf.fit(X_train, y_train)

# Evaluate the model on the test data.
# Some classes are never predicted by this model, which makes their precision
# undefined; zero_division=0 reports them as 0.0 (the same number the default
# produces) without the repeated UndefinedMetricWarning output.
predictions = text_clf.predict(X_test)
print(classification_report(y_test, predictions, zero_division=0))

                    precision    recall  f1-score   support

        Attachment       1.00      0.02      0.04        53
CAbin & Electrical       0.00      0.00      0.00         1
Cabin & Electrical       0.91      0.76      0.83       229
           Charges       0.00      0.00      0.00        12
      Engine Parts       0.80      0.64      0.71       415
           Filters       0.88      0.98      0.93       969
               GET       0.92      0.96      0.94      1059
          Hardware       0.00      0.00      0.00        15
        Hydraulics       0.76      0.90      0.83       984
             Liner       1.00      0.48      0.65        21
        Lubricants       0.91      0.83      0.87       420
            Others       0.75      0.68      0.71       864
     Pins & Bushes       0.81      0.87      0.84       463
      Rock Breaker       0.94      0.89      0.91       245
             Seals       0.84      0.85      0.85       384
     Undercarriage       0.90      0.88

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
