In [1]:
import os
import re
import pickle
import numpy as np
import pandas as pd
from collections import Counter
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Read drug200.csv into a DataFrame; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv(r'drug200.csv')

In [None]:
def preprocess(df):
    """Label-encode the categorical columns and standardise the features.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Sex', 'BP', 'Cholesterol' and 'Drug'.
        Every column except the last is used as a feature and the last
        column is used as the target. Any remaining feature columns are
        assumed to already be numeric (StandardScaler requires that).
        The frame passed in is left unmodified.

    Returns
    -------
    X : numpy.ndarray
        All columns but the last, after encoding, scaled by StandardScaler.
    y : numpy.ndarray
        Values of the last column of the encoded frame.

    Notes
    -----
    The encoders and the scaler are fitted on every row of ``df``. If the
    result is split into train/test afterwards, the scaling statistics have
    already seen the test rows.
    """
    columns_to_encode = ['Sex', 'BP', 'Cholesterol', 'Drug']

    # Work on a copy so the caller's DataFrame keeps its original string labels
    # and this cell stays safe to re-run.
    encoded = df.copy()
    for column in columns_to_encode:
        # A fresh encoder per column: each column has its own label set.
        encoded[column] = LabelEncoder().fit_transform(encoded[column])

    X = encoded.iloc[:, :-1].values
    y = encoded.iloc[:, -1].values

    X = StandardScaler().fit_transform(X)

    return X, y

# Encode and scale the DataFrame held in `df`
X_processed, y_processed = preprocess(df)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y_processed,
    test_size=0.2,
    random_state=42,
)

# X_train / X_test hold the features and y_train / y_test the labels used below

In [None]:
def model_evaluate(model):
    """Print a classification report for ``model`` on the notebook's test split.

    ``model`` only needs a ``predict`` method. The features and true labels
    are read from the notebook-level ``X_test`` and ``y_test``, so the split
    cell must have been run first. Nothing is returned.
    """
    predictions = model.predict(X_test)
    report = classification_report(y_test, predictions)
    print(report)

In [None]:
# Baseline: a random forest fitted on the preprocessed training split.
# A fixed random_state makes the fitted forest, and therefore the accuracy
# shown below, repeatable across kernel restarts.
rfc = RandomForestClassifier(random_state=42)
rfc.fit(X_train, y_train)
y_pred_rfc = rfc.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_rfc)
accuracy

In [None]:
# Per-class precision / recall / F1 for the baseline forest on the held-out split
model_evaluate(rfc)

In [None]:
# X_train is already numeric and scaled by preprocess(), so no vectorizer or
# transform step is needed here (and no `vectorizer` object exists in this
# notebook). The pipeline therefore wraps the classifier only.
pipe = Pipeline([('randomforestclassifier', rfc)])
pipe.fit(X_train, y_train)

model_evaluate(pipe)

In [7]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [2]:
# Read drug200.csv fresh for the pipeline-based workflow in the cells below
df = pd.read_csv(r'drug200.csv')

In [3]:
# True if any column contains at least one missing value
df.isnull().sum().any()

False

In [4]:
# Numeric features: a single standardisation step
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

In [8]:
# Categorical features: one-hot encoding. handle_unknown='ignore' encodes a
# category that was not seen during fit as all zeros instead of raising.
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)

In [9]:
# Features are every column except the last; the last column is the target
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [10]:
# Route feature columns to a transformer by dtype
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object', 'category']).columns

In [11]:
# Apply each sub-pipeline to its own group of columns
preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
])

In [12]:
# End-to-end model: column preprocessing followed by a random forest.
# The fixed random_state makes the fitted forest (and the reported metrics)
# repeatable across kernel restarts; 42 matches the seed used for the split.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

In [13]:
# Split the raw (untransformed) features and labels 80/20 with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Fits the preprocessing steps and the classifier together, on training rows only
model.fit(X_train, y_train)

In [15]:
# Predict labels for the held-out rows; the pipeline applies its fitted preprocessing first
y_predict= model.predict(X_test)

In [17]:
# Per-class precision / recall / F1 and overall accuracy on the held-out split
print(classification_report(y_test, y_predict))

              precision    recall  f1-score   support

       DrugY       1.00      1.00      1.00        15
       drugA       1.00      1.00      1.00         6
       drugB       1.00      1.00      1.00         3
       drugC       1.00      1.00      1.00         5
       drugX       1.00      1.00      1.00        11

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40



In [21]:
def model_evaluate(model):
    """Print a classification report for ``model`` on the notebook's test split.

    Same helper as the one defined near the top of the notebook, repeated so
    this section runs on its own. ``X_test`` and ``y_test`` are read from the
    notebook namespace at call time. Nothing is returned.
    """
    predictions = model.predict(X_test)
    report = classification_report(y_test, predictions)
    print(report)

In [22]:
# Persist the fitted pipeline (preprocessing + classifier) to disk
with open('pipeline.pickle','wb') as f:
    pickle.dump(model, f)
    
# Load it back. pickle.load can execute arbitrary code, so only unpickle files
# that this notebook (or another trusted source) produced.
with open('pipeline.pickle', 'rb') as f:
    loaded_pipe = pickle.load(f)
    
# The restored pipeline should reproduce the report of the in-memory model
model_evaluate(loaded_pipe)

              precision    recall  f1-score   support

       DrugY       1.00      1.00      1.00        15
       drugA       1.00      1.00      1.00         6
       drugB       1.00      1.00      1.00         3
       drugC       1.00      1.00      1.00         5
       drugX       1.00      1.00      1.00        11

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

