### Loading the relevant libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn import model_selection
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

### Building the machine learning model

#### Loading and pre-processing the dataset:


In [None]:
# Location of the CSV file to load.
# NOTE(review): the path is blank in this notebook; fill it in before running.
DATA_PATH = ''

# Read the whole CSV into a DataFrame
data = pd.read_csv(DATA_PATH)

In [None]:
## Getting a general overview of the properties of the dataset
# Defining a function to check the general properties of the dataset
def resumetable(df):
    """Print the shape of *df* and return a per-column summary table.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per column of *df*, with the columns:
        ``Name`` (column label), ``dtypes`` (column dtype),
        ``Missing`` (count of null values) and ``Uniques``
        (count of distinct non-null values).
    """
    print(f"Dataset Shape: {df.shape}")

    # Build the table straight from the column labels instead of going
    # through reset_index(): that route names the label column after
    # df.columns.name, so looking it up as 'index' fails on frames whose
    # column index is named (e.g. the result of a pivot).
    summary = pd.DataFrame({
        'Name': list(df.columns),
        'dtypes': df.dtypes.values,
        'Missing': df.isnull().sum().values,
        'Uniques': df.nunique().values,
    })

    return summary
# Show the per-column overview of the loaded dataset
overview = resumetable(data)
print(overview)

# Bar chart of the number of rows per value of the `Condition` column
# NOTE(review): the model further down is trained on data['target'], not
# `Condition` -- confirm which column actually holds the classes.
plt.figure(figsize=(10, 4))
condition_counts = data.Condition.value_counts()
condition_counts.plot.bar()

#### Feature engineering and model selection:

In [None]:
# Input data.
# Clean the text once, before splitting: missing entries become empty
# documents and everything else is coerced to str. This replaces the
# `.values.astype('U')` cast, which turned missing values into the literal
# token 'nan' and allocated a fixed-width unicode array sized to the longest
# document in the corpus.
X = data['text'].fillna('').astype(str)
target = data['target']

# Encoding the target classes as integers (LE.classes_ keeps the original labels)
LE = LabelEncoder()
y = LE.fit_transform(target)

# Splitting the data into training (80%) and testing (20%);
# stratify=y keeps the class proportions the same in both parts
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=0
)

# TFIDF vectorizer: unigrams + bigrams, English stop words removed,
# terms seen in fewer than 5 documents dropped, log-scaled term frequency
tfidf = TfidfVectorizer(
    sublinear_tf=True,
    min_df=5,
    norm='l2',
    ngram_range=(1, 2),
    stop_words='english',
)

# Defining the pipeline: TFIDF + SVM.
# Fitting through the pipeline means the vectorizer vocabulary is learned
# from the training split only.
classifier = Pipeline(steps=[
    ('features', tfidf),
    ('model', SVC()),
])

# Fitting the training data to the classifier
classifier.fit(X_train, y_train)

In [None]:
# Evaluating the model on the held-out test split
y_pred = classifier.predict(X_test.values.astype('U'))
y_true = y_test

# Decode the integer codes back to the original class labels so that the
# report rows are named after the classes rather than opaque integers.
print(classification_report(LE.inverse_transform(y_true),
                            LE.inverse_transform(y_pred)))