# **Import Library**

In [1]:
# pandas: A powerful data manipulation and analysis library.
# - It is used for handling and analyzing structured data. It provides data structures like DataFrame 
#   and Series to work with heterogeneous data in tabular form (similar to SQL tables or Excel sheets).
# - Useful for data cleaning, transformation, and analysis.

import pandas as pd

# numpy: A package for scientific computing with support for large, multi-dimensional arrays and matrices.
# - It provides tools for numerical computations, including support for arrays, random number generation, 
#   and linear algebra operations.
# - Essential for working with numerical data and performing mathematical operations.

import numpy as np

# seaborn: A Python visualization library based on matplotlib that provides a high-level interface for drawing 
# attractive statistical graphics.
# - Useful for creating aesthetically pleasing and informative data visualizations, especially for 
#   exploring relationships in data.
# - Works well with pandas DataFrames for easy plotting.

import seaborn as sns

# matplotlib: A popular 2D plotting library for Python.
# - Used to create a variety of static, animated, and interactive visualizations, including line plots, 
#   histograms, scatter plots, etc.
# - Often used in conjunction with seaborn for more advanced visualizations.

import matplotlib.pyplot as plt

# sklearn.model_selection.train_test_split: A utility function to split datasets into training and testing sets.
# - It is used for dividing your dataset into a training set (to train the model) and a test set 
#   (to evaluate the model's performance).
# - Helps detect overfitting by evaluating the model on data it was not trained on.

from sklearn.model_selection import train_test_split

# sklearn.preprocessing: A module that provides various methods for scaling and encoding features.
# - LabelEncoder: Encodes categorical target labels as integers from 0 to n_classes - 1.
# - StandardScaler: Standardizes the features by removing the mean and scaling to unit variance.
# - MinMaxScaler: Scales features to a given range, often [0, 1].

from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler

# sklearn.neighbors.KNeighborsClassifier: An implementation of the K-Nearest Neighbors (KNN) algorithm.
# - KNN is a simple, instance-based learning algorithm; this class is the classification variant
#   (KNeighborsRegressor is the counterpart for regression).
# - It works by finding the closest training examples in the feature space and using their labels 
#   to predict the label for new instances.

from sklearn.neighbors import KNeighborsClassifier

# sklearn.tree.DecisionTreeClassifier: A classifier that builds a decision tree based on feature values.
# - Decision trees partition the feature space into regions and assign a label to each region.
# - Used for classification tasks and interpretable models where the decisions can be traced back 
#   to the tree structure.

from sklearn.tree import DecisionTreeClassifier

# sklearn.ensemble.RandomForestClassifier: An ensemble method that combines multiple decision trees to 
# improve classification accuracy.
# - It builds multiple decision trees using random subsets of the data and features, which helps 
#   reduce overfitting and improves generalization.

from sklearn.ensemble import RandomForestClassifier

# sklearn.svm.SVC: A support vector machine classifier for binary or multi-class classification tasks.
# - SVMs work by finding the hyperplane that separates data points of different classes with the
#   largest possible margin.
# - Effective in high-dimensional feature spaces; kernels (RBF by default in SVC) allow non-linear
#   decision boundaries for complex datasets.

from sklearn.svm import SVC

# sklearn.naive_bayes.GaussianNB: A Naive Bayes classifier that assumes features follow a Gaussian (normal) distribution.
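# - It is used for classification problems with continuous features, which are assumed to be
#   conditionally independent of each other given the class label.
# - Useful as a fast baseline classifier. Note that for word-count text data (text classification,
#   spam filtering) the MultinomialNB variant is the more typical choice.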

from sklearn.naive_bayes import GaussianNB

# sklearn.metrics: A module that provides functions to evaluate model performance.
# - confusion_matrix: Computes the confusion matrix to evaluate classification results.
# - accuracy_score: Calculates the fraction of all predictions that are correct.
# - precision_score: Fraction of predicted positives that are truly positive, TP / (TP + FP).
# - recall_score: Fraction of actual positives that the model identifies, TP / (TP + FN).
# - f1_score: Harmonic mean of precision and recall, useful for imbalanced datasets.

from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score