## `Importing and displaying datasets`

### <i>All imports goes here!</i>

In [1]:
import pandas as pd
from IPython.display import display
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
import numpy as np
from sklearn import svm
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()

### <i>Reading the data</i>

In [2]:
result_2008 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2008_result.csv')
result_2013 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2013_result.csv')
result_2017 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2017_result.csv')
result_2022 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2022_result.csv')

display(result_2008.head())
display(result_2013.head())
display(result_2017.head())
display(result_2022.head())

Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Achham,Achham-1,uml,2119.0358,40,54.53
1,1,Achham,Achham-2,maoist,1708.727,32,52.82
2,2,Arghakhanchi,Arghakhanchi-1,maoist,1966.7836,46,55.34
3,3,Arghakhanchi,Arghakhanchi-2,nepalicongress,1755.126,46,52.05
4,4,Baglung,Baglung-1,maoist,1828.5047,48,60.01


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),7434.0,37.0,73.12
1,1,Taplejung,Taplejung-2,Nepal Communist Party (UML),7034.0,48.0,72.6
2,2,Panchthar,Panchthar-1,Nepal Communist Party (UML),13082.0,26.0,72.18
3,3,Panchthar,Panchthar-2,Nepali Congress,11839.0,41.0,74.58
4,4,Illam,Illam-1,Nepal Communist Party (UML),17342.0,55.0,78.58


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),15417,46,59.060916
1,1,Taplejung,Taplejung-1,Nepali Congress,10974,51,62.215124
2,2,Taplejung,Taplejung-1,Federal Democratic National Front,861,57,54.605809
3,3,Taplejung,Taplejung-1,Naya Shakti Party Nepal,267,41,52.116411
4,4,Taplejung,Taplejung-1,Federal Socialist Forum Nepal,158,45,67.28869


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),21943,56,49.859126
1,1,Taplejung,Taplejung-1,Nepal Communist Party (Maoist Center) (Unified...,21735,46,53.744678
2,2,Taplejung,Taplejung-1,Federal Democratic National Front,941,56,54.895159
3,3,Taplejung,Taplejung-1,"Janata Samajwadi Party, Nepal",628,49,51.052482
4,4,Taplejung,Taplejung-1,Rastriya Prajatantra Party,406,65,54.328004


## `Combining all datasets`

In [3]:
file_paths = [
    "Data/Cleaned_Data/cleaned_&_final_2008_result.csv",
    "Data/Cleaned_Data/cleaned_&_final_2013_result.csv",
    "Data/Cleaned_Data/cleaned_&_final_2017_result.csv",
    "Data/Cleaned_Data/cleaned_&_final_2022_result.csv",
]

# Combining all datasets into one DataFrame
dataframes = [pd.read_csv(file) for file in file_paths]
combined_data = pd.concat(dataframes, ignore_index=True)

# Displaying the shape of the combined dataset
print("Combined Dataset Shape:", combined_data.shape)


Combined Dataset Shape: (6165, 7)


## `Data Cleaning (if necessary)`

### <i>Checking if there are any missing values</i>

In [4]:
missing_values = combined_data.isnull().sum()
print("Missing Values:\n", missing_values)


Missing Values:
 Unnamed: 0     0
District       0
Const          0
WParty        47
WVotes        47
Age            0
TurnOut%       0
dtype: int64


### <i>Handling missing values</i>

In [5]:
# Using info to check the data type of missing value columns i.e., WParty and WVotes 
combined_data.info()
# Data type found to be oject and float64 for WParty and WVotes respectively

# For WParty which is of object datatype
for column in combined_data['WParty']:
    
    combined_data['WParty'].fillna(combined_data['WParty'].mode()[0], inplace=True)
    
# For WVoteswhich is of float datatype
for column in combined_data['WVotes']:
       
    combined_data['WVotes'].fillna(combined_data['WVotes'].median(), inplace=True)

# Checking if there are still missing values that persist
missing_values = combined_data.isnull().sum()
print("Missing Values:\n", missing_values)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6165 entries, 0 to 6164
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  6165 non-null   int64  
 1   District    6165 non-null   object 
 2   Const       6165 non-null   object 
 3   WParty      6118 non-null   object 
 4   WVotes      6118 non-null   float64
 5   Age         6165 non-null   float64
 6   TurnOut%    6165 non-null   float64
dtypes: float64(3), int64(1), object(3)
memory usage: 337.3+ KB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  combined_data['WParty'].fillna(combined_data['WParty'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  combined_data['WVotes'].fillna(combined_data['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never wo

Missing Values:
 Unnamed: 0    0
District      0
Const         0
WParty        0
WVotes        0
Age           0
TurnOut%      0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  combined_data['WVotes'].fillna(combined_data['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  combined_data['WVotes'].fillna(combined_data['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never wor

### <i>Checking if there are any duplicate values</i>

In [6]:
duplicates = combined_data.duplicated().sum()
print(f"Number of duplicate rows: {duplicates}")

Number of duplicate rows: 0


### <i>Checking if there are any inconsistencies in data</i>

In [7]:
# Standardizing case and remove leading/trailing spaces
# combined_data['District'] = combined_data['District'].str.lower().str.strip()

mapping_party_name_inconsistencies = {
    'uml' : 'Nepal Communist Party (UML)',
    'nepalicongress':'Nepali Congress',
    'maoist':'Nepal Communist Party (Maoist)',
    'Nepali Congress (BP)':'Nepali Congress',
}

combined_data['WParty'] = combined_data['WParty'].replace(mapping_party_name_inconsistencies)
combined_data.head()

Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Achham,Achham-1,Nepal Communist Party (UML),2119.0358,40.0,54.53
1,1,Achham,Achham-2,Nepal Communist Party (Maoist),1708.727,32.0,52.82
2,2,Arghakhanchi,Arghakhanchi-1,Nepal Communist Party (Maoist),1966.7836,46.0,55.34
3,3,Arghakhanchi,Arghakhanchi-2,Nepali Congress,1755.126,46.0,52.05
4,4,Baglung,Baglung-1,Nepal Communist Party (Maoist),1828.5047,48.0,60.01


## `Feature engineering`

### <i>Dropping Unwanted Columns</i>

In [8]:
combined_data = combined_data.drop(columns=['Unnamed: 0'])

### <i>Encoding categorical Variables</i>

#### <i><u>Performing target variable encoding for WParty</u></i>

In [9]:

encoder = LabelEncoder()

combined_data['WParty_Encoded'] = encoder.fit_transform(combined_data['WParty'])

combined_data.head()

Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%,WParty_Encoded
0,Achham,Achham-1,Nepal Communist Party (UML),2119.0358,40.0,54.53,48
1,Achham,Achham-2,Nepal Communist Party (Maoist),1708.727,32.0,52.82,45
2,Arghakhanchi,Arghakhanchi-1,Nepal Communist Party (Maoist),1966.7836,46.0,55.34,45
3,Arghakhanchi,Arghakhanchi-2,Nepali Congress,1755.126,46.0,52.05,66
4,Baglung,Baglung-1,Nepal Communist Party (Maoist),1828.5047,48.0,60.01,45


#### <i><u>Performing one-hot-encoding for District and Const</u></i>

In [10]:
combined_data = pd.get_dummies(combined_data, columns=['District'], drop_first=True)
combined_data = pd.get_dummies(combined_data, columns=['Const'], drop_first=True)

combined_data.head()

Unnamed: 0,WParty,WVotes,Age,TurnOut%,WParty_Encoded,District_Arghakhanchi,District_Baglung,District_Baitadi,District_Bajhang,District_Bajura,...,Const_Taplejung-2,Const_Tehrathum-1,Const_Tehrathum-2,Const_Terhathum-1,Const_Udayapur-1,Const_Udayapur-2,Const_Udayapur-3,Const_Udaypur-1,Const_Udaypur-2,Const_Udaypur-3
0,Nepal Communist Party (UML),2119.0358,40.0,54.53,48,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,Nepal Communist Party (Maoist),1708.727,32.0,52.82,45,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,Nepal Communist Party (Maoist),1966.7836,46.0,55.34,45,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Nepali Congress,1755.126,46.0,52.05,66,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Nepal Communist Party (Maoist),1828.5047,48.0,60.01,45,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## `Model Implementation: Decision Trees`

#### <i>`Step 1:` Splitting the data into features and target</i>

In [11]:
# Features (excluding the target column)
X = combined_data.drop(columns=['WParty', 'WParty_Encoded'])

# Target variable
y = combined_data['WParty_Encoded']

#### <i>`Step 2:` Splitting the data into training and testing sets</i>

In [12]:
# Splitting the data (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


#### <i>`Step 3:` Initializing and training the logistic regression model</i>

In [13]:
dtc.fit(X_train,y_train)
# combined_data.head()

#### <i>`Step 4:` Making predictions</i>

In [14]:
y_pred = dtc.predict(X_test)

#### <i>`Step 5:` Evaluating the model</i>

In [15]:
print(classification_report(y_pred,y_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         2
           3       0.00      0.00      0.00         5
           4       0.10      0.09      0.09        35
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         3
           7       0.06      0.09      0.07        11
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00        35
          10       0.14      0.20      0.16        20
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         1
          13       0.05      0.08      0.07        36
          14       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         5
          17       0.00      0.00      0.00         0
          18       0.67    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
# Checking the importance of features

features = pd.DataFrame(dtc.feature_importances_, index=X.columns)
features.head(15)

Unnamed: 0,0
WVotes,0.299438
Age,0.136177
TurnOut%,0.173001
District_Arghakhanchi,0.001015
District_Baglung,0.002046
District_Baitadi,0.000719
District_Bajhang,0.000522
District_Bajura,0.000295
District_Banke,0.002899
District_Bara,0.005195


In [17]:
# Solving overfitting of data

dtc2 = DecisionTreeClassifier(criterion='entropy',ccp_alpha=0.02)

dtc2.fit(X_train,y_train)
y_pred2 = dtc2.predict(X_test)

In [18]:
# Checking the classification report again

print(classification_report(y_pred2,y_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.00      0.00      0.00         0
          18       0.93      0.42      0.58      1182
          19       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [19]:
y_probs = dtc2.predict_proba(X_test)

# roc_auc = roc_auc_score(y_test,y_probs, multi_class='ovr')
# print(f"ROC-AUC: {roc_auc:.2f}")

## `Model Implementation: Logistic Regression`

In [20]:
# Creating the model

log_rec = LogisticRegression()

log_rec.fit(X_train,y_train)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [21]:
# Making prediction using the model

y_pred3 = log_rec.predict(X_test)

In [22]:
# Checking the performance of the model using Classification Report

print(classification_report(y_pred3, y_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.00      0.00      0.00         0
          18       0.98      0.36      0.53      1478
          19       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [23]:
# ROC-AUC for logistic regression model here

## `Model Implementation: SVM (Support Vector Machine)`

In [24]:
model = SVC(random_state=42)

model.fit(X_train,y_train)

In [25]:
# Predicting using SVM model
y_pred4 = model.predict(X_test)

In [26]:
# Checking the performance of the model using Classification Report
print(classification_report(y_pred4,y_test))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.00      0.00      0.00         0
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0
          10       0.00      0.00      0.00         0
          11       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          13       0.00      0.00      0.00         0
          14       0.00      0.00      0.00         0
          16       0.00      0.00      0.00         0
          17       0.00      0.00      0.00         0
          18       0.97      0.37      0.54      1401
          19       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [27]:
# ROC-AUC for logistic regression model here

In [28]:
cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')



array([0.35858235, 0.36926287, 0.36369958])

In [29]:
# from sklearn.model_selection import RandomizedSearchCV
# from sklearn.svm import SVC
# from sklearn.metrics import classification_report
# from scipy.stats import uniform

# # Define smaller parameter distribution
# param_dist = {
#     'C': uniform(0.1, 10),  # Narrower range
#     'gamma': uniform(0.01, 1),  # Focused range
#     'kernel': ['rbf']  # Fixed kernel
# }

# # Randomized search with fewer iterations
# random_search = RandomizedSearchCV(estimator=SVC(), param_distributions=param_dist,
#                                    n_iter=20, cv=3, verbose=2, scoring='accuracy', n_jobs=-1, random_state=42)

# # Fit the model
# random_search.fit(X_train, y_train)

# # Results
# print("Best Parameters:", random_search.best_params_)
# print("Best Cross-Validation Accuracy:", random_search.best_score_)


# # Define smaller parameter distribution
# param_dist = {
#     'C': uniform(0.1, 10),  # Narrower range
#     'gamma': uniform(0.01, 1),  # Focused range
#     'kernel': ['rbf']  # Fixed kernel
# }

# # Randomized search with fewer iterations
# random_search = RandomizedSearchCV(estimator=SVC(), param_distributions=param_dist,
#                                    n_iter=20, cv=3, verbose=2, scoring='accuracy', n_jobs=-1, random_state=42)

# # Fit the model
# random_search.fit(X_train, y_train)

# # Results
# print("Best Parameters:", random_search.best_params_)
# print("Best Cross-Validation Accuracy:", random_search.best_score_)

In [30]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from scipy.stats import uniform, randint

# Define hyperparameter grids for each model
param_distributions = {
    'svm': {
        'C': uniform(0.1, 10),
        'gamma': uniform(0.01, 1),
        'kernel': ['rbf']
    },
    'logistic_regression': {
        'C': uniform(0.1, 10),  # Inverse of regularization strength
        'penalty': ['l2', 'none'],  # Regularization types
        'solver': ['lbfgs', 'saga'],  # Solvers compatible with l2 and none
        'max_iter': [100, 200, 500]  # Iteration limits
    },
    'decision_tree': {
        'max_depth': randint(3, 20),  # Maximum depth of the tree
        'min_samples_split': randint(2, 20),  # Minimum number of samples to split
        'min_samples_leaf': randint(1, 10),  # Minimum samples per leaf node
        'criterion': ['gini', 'entropy']  # Splitting criteria
    }
}

# Define models
models = {
    'svm': SVC(),
    'logistic_regression': LogisticRegression(),
    'decision_tree': DecisionTreeClassifier()
}

# Store results
best_params = {}
best_scores = {}

# Perform RandomizedSearchCV for each model
for model_name, model in models.items():
    print(f"Tuning {model_name}...")
    random_search = RandomizedSearchCV(
        estimator=model,
        param_distributions=param_distributions[model_name],
        n_iter=20,
        cv=3,
        verbose=2,
        scoring='accuracy',
        n_jobs=-1,
        random_state=42
    )
    # Fit the model
    random_search.fit(X_train, y_train)
    
    # Store the best results
    best_params[model_name] = random_search.best_params_
    best_scores[model_name] = random_search.best_score_

    # Output results for each model
    print(f"Best Parameters for {model_name}: {random_search.best_params_}")
    print(f"Best Cross-Validation Accuracy for {model_name}: {random_search.best_score_:.4f}")
    print("-" * 50)

# Print all results
print("Summary of Best Parameters and Scores:")
for model_name in models.keys():
    print(f"{model_name}:")
    print(f"  Best Parameters: {best_params[model_name]}")
    print(f"  Best Cross-Validation Accuracy: {best_scores[model_name]:.4f}")
    print()


Tuning svm...
Fitting 3 folds for each of 20 candidates, totalling 60 fits




[CV] END C=3.845401188473625, gamma=0.9607143064099162, kernel=rbf; total time= 1.1min
[CV] END C=0.6808361216819946, gamma=0.8761761457749352, kernel=rbf; total time= 1.2min
[CV] END C=1.6601864044243653, gamma=0.16599452033620266, kernel=rbf; total time= 1.2min
[CV] END C=1.6601864044243653, gamma=0.16599452033620266, kernel=rbf; total time= 1.2min
[CV] END C=0.6808361216819946, gamma=0.8761761457749352, kernel=rbf; total time= 1.3min
[CV] END C=7.41993941811405, gamma=0.6086584841970366, kernel=rbf; total time= 1.3min
[CV] END C=7.41993941811405, gamma=0.6086584841970366, kernel=rbf; total time= 1.3min
[CV] END C=0.6808361216819946, gamma=0.8761761457749352, kernel=rbf; total time= 1.3min
[CV] END C=1.6601864044243653, gamma=0.16599452033620266, kernel=rbf; total time= 1.4min
[CV] END C=3.845401188473625, gamma=0.9607143064099162, kernel=rbf; total time= 1.5min
[CV] END C=7.41993941811405, gamma=0.6086584841970366, kernel=rbf; total time= 1.5min
[CV] END C=3.845401188473625, gamma=0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[CV] END C=3.845401188473625, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.6s
[CV] END C=6.2165316048828085, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=6.2165316048828085, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=6.2165316048828085, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=3.845401188473625, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.6s
[CV] END C=3.845401188473625, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.7s




[CV] END C=1.9182496720710063, max_iter=100, penalty=l2, solver=saga; total time=  32.7s
[CV] END C=0.5666566321361542, max_iter=500, penalty=none, solver=saga; total time=   0.0s
[CV] END C=0.5666566321361542, max_iter=500, penalty=none, solver=saga; total time=   0.0s
[CV] END C=0.5666566321361542, max_iter=500, penalty=none, solver=saga; total time=   0.0s
[CV] END C=7.951759613930136, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=7.951759613930136, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=7.951759613930136, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=7.896910002727692, max_iter=100, penalty=l2, solver=saga; total time=  32.8s




[CV] END C=1.9182496720710063, max_iter=100, penalty=l2, solver=saga; total time=  33.4s




[CV] END C=7.896910002727692, max_iter=100, penalty=l2, solver=saga; total time=  33.8s
[CV] END C=0.2326496115986653, max_iter=100, penalty=none, solver=saga; total time=   0.0s
[CV] END C=0.2326496115986653, max_iter=100, penalty=none, solver=saga; total time=   0.0s
[CV] END C=0.2326496115986653, max_iter=100, penalty=none, solver=saga; total time=   0.0s
[CV] END C=8.18397348116461, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=8.18397348116461, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=8.18397348116461, max_iter=100, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=7.896910002727692, max_iter=100, penalty=l2, solver=saga; total time=  33.8s
[CV] END C=2.40893825622149, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=2.40893825622149, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s
[CV] END C=2.40893825622149, max_iter=500, penalty=none, solver=lbfgs; total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=9.932308858067882, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.7s
[CV] END C=9.932308858067882, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.6s
[CV] END C=1.9182496720710063, max_iter=100, penalty=l2, solver=saga; total time=  35.4s
[CV] END C=4.010606075732408, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=4.010606075732408, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=4.010606075732408, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=3.21711076089411, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=3.21711076089411, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=3.21711076089411, max_iter=200, penalty=none, solver=saga; total time=   0.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=6.175448519014383, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=6.175448519014383, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.6s
[CV] END C=9.932308858067882, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[CV] END C=6.175448519014383, max_iter=100, penalty=l2, solver=lbfgs; total time=   2.6s
[CV] END C=7.851328233611145, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=7.851328233611145, max_iter=200, penalty=none, solver=saga; total time=   0.0s
[CV] END C=7.851328233611145, max_iter=200, penalty=none, solver=saga; total time=   0.0s




[CV] END C=5.777003278199914, max_iter=100, penalty=l2, solver=saga; total time=  32.5s




[CV] END C=5.777003278199914, max_iter=100, penalty=l2, solver=saga; total time=  32.3s
[CV] END C=5.777003278199914, max_iter=100, penalty=l2, solver=saga; total time=  32.5s




[CV] END C=1.6599452033620266, max_iter=500, penalty=l2, solver=saga; total time= 2.4min




[CV] END C=3.0122914019804194, max_iter=500, penalty=l2, solver=saga; total time= 2.4min
[CV] END C=1.6599452033620266, max_iter=500, penalty=l2, solver=saga; total time= 2.5min




[CV] END C=1.6599452033620266, max_iter=500, penalty=l2, solver=saga; total time= 2.5min




[CV] END C=3.0122914019804194, max_iter=500, penalty=l2, solver=saga; total time= 2.4min




[CV] END C=3.0122914019804194, max_iter=500, penalty=l2, solver=saga; total time= 2.5min




[CV] END C=6.199966577826209, max_iter=500, penalty=l2, solver=saga; total time= 2.2min




[CV] END C=6.199966577826209, max_iter=500, penalty=l2, solver=saga; total time= 2.2min


33 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
17 fits failed with the following error:
Traceback (most recent call last):
  File "/home/root123/.local/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/root123/.local/lib/python3.10/site-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/home/root123/.local/lib/python3.10/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/home/root123/.local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 95, in validate_parameter_constraint

[CV] END C=6.199966577826209, max_iter=500, penalty=l2, solver=saga; total time= 1.5min
Best Parameters for logistic_regression: {'C': np.float64(1.6599452033620266), 'max_iter': 500, 'penalty': 'l2', 'solver': 'saga'}
Best Cross-Validation Accuracy for logistic_regression: 0.3585
--------------------------------------------------
Tuning decision_tree...
Fitting 3 folds for each of 20 candidates, totalling 60 fits




[CV] END criterion=gini, max_depth=17, min_samples_leaf=8, min_samples_split=8; total time=   0.1s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=6, min_samples_split=3; total time=   0.1s
[CV] END criterion=gini, max_depth=17, min_samples_leaf=8, min_samples_split=8; total time=   0.1s
[CV] END criterion=gini, max_depth=17, min_samples_leaf=8, min_samples_split=8; total time=   0.1s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=6, min_samples_split=3; total time=   0.1s
[CV] END criterion=entropy, max_depth=13, min_samples_leaf=8, min_samples_split=5; total time=   0.1s
[CV] END criterion=entropy, max_depth=13, min_samples_leaf=8, min_samples_split=5; total time=   0.1s
[CV] END criterion=entropy, max_depth=5, min_samples_leaf=6, min_samples_split=3; total time=   0.1s
[CV] END criterion=entropy, max_depth=13, min_samples_leaf=8, min_samples_split=5; total time=   0.1s
[CV] END criterion=entropy, max_depth=14, min_samples_leaf=6, min_samples_split=3; total time=