In [1]:
pip install deap

Collecting deap
  Downloading deap-1.4.3-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading deap-1.4.3-cp312-cp312-win_amd64.whl (109 kB)
Installing collected packages: deap
Successfully installed deap-1.4.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import random
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from deap import creator, base, tools, algorithms

In [3]:
df = pd.read_csv("spam.csv")
df.head()

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [4]:
df.isnull().sum()

Category    0
Message     0
dtype: int64

In [5]:
vectorizer = TfidfVectorizer(stop_words = 'english', max_features=300)
X = vectorizer.fit_transform(df['Message']).toarray()
y = df['Category'].apply(lambda x: 1 if x == 'spam' else 0)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3, random_state=42)

In [6]:
#Setting Up DEAP for GA
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()               

In [7]:
toolbox.register("attr_int", random.randint, 10,100)
toolbox.register("attr_float", random.uniform, 0.001,0.1)
toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.attr_int, toolbox.attr_float), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

def evaluate(individual):
    hidden_layer_size = int(individual[0])
    learning_rate = individual[1]
    learning_rate = max(0.0001, min(0.1, learning_rate))
    # Create and train neural network model
    model = MLPClassifier(hidden_layer_sizes = (hidden_layer_size,), learning_rate_init = learning_rate, max_iter = 300, random_state = 42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    return (acc,)

toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.1, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

In [8]:
# Run Genetic Algorithm
pop = toolbox.population(n = 10)
algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen = 5, verbose=True)

gen	nevals
0  	10    
1  	7     
2  	9     
3  	5     
4  	6     
5  	5     


([[35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [34.82696150735803, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876],
  [35, 0.06411081807775876]],
 [{'gen': 0, 'nevals': 10},
  {'gen': 1, 'nevals': 7},
  {'gen': 2, 'nevals': 9},
  {'gen': 3, 'nevals': 5},
  {'gen': 4, 'nevals': 6},
  {'gen': 5, 'nevals': 5}])

In [9]:
best_ind = tools.selBest(pop, k=1)[0]
print("The best (Hidden Layer, Learning_rate) is: ",best_ind)

The best (Hidden Layer, Learning_rate) is:  [35, 0.06411081807775876]


In [10]:
final_model = MLPClassifier(hidden_layer_sizes=(int(best_ind[0]),),learning_rate_init=best_ind[1], max_iter = 300)
final_model.fit(X_train, y_train)

In [11]:
final_preds = final_model.predict(X_test)
final_acc = accuracy_score(y_test, final_preds)
print("Final Accuracy: {:.2f}%".format(final_acc * 100))                          

Final Accuracy: 97.97%
