In [1]:
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor


In [2]:
df = pd.read_csv("dataset/googleplaystore.csv")

In [3]:
df.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up


In [4]:
df.shape

(10841, 13)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10841 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             10841 non-null  object 
 1   Category        10841 non-null  object 
 2   Rating          9367 non-null   float64
 3   Reviews         10841 non-null  object 
 4   Size            10841 non-null  object 
 5   Installs        10841 non-null  object 
 6   Type            10840 non-null  object 
 7   Price           10841 non-null  object 
 8   Content Rating  10840 non-null  object 
 9   Genres          10841 non-null  object 
 10  Last Updated    10841 non-null  object 
 11  Current Ver     10833 non-null  object 
 12  Android Ver     10838 non-null  object 
dtypes: float64(1), object(12)
memory usage: 1.1+ MB


In [6]:
df.isnull().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

In [7]:
def value_to_float(x):
    # Convert the string feature into float/Integer
    #If there is 'K' or 'M' in the string, convert it to the corresponding number (1000 or 1000000)
    
    if type(x) == float or type(x) == int:
        return x
    if 'K' in x:
        if len(x) > 1:
            return float(x.replace('K', '')) * 10**3
        return 1000.0
    if 'M' in x:
        if len(x) > 1:
            return float(x.replace('M', '')) * 10**6
        return 1000000.0
    
    # If the string cannot be converted, return 0 instead
    try:
        parsed_val = float(x)
    except ValueError:
        parsed_val = 0.0
    return parsed_val

In [8]:
df = df.loc[df["Rating"].notnull()]
df = df.loc[df["Rating"] <= 5]

df["Price"] = df["Price"].apply(lambda x: x.replace('$', ''))
df["Installs"] = df["Installs"].apply(lambda x: x.replace('+', ''))
df['Installs'] = df['Installs'].apply(lambda a: str(a).replace(',', '') if ',' in str(a) else a)
df['Installs'] = df['Installs'].apply(lambda a : int(a))
df['Reviews'] = df['Reviews'].apply(lambda a : int(a))

genres = df["Genres"].value_counts().head().index

str_cols = ["Size", "Price"]

for col in str_cols:
    df[[col]] = df[[col]].fillna(value="")
    df[col] = df[col].apply(value_to_float)

In [9]:
df.dropna(inplace=True)

In [10]:
df.isnull().sum()

App               0
Category          0
Rating            0
Reviews           0
Size              0
Installs          0
Type              0
Price             0
Content Rating    0
Genres            0
Last Updated      0
Current Ver       0
Android Ver       0
dtype: int64

In [11]:
df.shape

(9360, 13)

In [12]:
df.describe()

Unnamed: 0,Rating,Reviews,Size,Installs,Price
count,9360.0,9360.0,9360.0,9360.0,9360.0
mean,4.191838,514376.7,18941240.0,17908750.0,0.961279
std,0.515263,3145023.0,23028130.0,91266370.0,15.82164
min,1.0,1.0,0.0,1.0,0.0
25%,4.0,186.75,2400000.0,10000.0,0.0
50%,4.3,5955.0,9500000.0,500000.0,0.0
75%,4.5,81627.5,27000000.0,5000000.0,0.0
max,5.0,78158310.0,100000000.0,1000000000.0,400.0


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9360 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             9360 non-null   object 
 1   Category        9360 non-null   object 
 2   Rating          9360 non-null   float64
 3   Reviews         9360 non-null   int64  
 4   Size            9360 non-null   float64
 5   Installs        9360 non-null   int64  
 6   Type            9360 non-null   object 
 7   Price           9360 non-null   float64
 8   Content Rating  9360 non-null   object 
 9   Genres          9360 non-null   object 
 10  Last Updated    9360 non-null   object 
 11  Current Ver     9360 non-null   object 
 12  Android Ver     9360 non-null   object 
dtypes: float64(3), int64(2), object(8)
memory usage: 1023.8+ KB


In [14]:
# There are 7 records where Reviews are greater than Installs 
df[df['Reviews'] > df['Installs']].shape

(7, 13)

In [15]:
# Dropping 7 records that have greater Reviews than Installs
df.drop(df[df['Reviews'] > df['Installs']].index,inplace=True)
df[df['Reviews'] > df['Installs']].shape

(0, 13)

In [16]:
# Dropping rows that have Price > 200
df.drop(df[df['Price'] > 200].index,inplace=True)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9338 entries, 0 to 10840
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   App             9338 non-null   object 
 1   Category        9338 non-null   object 
 2   Rating          9338 non-null   float64
 3   Reviews         9338 non-null   int64  
 4   Size            9338 non-null   float64
 5   Installs        9338 non-null   int64  
 6   Type            9338 non-null   object 
 7   Price           9338 non-null   float64
 8   Content Rating  9338 non-null   object 
 9   Genres          9338 non-null   object 
 10  Last Updated    9338 non-null   object 
 11  Current Ver     9338 non-null   object 
 12  Android Ver     9338 non-null   object 
dtypes: float64(3), int64(2), object(8)
memory usage: 1021.3+ KB


In [18]:
df = df[df['Category'] != '1.9']
value_counts = df['Category'].value_counts()
print(value_counts)

Category
FAMILY                 1742
GAME                   1093
TOOLS                   733
PRODUCTIVITY            351
MEDICAL                 349
COMMUNICATION           328
SPORTS                  319
FINANCE                 317
PHOTOGRAPHY             317
PERSONALIZATION         312
LIFESTYLE               308
BUSINESS                303
HEALTH_AND_FITNESS      297
SOCIAL                  258
SHOPPING                238
NEWS_AND_MAGAZINES      233
TRAVEL_AND_LOCAL        226
DATING                  195
BOOKS_AND_REFERENCE     178
VIDEO_PLAYERS           160
EDUCATION               155
ENTERTAINMENT           149
MAPS_AND_NAVIGATION     124
FOOD_AND_DRINK          109
HOUSE_AND_HOME           76
WEATHER                  75
AUTO_AND_VEHICLES        73
LIBRARIES_AND_DEMO       64
ART_AND_DESIGN           61
COMICS                   58
PARENTING                50
EVENTS                   45
BEAUTY                   42
Name: count, dtype: int64


In [19]:
category = pd.get_dummies(df['Category'], dtype='uint8')
df = pd.concat([df, category], axis=1)


In [20]:
df.drop('Category', axis=1, inplace=True)

In [21]:
df['Rating'].value_counts()

Rating
4.4    1107
4.3    1074
4.5    1037
4.2     951
4.6     823
4.1     705
4.0     565
4.7     499
3.9     386
3.8     299
5.0     268
3.7     239
4.8     233
3.6     172
3.5     162
3.4     128
3.3     102
4.9      87
3.0      83
3.1      69
3.2      63
2.9      44
2.8      42
2.7      25
2.6      25
2.5      21
2.3      20
2.4      19
1.0      16
2.2      14
1.9      13
2.0      12
1.7       8
1.8       8
2.1       8
1.6       4
1.4       3
1.5       3
1.2       1
Name: count, dtype: int64

In [22]:
print(df['Content Rating'].value_counts())

Content Rating
Everyone           7394
Teen               1082
Mature 17+          461
Everyone 10+        397
Adults only 18+       3
Unrated               1
Name: count, dtype: int64


In [23]:
df = df[df['Content Rating'] != 'Unrated']


In [24]:
content_rating = pd.get_dummies(df['Content Rating'], dtype='uint8')

df = pd.concat([df, content_rating], axis=1)

df.drop('Content Rating', axis=1, inplace=True)


In [25]:
df.head(6)

Unnamed: 0,App,Rating,Reviews,Size,Installs,Type,Price,Genres,Last Updated,Current Ver,...,SPORTS,TOOLS,TRAVEL_AND_LOCAL,VIDEO_PLAYERS,WEATHER,Adults only 18+,Everyone,Everyone 10+,Mature 17+,Teen
0,Photo Editor & Candy Camera & Grid & ScrapBook,4.1,159,19000000.0,10000,Free,0.0,Art & Design,"January 7, 2018",1.0.0,...,0,0,0,0,0,0,1,0,0,0
1,Coloring book moana,3.9,967,14000000.0,500000,Free,0.0,Art & Design;Pretend Play,"January 15, 2018",2.0.0,...,0,0,0,0,0,0,1,0,0,0
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",4.7,87510,8700000.0,5000000,Free,0.0,Art & Design,"August 1, 2018",1.2.4,...,0,0,0,0,0,0,1,0,0,0
3,Sketch - Draw & Paint,4.5,215644,25000000.0,50000000,Free,0.0,Art & Design,"June 8, 2018",Varies with device,...,0,0,0,0,0,0,0,0,0,1
4,Pixel Draw - Number Art Coloring Book,4.3,967,2800000.0,100000,Free,0.0,Art & Design;Creativity,"June 20, 2018",1.1,...,0,0,0,0,0,0,1,0,0,0
5,Paper flowers instructions,4.4,167,5600000.0,50000,Free,0.0,Art & Design,"March 26, 2017",1.0,...,0,0,0,0,0,0,1,0,0,0


In [26]:
df = df.drop(['Last Updated', 'Current Ver', 'Android Ver','Type','App','Reviews', 'Installs','Genres'], axis=1)


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9337 entries, 0 to 10840
Data columns (total 41 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Rating               9337 non-null   float64
 1   Size                 9337 non-null   float64
 2   Price                9337 non-null   float64
 3   ART_AND_DESIGN       9337 non-null   uint8  
 4   AUTO_AND_VEHICLES    9337 non-null   uint8  
 5   BEAUTY               9337 non-null   uint8  
 6   BOOKS_AND_REFERENCE  9337 non-null   uint8  
 7   BUSINESS             9337 non-null   uint8  
 8   COMICS               9337 non-null   uint8  
 9   COMMUNICATION        9337 non-null   uint8  
 10  DATING               9337 non-null   uint8  
 11  EDUCATION            9337 non-null   uint8  
 12  ENTERTAINMENT        9337 non-null   uint8  
 13  EVENTS               9337 non-null   uint8  
 14  FAMILY               9337 non-null   uint8  
 15  FINANCE              9337 non-null   uint8

In [28]:
df['Size'].value_counts()

Size
0.0           1892
14000000.0     165
12000000.0     161
15000000.0     159
11000000.0     159
              ... 
65000000.0       9
89000000.0       9
86000000.0       8
90000000.0       5
1000000.0        4
Name: count, Length: 182, dtype: int64

In [29]:
df = df[df['Size'] != 0]

In [31]:
y = df[['Rating']]
x = df.drop('Rating',axis=1)

In [41]:
x.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7445 entries, 0 to 10840
Data columns (total 40 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Size                 7445 non-null   float64
 1   Price                7445 non-null   float64
 2   ART_AND_DESIGN       7445 non-null   uint8  
 3   AUTO_AND_VEHICLES    7445 non-null   uint8  
 4   BEAUTY               7445 non-null   uint8  
 5   BOOKS_AND_REFERENCE  7445 non-null   uint8  
 6   BUSINESS             7445 non-null   uint8  
 7   COMICS               7445 non-null   uint8  
 8   COMMUNICATION        7445 non-null   uint8  
 9   DATING               7445 non-null   uint8  
 10  EDUCATION            7445 non-null   uint8  
 11  ENTERTAINMENT        7445 non-null   uint8  
 12  EVENTS               7445 non-null   uint8  
 13  FAMILY               7445 non-null   uint8  
 14  FINANCE              7445 non-null   uint8  
 15  FOOD_AND_DRINK       7445 non-null   uint8

In [32]:
x.shape, y.shape

((7445, 40), (7445, 1))

In [33]:
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=1)

In [34]:
import random
from deap import base, creator, tools, algorithms
import warnings
from sklearn.exceptions import DataConversionWarning
warnings.filterwarnings("ignore", category=DataConversionWarning)


In [35]:
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Make predictions on the test data
Y_pred = rf.predict(X_test)

mse = mean_squared_error(y_test, Y_pred)
print("Mean squared error: ", mse)

Mean squared error:  0.3458750919662488


In [36]:
# Create a fitness function that measures the performance of the model
def evaluate(individual):
    # Convert the individual into a list of hyperparameters
    n_estimators = individual[0]
    max_depth = individual[1]
    max_features = individual[2]
    min_samples_split = individual[3]
    min_samples_leaf = individual[4]
    
    # Create a random forest model with the given hyperparameters
    rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf,random_state=42)
    
    # Train the model on the training data
    rf.fit(X_train, y_train)
    
    # Evaluate the model on the test data
    Y_pred = rf.predict(X_test)
    mse = mean_squared_error(y_test, Y_pred)
    print(mse)
    # Return the mean squared error as the fitness value
    return mse,

In [37]:

# Define the genetic algorithm parameters
toolbox = base.Toolbox()
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)
toolbox.register("attr_n_estimators", random.randint, 1, 1000)
toolbox.register("attr_max_depth", random.randint, 1, 50)
toolbox.register("attr_max_features", random.randint, 1, len(x.columns))
toolbox.register("attr_min_samples_split", random.randint, 2,50)
toolbox.register("attr_min_samples_leaf", random.randint, 1,50)
toolbox.register("individual", tools.initCycle, creator.Individual, (toolbox.attr_n_estimators, toolbox.attr_max_depth, toolbox.attr_max_features, toolbox.attr_min_samples_split, toolbox.attr_min_samples_leaf), n=1)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxOnePoint)
toolbox.register("mutate", tools.mutUniformInt, low=2, up=len(x.columns), indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

# Run the genetic algorithm
population = toolbox.population(n=50)
algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=5)


0.2997397758978961
0.30223796269540704
0.30380345776160844
0.302734397434832
0.3024454083262892
0.3032905403622651
0.3014796977259702
0.2986582836306623
0.30610306570091955
0.30322277150725946
0.2992950584456513
0.30765190013407795
0.30091331164028795
0.30122904067329287
0.3021029014708448
0.3023868070215335
0.30091203171887637
0.30045518298501855
0.3018392209014482
0.3000192968217125
0.29948784283500635
0.30135650799900593
0.3019737860818858
0.30211224119597097
0.3260718066236097
0.30605647023953453
0.31097015066859857
0.3019377704712577
0.3017759916334929
0.3036428706281648
0.30497606602590066
0.3055579649539398
0.302280374328951
0.3023734192845957
0.29974719722277055
0.30068090622419275
0.3014498396054574
0.3027342897694848
0.298406610176381
0.3196765328652874
0.3024951886252784
0.30910627716513617
0.299706062803608
0.30237341860120825
0.30051211241575093
0.3031577663603459
0.3014654577346255
0.29915968264836423
0.30543340025846755
0.3009422458871512
gen	nevals
0  	50    
0.29936026

([[335, 25, 6, 15, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [335, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25, 6, 35, 3],
  [548, 25,

In [38]:
# Select the best individual
best_individual = tools.selBest(population, k=1)[0]
n_estimators, max_depth, max_features, min_samples_split, min_samples_leaf = best_individual

In [39]:
# Train the model with the best hyperparameters
rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features,min_samples_split=min_samples_split,min_samples_leaf=min_samples_leaf, random_state=42)
rf.fit(X_train, y_train)
Y_pred = rf.predict(X_test)
mse = mean_squared_error(y_test, Y_pred)
print("Mean squared error with optimized hyperparameters: ", mse)

Mean squared error with optimized hyperparameters:  0.297754972540621


In [40]:
import joblib

# Save the model to a file
joblib.dump(rf, 'model.joblib')

['model.joblib']

In [78]:
predictions = rf.predict([[50000000550, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]])
print(predictions)

[4.39780404]




In [52]:
# import random
# import numpy as np
# import pygad
# from sklearn.ensemble import RandomForestRegressor
# from sklearn.metrics import mean_squared_error
# import warnings
# from sklearn.exceptions import DataConversionWarning
# warnings.filterwarnings("ignore", category=DataConversionWarning)


In [55]:
# Define the fitness function
# def fitness_func(self, solution, solution_idx):
#     if type(solution) == list:
#         solution = np.array(solution)
#     n_estimators = int(solution[0])
#     max_depth = int(solution[1])
#     max_features = int(solution[2])
    
#     # Create a random forest model with the given hyperparameters
#     rf = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, max_features=max_features, random_state=42)
    
#     # Train the model on the training data
#     rf.fit(X_train, y_train)
    
#     # Evaluate the model on the test data
#     Y_pred = rf.predict(X_test)
#     mse = mean_squared_error(y_test, Y_pred)

#     print("mse: ", mse)
#     # Return the mean squared error as the fitness value
#     fitness = 1.0 / (mse + 1e-6)
#     print("fitness: ",fitness)
#     return fitness

# # Define the initial population
# num_generations = 5
# num_parents_mating = 2
# sol_per_pop = 10
# num_genes = 3
# parent_selection_type = "rank"
# crossover_type = "single_point"
# mutation_type = "random"

# init_range_low = 1
# init_range_high = 1000
# gene_space = [(init_range_low, init_range_high), (1, 50), (1, len(x.columns))]

# # initial_population = pygad.initial_population(population_size=sol_per_pop, 
# #                                               num_genes=num_genes, 
# #                                               gene_space=gene_space)

# initial_population = np.random.randint(low=init_range_low, high=init_range_high+1, size=(sol_per_pop, num_genes))


# # Create the genetic algorithm
# ga_instance = pygad.GA(num_generations=num_generations,
#                         num_parents_mating=num_parents_mating,
#                         sol_per_pop=sol_per_pop,
#                         num_genes=num_genes,
#                         gene_space=gene_space,
#                         parent_selection_type=parent_selection_type,
#                         crossover_type=crossover_type,
#                         mutation_type=mutation_type,
#                         initial_population=initial_population,
#                         fitness_func=fitness_func)

# # Run the genetic algorithm
# ga_instance.run()

# print("finished")

If you do not want to mutate any gene, please set mutation_type=None.


mse:  0.3445703683803924
fitness:  2.9021563941901345
mse:  0.34503890968842643
fitness:  2.8982154583306245
mse:  0.3451978577318896
fitness:  2.8968809647008853
mse:  0.3450779024361724
fitness:  2.8978879697954447
mse:  0.3447606714386467
fitness:  2.900554449185511
mse:  0.3451514895398358
fitness:  2.8972701351023717
mse:  0.34511125185168584
fitness:  2.897607936648266
mse:  0.34808102623702314
fitness:  2.872886057377348
mse:  0.3451152772564228
fitness:  2.8975741392139436
mse:  0.3450478975956059
fitness:  2.8981399649970503
mse:  0.34480494128113204
fitness:  2.9001820452527123
mse:  0.3446375231083332
fitness:  2.9015908929184375
mse:  0.3443207161243309
fitness:  2.9042606178197334
mse:  0.3580503378223407
fitness:  2.7928955833037103
mse:  0.3093234384878814
fitness:  3.2328515809758036
mse:  0.3445703683803924
fitness:  2.9021563941901345
mse:  0.4825486006473441
fitness:  2.072325826523309
mse:  0.3585851723859694
fitness:  2.78872995393597
mse:  0.30950343334895125
fitn

In [59]:
# # Get the best solution found by the genetic algorithm
# best_solution = ga_instance.best_solution()
# print("best_solution", best_solution)
# # Train the model with the best hyperparameters
# n_estimators, max_depth, max_features = best_solution
# rf = RandomForestRegressor(n_estimators=int(n_estimators), max_depth=int(max_depth), max_features=int(max_features), random_state=42)
# rf.fit(X_train, y_train)
# Y_pred = rf.predict(X_test)
# mse = mean_squared_error(y_test, Y_pred)
# print("Mean squared error with optimized hyperparameters: ", mse)

mse:  0.3443207161243309
fitness:  2.9042606178197334
mse:  0.31296344282043287
fitness:  3.1952511633206915
mse:  0.31199578389158494
fitness:  3.2051612440578485
mse:  0.31199578389158494
fitness:  3.2051612440578485
mse:  0.3120598691862791
fitness:  3.204503027270196
mse:  0.3443207161243309
fitness:  2.9042606178197334
mse:  0.31296344282043287
fitness:  3.1952511633206915
mse:  0.3446883549408667
fitness:  2.9011629911563572
mse:  0.3120598691862791
fitness:  3.204503027270196
best_solution (array([270.,   1.,  40.]), 3.2328515809758036, 0)


TypeError: only size-1 arrays can be converted to Python scalars