In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# dataset folder referenced later in the notebook can be read from disk.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd

# Folder on the mounted Drive that is scanned for the per-recording CSV files.
directory = '/content/drive/My Drive/AI_Assignment02'

def extract_info(filename):
    """Parse the leading dash-separated integer codes of a recording file name.

    Only the last path component is considered, and everything from its first
    ``.`` onwards is ignored. The first four ``-``-separated fields are
    converted to ``int``.

    Parameters
    ----------
    filename : str
        Bare file name or ``/``-separated path, e.g. ``'01-02-03-02-01.csv'``.

    Returns
    -------
    dict
        Keys ``'modality'``, ``'vocal'``, ``'emotion'`` and ``'intensity'``
        (in that order) mapped to the integer value of fields 0 to 3.

    Raises
    ------
    IndexError
        If the name has fewer than four dash-separated fields.
    ValueError
        If one of the first four fields is not an integer.
    """
    stem = filename.split('/')[-1].split('.')[0]
    parts = stem.split('-')

    # Field position in the file name -> key in the returned mapping.
    field_names = ('modality', 'vocal', 'emotion', 'intensity')

    # Built without a local called `dict`, which would shadow the builtin.
    return {name: int(parts[position]) for position, name in enumerate(field_names)}


# Collect the CSV files whose file-name codes match the selection criteria:
# modality 1, vocal 2, intensity 2 and emotion code 3 or 4.
selected_files = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of merged_df (and therefore any seeded split of it) reproducible.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.csv'):
        continue
    file_info = extract_info(filename)
    if (file_info['modality'] == 1 and
            file_info['vocal'] == 2 and
            file_info['emotion'] in [3, 4] and
            file_info['intensity'] == 2):
        selected_files.append(os.path.join(directory, filename))

# Fail early with a clear message instead of the opaque error pd.concat raises
# when it is handed an empty list.
if not selected_files:
    raise ValueError("No matching CSV files found in " + directory)

# Load every selected file and attach a binary label derived from its emotion
# code: code 3 -> 1, the other selected code (4) -> 0.
dfs = []
for filename in selected_files:
    df = pd.read_csv(filename)
    emotion = 1 if extract_info(filename)['emotion'] == 3 else 0
    df['emotion'] = emotion
    dfs.append(df)

# Stack all recordings into one frame with a fresh 0..n-1 index.
merged_df = pd.concat(dfs, ignore_index=True)




In [None]:
# Preview the first rows of the merged frame, including the added `emotion` label column.
merged_df.head()

Unnamed: 0,frame,timestamp,confidence,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,gaze_1_z,gaze_angle_x,...,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,emotion
0,1,0.0,0.98,0.022568,0.168503,-0.985443,-0.170113,0.160523,-0.972262,-0.075,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1
1,2,0.033,0.98,0.026686,0.171277,-0.984861,-0.163608,0.161297,-0.97325,-0.07,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1
2,3,0.067,0.98,0.02387,0.175268,-0.984231,-0.154623,0.158785,-0.97513,-0.067,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1
3,4,0.1,0.98,0.024087,0.173058,-0.984617,-0.159527,0.163184,-0.973613,-0.069,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1
4,5,0.133,0.98,0.029554,0.177599,-0.983659,-0.159141,0.163892,-0.973557,-0.066,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1


In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

# Feature matrix: every column except the bookkeeping fields and the label.
non_feature_columns = ['frame', 'timestamp', 'confidence', 'emotion']
X = merged_df.drop(columns=non_feature_columns)
y = merged_df['emotion'].values

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# repeatable for a given row order.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Baseline network trained on all features: 120 -> 80 -> 1 dense layers.
model_all_features = Sequential()
baseline_layers = (
    Dense(120, input_dim=X_train.shape[1], activation='relu'),
    Dense(80, activation='relu'),
    Dense(1, activation='sigmoid'),
)
for layer in baseline_layers:
    model_all_features.add(layer)

model_all_features.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model_all_features.fit(X_train, y_train, epochs=1, batch_size=64, verbose=0)

# evaluate() returns (loss, accuracy); only the accuracy is reported.
_, accuracy_all_features = model_all_features.evaluate(X_test, y_test, verbose=1)

print("Accuracy on test dataset using all features:", accuracy_all_features)

print("\nNow calculating accuracy through genetic algorithm:\n")

# Chromosome length used by the genetic-algorithm helpers defined below.
number_of_features = X_train.shape[1]


def fitness_function(chromosome, X_train, y_train, X_test, y_test):
    """Score a feature subset by the accuracy of a small dense network.

    Parameters
    ----------
    chromosome : sequence of 0/1 values
        One gene per column of ``X_train``; a non-zero gene keeps the column
        at that position, a zero gene drops it.
    X_train, X_test : pandas.DataFrame
        Feature frames sharing the same column order.
    y_train, y_test : array-like
        Binary targets for the two splits.

    Returns
    -------
    float
        Accuracy reported by ``model.evaluate`` on the selected columns of
        ``X_test``, or ``0.0`` when the chromosome selects no column.
    """
    print(chromosome)

    # Turn the 0/1 genes into the positions of the columns to keep. Handing the
    # raw 0/1 integers to ``iloc`` would be read as positional indices, i.e.
    # columns 0 and 1 repeated over and over, not as a feature mask.
    selected_positions = np.flatnonzero(np.asarray(chromosome))
    if selected_positions.size == 0:
        # No network can be built on zero inputs; report the worst fitness.
        return 0.0

    selected_features = X_train.iloc[:, selected_positions]
    selected_features_test = X_test.iloc[:, selected_positions]

    model = Sequential()
    # The input width is the number of kept columns, not the chromosome length.
    model.add(Dense(120, input_dim=len(selected_positions), activation='relu'))
    model.add(Dense(80, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.fit(selected_features, y_train, epochs=1, batch_size=64, verbose=0)

    # evaluate() returns (loss, accuracy); the accuracy is the fitness.
    _, accuracy = model.evaluate(selected_features_test, y_test, verbose=1)

    return accuracy


def genetic_algorithm(X_train, y_train, X_test, y_test, population_size=4, mutation_rate=0.1, generations=10):
    """Search for a feature subset with a simple genetic algorithm.

    Each chromosome holds one 0/1 gene per column of ``X_train``. Every
    generation is scored with ``fitness_function``, parents are drawn with
    probability proportional to their score, then recombined with
    ``crossover`` and perturbed with ``mutate``.

    NOTE(review): fitness is measured on the ``X_test``/``y_test`` passed in,
    so the returned score was selected on that very split and is optimistic
    for it; consider passing a separate validation split here.

    Parameters
    ----------
    X_train, y_train, X_test, y_test
        Forwarded unchanged to ``fitness_function``.
    population_size : int
        Number of chromosomes per generation.
    mutation_rate : float
        Forwarded to ``mutate``.
    generations : int
        Number of generations to evaluate.

    Returns
    -------
    tuple
        ``(best_chromosome, best_score, best_generation)``: the highest-scoring
        chromosome seen in any generation, its score, and the index of the
        first generation in which that score was reached.

    Raises
    ------
    ValueError
        If ``population_size`` or ``generations`` is smaller than 1.
    """
    if population_size < 1 or generations < 1:
        raise ValueError("population_size and generations must both be at least 1")

    # Size the chromosomes from the data actually passed in rather than from a
    # notebook-level global, so the function works for any feature frame.
    n_features = X_train.shape[1]
    population = np.random.randint(2, size=(population_size, n_features))

    best_chromosome = None
    best_chromosome_score = None
    best_chromosome_generation = 0

    for generation in range(generations):
        fitness_scores = [
            fitness_function(chromosome, X_train, y_train, X_test, y_test)
            for chromosome in population
        ]
        generation_best = max(fitness_scores)

        print("\nGeneration "+str(generation)+": Best Fitness Score = "+str(round(generation_best,4))+"\n\n")

        # Strict '>' keeps the earliest occurrence of the overall best score.
        if best_chromosome is None or generation_best > best_chromosome_score:
            best_index = int(np.argmax(fitness_scores))
            best_chromosome = population[best_index].copy()
            best_chromosome_score = fitness_scores[best_index]
            best_chromosome_generation = generation

        # Fitness-proportional selection. If every score is zero the ratios
        # would be NaN, so fall back to a uniform draw (p=None).
        total_fitness = float(sum(fitness_scores))
        if total_fitness > 0:
            probabilities = [score / total_fitness for score in fitness_scores]
        else:
            probabilities = None

        selected_indices = np.random.choice(population_size, size=population_size, replace=True, p=probabilities)
        parents = population[selected_indices]

        offspring = crossover(parents)
        population = mutate(offspring, mutation_rate)

    return best_chromosome, best_chromosome_score, best_chromosome_generation


def crossover(parents):
    """Single-point crossover applied to consecutive pairs of parents.

    One crossover point is drawn for the whole call. For each pair
    ``(parents[2k], parents[2k+1])`` the two children keep their own parent's
    genes before the point and take the other parent's genes from the point
    onwards. With an odd number of parents the last one has no partner and is
    carried over unchanged.

    Parameters
    ----------
    parents : numpy.ndarray
        2-D array with one chromosome per row.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as ``parents``; the input is
        not modified.
    """
    parents = np.asarray(parents)

    # Start from a copy so that every row is initialised, including an
    # unpaired last parent.
    offspring = parents.copy()

    # Take the chromosome length from the data instead of a global.
    n_genes = parents.shape[1]
    if n_genes == 0:
        return offspring

    crossover_point = np.random.randint(n_genes)

    for pair_start in range(0, len(parents) - 1, 2):
        first = parents[pair_start]
        second = parents[pair_start + 1]
        # Heads already come from the copy; only the tails are swapped.
        offspring[pair_start, crossover_point:] = second[crossover_point:]
        offspring[pair_start + 1, crossover_point:] = first[crossover_point:]

    return offspring


def mutate(offspring, mutation_rate):
    """Flip genes of every chromosome with probability ``mutation_rate``.

    One uniform random number is drawn per gene, row by row. Genes whose draw
    falls below ``mutation_rate`` are replaced by ``1 - gene``. The change is
    made in place and the same ``offspring`` object is returned.
    """
    # First pass: decide, gene by gene, which positions will be flipped.
    flip_mask = [
        [np.random.rand() < mutation_rate for _ in chromosome]
        for chromosome in offspring
    ]

    # Second pass: apply the flips in place.
    for chromosome, row_flags in zip(offspring, flip_mask):
        for gene_index, should_flip in enumerate(row_flags):
            if should_flip:
                chromosome[gene_index] = 1 - chromosome[gene_index]

    return offspring



# Run the genetic search with its default settings and report the outcome.
best_chromosome, best_score, best_generation = genetic_algorithm(X_train, y_train, X_test, y_test)
print(f"Best Chromosome:\n{best_chromosome}")

# Read the chromosome as a 0/1 mask over the feature columns.
selected_feature_names = X.columns[np.array(best_chromosome) == 1]

print("\nSelected Features:")
num_features_per_line = 37
for position, feature_name in enumerate(selected_feature_names):
    print(feature_name, end='\t')
    # Start a new line after every `num_features_per_line` names.
    if (position + 1) % num_features_per_line == 0:
        print()

print(f"\n\nBest Score: {round(best_score, 4)}")
print(f"\nFound in Generation: {best_generation}")


Accuracy on test dataset using all features: 0.5855609774589539

Now calculating accuracy through genetic algorithm:

[0 0 1 0 1 0 1 0 0 1 1 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0
 1 0 1 1 1 0 1 0 1 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1 0 0 1 0 0 1 1 0 0
 1 1 0 1 1 0 1 1 1 1 0 0 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0 0 0 1 1 0 0 1 0
 0 1 1 1 0 0 1 0 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 1 1 1 0
 1 0 1 1 1 0 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 0 1 1 0 1 0 0 1 1 1 1 1 0
 1 0 1 0 0 1 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 0 1 1 0 0 0 0 1 1 1 1 1 0 1 0 1
 1 0 1 0 0 0 0 1 0 0 1 0 1 0 0 0 1 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 0 1 1
 0 0 0 0 0 1 1 1 0 1 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 1 1 0 1 0 0 0 0 1 0 1
 1 1 0 1 1 1 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 0 1 1 1 1 0 1 1
 1 1 0 0 1 0 1 1 0 1 1 0 1 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0
 0 0 0 0 1 0 0 1 0 0 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 1 1 1 0 0 0 1 1 0
 0 0 1 0 1 1 1 0 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 1 1 1 1 0 