# Electives 6,7 statistics 
by: Ahmed Elsarta

In [1]:
import numpy as np
import pandas as pd
import itertools
#import csv file
electives_survey = pd.read_csv('electives_survey.csv')
preferences = electives_survey[["Choose 2 s"]].copy()
preferences

Unnamed: 0,Choose 2 s
0,"A, B"
1,"A, B"
2,"A, C"
3,"A, D"
4,"A, D"
...,...
82,"D, E"
83,"D, E"
84,"D, E"
85,"D, E"


In [2]:
# #Split preferences column into two columns separated by comma
preferences = preferences["Choose 2 s"].str.split(',', expand=True)
# rename columns to 1st and 2nd
preferences = preferences.rename(columns={0: '1st', 1: '2nd'})
# strip spaces from all columns
preferences = preferences.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
# create copy of preferences for safekeeping
original_preferences = preferences.copy()

In [3]:
cols = preferences.columns
sorted_preferences = pd.DataFrame(np.sort(preferences.values, axis=1), columns=cols, index=preferences.index)
# group preferences by 2 columns
choices = preferences.copy().groupby(['1st', '2nd']).size().reset_index(name='count')
# sort the preferences 
choices = choices.sort_values(by=['count'], ascending=False)
# show preferences
# sorted_preferences

In [4]:
# calculate ratio of rows where both instances belong to input set
def calculate_satisfaction(first_group, second_group):
    # get all rows where both instances belong to input set
    both = sorted_preferences[(sorted_preferences['1st'].isin(first_group)) & (sorted_preferences['2nd'].isin(second_group))]
    # get all rows where only one instance belongs to input set
    either = sorted_preferences[(sorted_preferences['1st'].isin(first_group)) ^ (sorted_preferences['2nd'].isin(second_group))]
    # calculate ratio of rows where both instances belong to input set
    ratio = len(both) / (len(both) + len(either))
    return ratio*100

In [5]:
first_group = {'D', 'C'}
second_group = {'A', 'B', 'E'}

satisfaction_ratio = calculate_satisfaction(first_group, second_group)

print("The satisfaction ratio is", satisfaction_ratio, "%")

The satisfaction ratio is 67.24137931034483 %


In [6]:
# generating all possible permutations
permutations = list(itertools.permutations(['A', 'B', 'C', 'D', 'E']))
print(permutations)

[('A', 'B', 'C', 'D', 'E'), ('A', 'B', 'C', 'E', 'D'), ('A', 'B', 'D', 'C', 'E'), ('A', 'B', 'D', 'E', 'C'), ('A', 'B', 'E', 'C', 'D'), ('A', 'B', 'E', 'D', 'C'), ('A', 'C', 'B', 'D', 'E'), ('A', 'C', 'B', 'E', 'D'), ('A', 'C', 'D', 'B', 'E'), ('A', 'C', 'D', 'E', 'B'), ('A', 'C', 'E', 'B', 'D'), ('A', 'C', 'E', 'D', 'B'), ('A', 'D', 'B', 'C', 'E'), ('A', 'D', 'B', 'E', 'C'), ('A', 'D', 'C', 'B', 'E'), ('A', 'D', 'C', 'E', 'B'), ('A', 'D', 'E', 'B', 'C'), ('A', 'D', 'E', 'C', 'B'), ('A', 'E', 'B', 'C', 'D'), ('A', 'E', 'B', 'D', 'C'), ('A', 'E', 'C', 'B', 'D'), ('A', 'E', 'C', 'D', 'B'), ('A', 'E', 'D', 'B', 'C'), ('A', 'E', 'D', 'C', 'B'), ('B', 'A', 'C', 'D', 'E'), ('B', 'A', 'C', 'E', 'D'), ('B', 'A', 'D', 'C', 'E'), ('B', 'A', 'D', 'E', 'C'), ('B', 'A', 'E', 'C', 'D'), ('B', 'A', 'E', 'D', 'C'), ('B', 'C', 'A', 'D', 'E'), ('B', 'C', 'A', 'E', 'D'), ('B', 'C', 'D', 'A', 'E'), ('B', 'C', 'D', 'E', 'A'), ('B', 'C', 'E', 'A', 'D'), ('B', 'C', 'E', 'D', 'A'), ('B', 'D', 'A', 'C', 'E'), 

In [7]:
# calculate satisfaction ratio for first permutation as test\
satisfaction_result = calculate_satisfaction(permutations[0][0:2], permutations[0][2:5])
print(satisfaction_result)

41.37931034482759


In [8]:
# calculate satisfaction for all permutations
satisfaction_results = {}
for permutation in permutations:
    satisfaction_result = calculate_satisfaction(permutation[0:2], permutation[2:5])
    satisfaction_results.update({permutation: satisfaction_result})
# display results
print(satisfaction_results)


{('A', 'B', 'C', 'D', 'E'): 41.37931034482759, ('A', 'B', 'C', 'E', 'D'): 41.37931034482759, ('A', 'B', 'D', 'C', 'E'): 41.37931034482759, ('A', 'B', 'D', 'E', 'C'): 41.37931034482759, ('A', 'B', 'E', 'C', 'D'): 41.37931034482759, ('A', 'B', 'E', 'D', 'C'): 41.37931034482759, ('A', 'C', 'B', 'D', 'E'): 36.7816091954023, ('A', 'C', 'B', 'E', 'D'): 36.7816091954023, ('A', 'C', 'D', 'B', 'E'): 36.7816091954023, ('A', 'C', 'D', 'E', 'B'): 36.7816091954023, ('A', 'C', 'E', 'B', 'D'): 36.7816091954023, ('A', 'C', 'E', 'D', 'B'): 36.7816091954023, ('A', 'D', 'B', 'C', 'E'): 62.121212121212125, ('A', 'D', 'B', 'E', 'C'): 62.121212121212125, ('A', 'D', 'C', 'B', 'E'): 62.121212121212125, ('A', 'D', 'C', 'E', 'B'): 62.121212121212125, ('A', 'D', 'E', 'B', 'C'): 62.121212121212125, ('A', 'D', 'E', 'C', 'B'): 62.121212121212125, ('A', 'E', 'B', 'C', 'D'): 47.61904761904761, ('A', 'E', 'B', 'D', 'C'): 47.61904761904761, ('A', 'E', 'C', 'B', 'D'): 47.61904761904761, ('A', 'E', 'C', 'D', 'B'): 47.619

In [9]:
# convert results to pandas dataframe and describe it
satisfaction_results_df = pd.DataFrame(list(satisfaction_results.items()),columns = ['Permutation','Satisfaction Ratio'])
# drop records with exact same satisfaction ratio
satisfaction_results_df = satisfaction_results_df.drop_duplicates(subset ="Satisfaction Ratio", keep = "first")
# sort table by satisfaction ratio
satisfaction_results_df = satisfaction_results_df.sort_values(by=['Satisfaction Ratio'], ascending=False)

In [10]:
satisfaction_results_df

Unnamed: 0,Permutation,Satisfaction Ratio
36,"(B, D, A, C, E)",74.137931
60,"(C, D, A, B, E)",67.241379
12,"(A, D, B, C, E)",62.121212
18,"(A, E, B, C, D)",47.619048
0,"(A, B, C, D, E)",41.37931
6,"(A, C, B, D, E)",36.781609
30,"(B, C, A, D, E)",34.52381
42,"(B, E, A, C, D)",24.444444
66,"(C, E, A, B, D)",23.809524
90,"(D, E, A, B, C)",0.0
