In [1]:
import itertools as it
from collections import defaultdict, Counter
from functools import reduce
from operator import add
from math import factorial
from string import ascii_uppercase
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

# Generate Unique Paths for All Permutations of Matches

In [2]:
def gen_contestants(N):
    """
    Generate unique single-letter names for the girls and guys of N couples.

    Guys take the first N uppercase ASCII letters and girls the next N,
    so no letter is shared between the two groups.

    N must satisfy 0 <= 2 * N <= 26; otherwise there are not enough letters
    and a ValueError is raised instead of silently returning short lists.

    Returns a dictionary with the keys 'girls' and 'guys' (in that order),
    each mapping to a list of N letters.
    """
    max_couples = len(ascii_uppercase) // 2
    if N < 0 or N > max_couples:
        raise ValueError(f"N must be between 0 and {max_couples}, got {N}")
    return dict(
        girls=list(it.islice(ascii_uppercase, N, 2 * N)),
        guys=list(it.islice(ascii_uppercase, 0, N)),
    )

In [3]:
# Number of couples; the number of paths generated below is factorial(N).
N = 10

def reduce_path(path):
    """
    Flatten a path of couples into one sequence of individuals by
    concatenating the couples in order.
    """
    return reduce(lambda flat, couple: flat + couple, path)


def unique_couples(path):
    """
    Report whether every individual appears exactly once in a path of matches.
    """
    individuals = reduce_path(path)
    appearances = Counter(individuals)
    return 1 == max(appearances.values())


def generate_unique_paths(guys, girls):
    """
    Build every path (complete set of couples) for the given contestants.

    Each ordering of the girls is zipped against the guys in their given
    order. Returns a list of paths, each path being a list of
    (guy, girl) tuples.
    """
    unique_paths = []
    for girl_order in it.permutations(girls, len(girls)):
        couples = list(zip(guys, girl_order))
        unique_paths.append(couples)
    return unique_paths

# Look the two groups up by key rather than unpacking .values(), which only
# works while the dictionary's insertion order happens to be (girls, guys).
contestants = gen_contestants(N)
guys, girls = contestants["guys"], contestants["girls"]
paths = generate_unique_paths(guys, girls)
print(f"Number of paths for {N} contestants: {len(paths):,.0f}")

# Sanity check: N guys can be paired with N girls in exactly N! ways.
n = factorial(N)
if n != len(paths):
    print(f"ERROR: {n} != {len(paths)}")

print("First 10 paths:")
for path in paths[:10]:
    print(path) #, unique_couples(path))

# Check that all paths have unique couples
# all([unique_couples(path) for path in paths])

Number of paths for 10 contestants: 3,628,800
First 10 paths:
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'R'), ('I', 'S'), ('J', 'T')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'R'), ('I', 'T'), ('J', 'S')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'S'), ('I', 'R'), ('J', 'T')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'S'), ('I', 'T'), ('J', 'R')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'T'), ('I', 'R'), ('J', 'S')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'Q'), ('H', 'T'), ('I', 'S'), ('J', 'R')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'R'), ('H', 'Q'), ('I', 'S'), ('J', 'T')]
[('A', 'K'), ('B', 'L'), ('C', 'M'), ('D', 'N'), ('E', 'O'), ('F', 'P'), ('G', 'R'), ('H', 

### How many paths contain each of the matches?

In [11]:
# Tally, for every couple, the number of paths that include it.
dd = defaultdict(int)
for couple in it.chain.from_iterable(paths):
    dd[couple] += 1

# Report each couple's path count and its share of all paths.
n = len(paths)
for k, v in dd.items():
    print(k, f"{v:,.0f} ({v/n:.2%})")

('A', 'K') 362,880 (10.00%)
('B', 'L') 362,880 (10.00%)
('C', 'M') 362,880 (10.00%)
('D', 'N') 362,880 (10.00%)
('E', 'O') 362,880 (10.00%)
('F', 'P') 362,880 (10.00%)
('G', 'Q') 362,880 (10.00%)
('H', 'R') 362,880 (10.00%)
('I', 'S') 362,880 (10.00%)
('J', 'T') 362,880 (10.00%)
('I', 'T') 362,880 (10.00%)
('J', 'S') 362,880 (10.00%)
('H', 'S') 362,880 (10.00%)
('I', 'R') 362,880 (10.00%)
('J', 'R') 362,880 (10.00%)
('H', 'T') 362,880 (10.00%)
('G', 'R') 362,880 (10.00%)
('H', 'Q') 362,880 (10.00%)
('I', 'Q') 362,880 (10.00%)
('J', 'Q') 362,880 (10.00%)
('G', 'S') 362,880 (10.00%)
('G', 'T') 362,880 (10.00%)
('F', 'Q') 362,880 (10.00%)
('G', 'P') 362,880 (10.00%)
('H', 'P') 362,880 (10.00%)
('I', 'P') 362,880 (10.00%)
('J', 'P') 362,880 (10.00%)
('F', 'R') 362,880 (10.00%)
('F', 'S') 362,880 (10.00%)
('F', 'T') 362,880 (10.00%)
('E', 'P') 362,880 (10.00%)
('F', 'O') 362,880 (10.00%)
('G', 'O') 362,880 (10.00%)
('H', 'O') 362,880 (10.00%)
('I', 'O') 362,880 (10.00%)
('J', 'O') 362,880 (

#### Total Number of unique couples

In [12]:
# Number of distinct couples that appear in at least one path.
len(dd)

100

## Example: Assume the first path is the matchup played at the first ceremony, and it scores > 0 perfect matches

In [13]:
# Ceremony 1: treat the first path as the matchup that was played.
c1_matchup = paths[0]
# A set gives constant-time couple lookups while scanning every path.
c1_couples = set(c1_matchup)

# Keep only the paths that share at least one couple with the ceremony
# matchup. A single pass yields both the survivors and the number dropped,
# so the dropped paths never have to be materialised just to be counted.
remaining1 = [path for path in paths if not c1_couples.isdisjoint(path)]
n_to_drop = len(paths) - len(remaining1)
print(f"Number of paths to drop: {n_to_drop:,.0f} leaving {len(paths) - n_to_drop:,.0f} paths")

Number of paths to drop: 1,334,961 leaving 2,293,839 paths


### Which couples *now* have the highest probabilities?

In [26]:
# Re-tally couple frequencies over the paths that survived the first ceremony.
dd = defaultdict(int)
for couple in it.chain.from_iterable(remaining1):
    dd[couple] += 1

n = len(remaining1)
# Order the couples from most to least frequent.
d = dict(sorted(dd.items(), key=lambda item: item[1], reverse=True))
# Show the 11 most frequent couples with their share of the remaining paths.
for i, (k, v) in enumerate(d.items()):
    if i >= 11:
        break
    print(f"{i+1}. {k} {v:,.0f} ({v/n:.1%})")

1. ('A', 'K') 362,880 (15.8%)
2. ('B', 'L') 362,880 (15.8%)
3. ('C', 'M') 362,880 (15.8%)
4. ('D', 'N') 362,880 (15.8%)
5. ('E', 'O') 362,880 (15.8%)
6. ('F', 'P') 362,880 (15.8%)
7. ('G', 'Q') 362,880 (15.8%)
8. ('H', 'R') 362,880 (15.8%)
9. ('I', 'S') 362,880 (15.8%)
10. ('J', 'T') 362,880 (15.8%)
11. ('I', 'T') 214,551 (9.4%)


In [27]:
# One row per couple: its path count and that count as a fraction of the
# n remaining paths.
remaining1_df = (
    pd.DataFrame.from_dict(d, orient='index', columns=['count'])
    .rename_axis(index='couple')
    .assign(pct=lambda frame: frame['count'] / n)
)
remaining1_df.head()

Unnamed: 0,count,pct
"(A, K)",362880,0.158198
"(B, L)",362880,0.158198
"(C, M)",362880,0.158198
"(D, N)",362880,0.158198
"(E, O)",362880,0.158198


In [28]:
# For every remaining path, look up its couples in remaining1_df and sum the
# selected rows; map() is lazy, so the lookups only run inside pd.concat below.
# NOTE(review): this performs one .loc lookup per path in remaining1 (about
# 2.3 million paths in the run recorded above) and the cell shows no recorded
# output — confirm that it completes in reasonable time.
m = map(lambda path: remaining1_df.loc[path].sum(), remaining1)
# Concatenate the per-path sums end to end along axis 0.
remaining1_path_totals = pd.concat(m, axis=0)
remaining1_path_totals.head()

## Example: Truth Booth says (A, L) is not a Perfect Match

In [6]:
# The Truth Booth ruled this couple out, so every path containing it is invalid.
no_match = ('A', 'L')
# Count the invalid paths lazily instead of building a throwaway list of them.
n_p2d = sum(1 for path in remaining1 if no_match in path)
remaining2 = [path for path in remaining1 if no_match not in path]
print(f"Number of paths with A-L: {n_p2d:,.0f}, leaving {len(remaining1) - n_p2d:,.0f} paths")
# Independent cross-check: the filtered list must agree with the arithmetic above.
print(f"check: {len(remaining2):,.0f} paths")

Number of paths with A-L: 214,551, leaving 2,079,288 paths
check: 2,079,288 paths


In [7]:
n = len(remaining2)
# Count how many of the remaining paths contain each couple.
c = Counter(it.chain.from_iterable(remaining2))
# Convert each count into the couple's share of the remaining paths.
probs = {couple: count / n for couple, count in c.items()}
# Group couples by probability: how many couples share each distinct value.
freqs = Counter(probs.values())
freqs

Counter({0.0928380291715241: 56,
         0.0918189303261501: 16,
         0.10318484019529763: 16,
         0.15513002527788358: 8,
         0.17452127843761903: 2,
         0.09092727895318013: 1})

In [9]:
# Mean couple probability: each distinct value weighted by how many couples have it.
np.average(list(freqs), weights=list(freqs.values()))

0.10101010101010101

# OLD CODE ...

In [10]:
# Small worked example: three guys and three girls.
N = 3
guys = ['A', 'B', 'C']
girls = ['F', 'G', 'H']
perms = it.permutations(girls, N)

# Record every couple that occurs in any permutation, each with the value 1;
# couples keep the order in which they are first seen.
matches = dict.fromkeys(
    (tuple(p) for perm in perms for p in zip(guys, perm)),
    1,
)
matches

{('A', 'F'): 1,
 ('B', 'G'): 1,
 ('C', 'H'): 1,
 ('B', 'H'): 1,
 ('C', 'G'): 1,
 ('A', 'G'): 1,
 ('B', 'F'): 1,
 ('C', 'F'): 1,
 ('A', 'H'): 1}

In [11]:
# Print one "guy girl value" row per couple.
for couple, v in matches.items():
    i, c = couple
    print(i, c, v)

A F 1
B G 1
C H 1
B H 1
C G 1
A G 1
B F 1
C F 1
A H 1


In [12]:
# Same enumeration as before, but with integer labels 0..N-1 for both groups.
N = 10
guys = range(N)
girls = range(N)
perms = it.permutations(girls, N)

# One path per permutation: guy i is paired with the i-th girl of the ordering.
paths = list(map(lambda perm: list(zip(guys, perm)), perms))
n = len(paths)
print(f'{n:,.0f} paths')

3,628,800 paths


In [13]:
# Row and column labels for a guys-by-girls matrix. Sized from N rather than
# a hard-coded 10 so they cannot drift out of step with the number of couples.
idx = pd.Index(range(N), name='Guys')
cols = pd.Index(range(N), name='Girls')

In [14]:
# Inspect the first generated path.
path = paths[0]
path

[(0, 0),
 (1, 1),
 (2, 2),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 7),
 (8, 8),
 (9, 9)]

In [15]:
def gen_matrix(males, females):
    """
    Build an integer 0/1 matrix with a 1 at each (males[i], females[i]) position.

    The matrix has len(males) rows and len(females) columns.
    """
    shape = (len(males), len(females))
    matrix = np.zeros(shape, dtype='int')
    matrix[males, females] = 1
    return matrix

perms = it.permutations(girls, N)
# Stack one match matrix per permutation on top of each other (axis 0).
mats = np.concatenate(
    list(map(lambda girl_order: gen_matrix(guys, girl_order), perms)),
    axis=0,
)
mats.shape

(36288000, 10)

In [16]:
n = mats.shape[0]
# Number of stacked N-row blocks in `mats`; integer division avoids the
# float round-trip of int(n / N).
n_blocks = n // N

# Guy labels repeat 0..N-1 inside every block. Built with NumPy instead of
# multi-million-element Python lists.
idx = pd.Index(np.tile(np.arange(N, dtype='int'), n_blocks), dtype='int', name='Guys')
M = pd.DataFrame(mats, index=idx, columns=cols)
# Iteration labels: each block number repeated N times, one per row of the block.
data = np.repeat(np.arange(n_blocks, dtype='int'), N)
M['Iteration'] = data
M = M.set_index('Iteration', append=True)
M.head(15)

Unnamed: 0_level_0,Girls,0,1,2,3,4,5,6,7,8,9
Guys,Iteration,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,0,1,0,0,0,0,0,0,0,0,0
1,0,0,1,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,1,0,0,0,0,0,0
4,0,0,0,0,0,1,0,0,0,0,0
5,0,0,0,0,0,0,1,0,0,0,0
6,0,0,0,0,0,0,0,1,0,0,0
7,0,0,0,0,0,0,0,0,1,0,0
8,0,0,0,0,0,0,0,0,0,1,0
9,0,0,0,0,0,0,0,0,0,0,1


#### Check for dupes

In [17]:
# Count rows that are exact duplicates of an earlier row once the 'Guys' and
# 'Iteration' index levels are turned back into ordinary columns.
M.reset_index().duplicated().sum()

In [18]:
# Show the first five paths for comparison with the matrix rows above.
for path in it.islice(paths, 5):
    print(path)

[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)]
[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 9), (9, 8)]
[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 8), (8, 7), (9, 9)]
[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 8), (8, 9), (9, 7)]
[(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 9), (8, 7), (9, 8)]


In [19]:
# Sum the rows of M within each iteration, giving one row per iteration with a
# total for every girl column.
tots = M.groupby(level='Iteration').sum()
tots.describe()

Girls,0,1,2,3,4,5,6,7,8,9
count,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0,3628800.0
mean,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [20]:
# Total number of matches in each iteration, and how often each total occurs.
tots.sum(axis=1).value_counts()

10    3628800
dtype: int64

In [21]:
# Select every iteration for guy 3 and read the column for girl 4.
# slice(None) is written inline: binding it to the name `all` would shadow the
# built-in all() for every cell executed afterwards in this kernel.
M.loc[(3, slice(None)), 4]

Guys  Iteration
3     0            0
      1            0
      2            0
      3            0
      4            0
                  ..
      3628795      0
      3628796      0
      3628797      0
      3628798      0
      3628799      0
Name: 4, Length: 3628800, dtype: int64

### Now that we have all possible matches, what is the best structure for working with them?
* A single matrix of all scenarios?
* A series of matrices?

### Need to be able to drop paths with invalid matches
* Could use a brute-force method and simply search all paths for specific matches. Is there a better way?

### How will you account for overall results?
* On the first round, all paths have equal probability and hence are equally weighted.
* On subsequent rounds, you will update the weights based on the results for that round.

In [22]:
import are_you_the_one as ayto

In [23]:
# Five guys (A-E) and five girls (F-J), one object per letter.
guys = list(map(ayto.Guy, 'ABCDE'))
girls = list(map(ayto.Girl, 'FGHIJ'))
tournament = ayto.Tournament(guys, girls)

In [24]:
# Display the tournament grid's X table.
# NOTE(review): what X represents is defined in are_you_the_one — confirm there.
tournament.grid.X

Unnamed: 0,F,G,H,I,J
A,0,0,0,0,0
B,0,0,0,0,0
C,0,0,0,0,0
D,0,0,0,0,0
E,0,0,0,0,0
