In [1]:
import pandas as pd
from itertools import combinations as combos
import pickle

In [2]:
# create list of landmarks
lmks = ['one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight']

# create dictionary where the landmarks are the keys and the corresponding csv columns are the values
columns = {'one': (0, 1, 2), 'two': (3, 4, 5), 'three': (6, 7, 8), 'four': (9, 10, 11), 'five': (12, 13, 14), 'six': (15, 16, 17), 'seven': (18, 19, 20), 'eight': (21, 22, 23)}


In [6]:

# we are interested in all combinations of at least three landmarks
combos_2 = [x for x in combos(lmks, 2)]
combos_3 = [x for x in combos(lmks, 3)]
combos_4 = [x for x in combos(lmks, 4)]
combos_5 = [x for x in combos(lmks, 5)]
combos_6 = [x for x in combos(lmks, 6)]
combos_7 = [x for x in combos(lmks, 7)]

# make list of lists containing all possible combinations
combos_n = [combos_2, combos_3, combos_4, combos_5, combos_6, combos_7]

# uncomment the following for verification of all possible combos
#print(combos_n)

print("Number of possible combinations of three, four, five, six, and seven landmarks respectively:")
for i in combos_n:
    print(len(i))

total_combos = 0
for i in combos_n:
    total_combos += len(i)
print("The total number of possible combinations of at least two landmarks is: " + str(total_combos))    


Number of possible combinations of three, four, five, six, and seven landmarks respectively:
28
56
70
56
28
8
The total number of possible combinations of at least two landmarks is: 246
[('one', 'two'), ('one', 'three'), ('one', 'four'), ('one', 'five'), ('one', 'six'), ('one', 'seven'), ('one', 'eight'), ('two', 'three'), ('two', 'four'), ('two', 'five'), ('two', 'six'), ('two', 'seven'), ('two', 'eight'), ('three', 'four'), ('three', 'five'), ('three', 'six'), ('three', 'seven'), ('three', 'eight'), ('four', 'five'), ('four', 'six'), ('four', 'seven'), ('four', 'eight'), ('five', 'six'), ('five', 'seven'), ('five', 'eight'), ('six', 'seven'), ('six', 'eight'), ('seven', 'eight')]


In [4]:

# read in procrustes coordinates from full analysis, remove ID column for ease of saving combo landmarks in dictionary 
df = pd.read_csv("Proc_Coord1.csv", usecols=['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x3', 'y3', 'z3', 'x4', 'y4', 'z4', 'x5', 'y5', 'z5', 'x6', 'y6', 'z6', 'x7', 'y7', 'z7', 'x8', 'y8', 'z8'])
#print(df)


In [5]:

# create 218 unique csv files with fragment landmark coordinates

# start counting combos for naming of csv files
combo_num = 0    

# create empty dictionary for storing combo (frag) numbers as keys and associated landmarks as values
combo_dict = {}    

# loop through all combo lists
for combo_list in combos_n:
    # loop through each combination
    for combination in combo_list:
        # start with empty list of columns to keep
        keep_cols = []
        for landmark in lmks:
            if landmark in combination:
                # if the landmark is found in the combination, add the associated columns to the list of keepers
                keep_cols += columns[landmark]
        # create dataframe with only the columns in the keep list
        combo_df = df.iloc[:, keep_cols]
        # increase the combo count for the next combo in the loop
        combo_num += 1
        # add that combos landmarks to the dictionary
        combo_dict["frag_{}".format(str(combo_num))] = list(combo_df)
        # verify that it is working
        print(list(combo_df))
        # save as csv file, commented because it has no need to run again currently
        #combo_df.to_csv("frag_{}.csv".format(str(combo_num)), index=False)

    

['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x3', 'y3', 'z3']
['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x4', 'y4', 'z4']
['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x5', 'y5', 'z5']
['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x6', 'y6', 'z6']
['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x7', 'y7', 'z7']
['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x8', 'y8', 'z8']
['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x4', 'y4', 'z4']
['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x5', 'y5', 'z5']
['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x6', 'y6', 'z6']
['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x7', 'y7', 'z7']
['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x8', 'y8', 'z8']
['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x5', 'y5', 'z5']
['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x6', 'y6', 'z6']
['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x7', 'y7', 'z7']
['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x8', 'y8', 'z8']
['x1', 'y1', 'z1', 'x5', 'y5', 'z5', 'x6', 'y6', 'z6']
['x1', 'y1', 'z1', 'x5', 'y5', 'z5', 'x7', 'y7', 'z7']
['x1', 'y1', 'z1', 'x5', 'y5', 'z5', 'x8', 'y8', 'z8']
['x1', 'y1

In [8]:
print(combo_dict)

{'frag_1': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x3', 'y3', 'z3'], 'frag_2': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x4', 'y4', 'z4'], 'frag_3': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x5', 'y5', 'z5'], 'frag_4': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x6', 'y6', 'z6'], 'frag_5': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x7', 'y7', 'z7'], 'frag_6': ['x1', 'y1', 'z1', 'x2', 'y2', 'z2', 'x8', 'y8', 'z8'], 'frag_7': ['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x4', 'y4', 'z4'], 'frag_8': ['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x5', 'y5', 'z5'], 'frag_9': ['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x6', 'y6', 'z6'], 'frag_10': ['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x7', 'y7', 'z7'], 'frag_11': ['x1', 'y1', 'z1', 'x3', 'y3', 'z3', 'x8', 'y8', 'z8'], 'frag_12': ['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x5', 'y5', 'z5'], 'frag_13': ['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x6', 'y6', 'z6'], 'frag_14': ['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x7', 'y7', 'z7'], 'frag_15': ['x1', 'y1', 'z1', 'x4', 'y4', 'z4', 'x8', 'y8', 'z8'], 'fr

In [12]:
saved = pd.DataFrame.from_dict(combo_dict, orient='index')
print(saved)
saved.to_csv("frag_dictionary.csv")

          0   1   2   3   4   5   6   7   8     9   ...    11    12    13  \
frag_1    x1  y1  z1  x2  y2  z2  x3  y3  z3  None  ...  None  None  None   
frag_2    x1  y1  z1  x2  y2  z2  x4  y4  z4  None  ...  None  None  None   
frag_3    x1  y1  z1  x2  y2  z2  x5  y5  z5  None  ...  None  None  None   
frag_4    x1  y1  z1  x2  y2  z2  x6  y6  z6  None  ...  None  None  None   
frag_5    x1  y1  z1  x2  y2  z2  x7  y7  z7  None  ...  None  None  None   
...       ..  ..  ..  ..  ..  ..  ..  ..  ..   ...  ...   ...   ...   ...   
frag_214  x1  y1  z1  x2  y2  z2  x3  y3  z3    x4  ...    z4    x6    y6   
frag_215  x1  y1  z1  x2  y2  z2  x3  y3  z3    x5  ...    z5    x6    y6   
frag_216  x1  y1  z1  x2  y2  z2  x4  y4  z4    x5  ...    z5    x6    y6   
frag_217  x1  y1  z1  x3  y3  z3  x4  y4  z4    x5  ...    z5    x6    y6   
frag_218  x2  y2  z2  x3  y3  z3  x4  y4  z4    x5  ...    z5    x6    y6   

            14    15    16    17    18    19    20  
frag_1    None  None  

In [47]:
# save combo_dict as something retreivable later, name it fragments_dictionary.pickle

with open('fragments_dictionary.pickle', 'wb') as filename:
    pickle.dump(combo_dict, filename)

# this dictionary is now retreivable with the code contained in the .ipynb file titled "Fragment Dictionary"

In [8]:
# alternate method of dictionary retreival is the following text file, which is searchable

f = open("fragments_dictionary.txt","w")
f.write( str(combo_dict) )
f.close()

### The following is sample code from stack overflow that may be* useful for keeping the column headers for each combo as a list in a dictionary. This will be hopefully useful for the LDA loop.

#### The code above (list(combo_df)) gives the appropriate list for each combo, the code below can show how to save each of those in a dictionary to be called by the loop in the LDA. May need to remove the ID column before hand and keep it separate, like the gender column. 

*now proven

In [39]:
"""
d
={}
for x in range(1,10):
        d["string{0}".format(x)]="Hello"

In [7]: d["string5"]
Out[7]: 'Hello'

In [8]: d
Out[8]: 
{'string1': 'Hello',
 'string2': 'Hello',
 'string3': 'Hello',
 'string4': 'Hello',
 'string5': 'Hello',
 'string6': 'Hello',
 'string7': 'Hello',
 'string8': 'Hello',
 'string9': 'Hello'}
 
 """

'\nd\n={}\nfor x in range(1,10):\n        d["string{0}".format(x)]="Hello"\n\nIn [7]: d["string5"]\nOut[7]: \'Hello\'\n\nIn [8]: d\nOut[8]: \n{\'string1\': \'Hello\',\n \'string2\': \'Hello\',\n \'string3\': \'Hello\',\n \'string4\': \'Hello\',\n \'string5\': \'Hello\',\n \'string6\': \'Hello\',\n \'string7\': \'Hello\',\n \'string8\': \'Hello\',\n \'string9\': \'Hello\'}\n \n '