In [1]:
import pandas as pd
from collections import Counter
import ast

def count_numbers(df, column_name, numbers=range(1, 17)):
    """Count how often each number occurs across the list-valued cells of a column.

    Each cell of ``df[column_name]`` is expected to be an iterable of values, or
    the string representation of one (as typically obtained when the table was
    read from a file); strings are parsed with ``ast.literal_eval``. Missing
    cells (``None`` / NaN) are skipped.

    Elements are compared as-is: a string element such as ``'3'`` is not
    counted under the integer ``3``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to analyse.
    column_name : str
        Name of the column whose cells contain the lists.
    numbers : iterable, optional
        Values to report, in output order. Values that never occur are
        reported with a count of 0. Defaults to the integers 1 through 16.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'number'`` and ``'count'``, one row per entry of
        ``numbers``.

    Raises
    ------
    ValueError, SyntaxError
        If a string cell is not a valid Python literal.
    """
    tally = Counter()

    for cell in df[column_name]:
        # Empty spreadsheet cells arrive as None/NaN; there is nothing to count.
        if cell is None or (isinstance(cell, float) and pd.isna(cell)):
            continue
        # Cells read from files are usually the text form of a list.
        if isinstance(cell, str):
            cell = ast.literal_eval(cell)
        for value in cell:
            tally[value] += 1

    # Materialise once so a one-shot iterable can be used for both columns.
    numbers = list(numbers)

    return pd.DataFrame({
        'number': numbers,
        'count': [tally[n] for n in numbers],
    })

In [6]:
# Load the reference-annotated dialogue table from the workbook; the first
# spreadsheet column is used as the row index.
df = pd.read_excel("Rosmersholm_refs.xlsx", index_col=0)

In [7]:
# Display the loaded frame to check its columns and a sample of rows.
df

Unnamed: 0,Taler,Dialog,Referanser
0,act,0,[]
1,MADAM HELSETH,"Det er vel bedst, jeg [6] begynder så småt at ...",['[6]']
2,REBEKKA WEST,"Ja, gør De det. Pastoren [1] må vel snart komme.",['[1]']
3,MADAM HELSETH,"Trækker det ikke svært, dér frøkenen [2] sidder?",['[2]']
4,REBEKKA,"Jo, lidt. Vil De [6] kanske lukke.",['[6]']
...,...,...,...
1375,ROSMER,"Vi [1, 2] to følger hinanden, Rebekka [2]. Jeg...","['[1, 2]', '[2]', '[1]', '[2]', '[2]', '[1]']"
1376,REBEKKA,Det tror jeg [2] næsten også.,['[2]']
1377,ROSMER,"For nu er vi [1, 2] to et.","['[1, 2]']"
1378,REBEKKA,"Ja. Nu er vi [1, 2] et. Kom! Så går vi [1, 2] ...","['[1, 2]', '[1, 2]']"


In [12]:
from collections import Counter

def count_patterns(data):
    """Count how often each pattern occurs across an iterable of pattern lists.

    Each item of ``data`` is expected to be an iterable of patterns, or the
    string representation of one (e.g. ``"['[1, 2]', '[2]']"``). Strings are
    parsed with ``ast.literal_eval`` rather than ``eval`` so that text coming
    from a spreadsheet can never execute code. Missing items (``None`` / NaN)
    are skipped.

    Parameters
    ----------
    data : iterable
        For example a DataFrame column whose cells hold the pattern lists.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``'pattern'`` and ``'count'``, one row per distinct
        pattern, sorted by ``'pattern'``. The frame is empty when no patterns
        were found.

    Raises
    ------
    ValueError, SyntaxError
        If a string item is not a valid Python literal.
    """
    tally = Counter()

    for row in data:
        # Empty spreadsheet cells arrive as None/NaN; there is nothing to count.
        if row is None or (isinstance(row, float) and pd.isna(row)):
            continue
        # Safe parse: only Python literals are accepted, never arbitrary code.
        if isinstance(row, str):
            row = ast.literal_eval(row)
        for pattern in row:
            tally[pattern] += 1

    result = pd.DataFrame({
        'pattern': list(tally.keys()),
        'count': list(tally.values()),
    })

    # Sort for readability
    return result.sort_values('pattern')


In [20]:
# Number of rows per part when the frame is divided into six equal parts; a
# whole-number result means the rows split evenly.
len(df)/6

230.0

In [21]:
# Count every reference pattern over the whole 'Referanser' column.
# `patterns` keeps the raw column; `result` holds one row per distinct pattern.
patterns = df['Referanser']
result = count_patterns(patterns)

In [31]:
# Inspect the raw 'Referanser' column that was passed to count_patterns.
patterns

0                                                      []
1                                                 ['[6]']
2                                                 ['[1]']
3                                                 ['[2]']
4                                                 ['[6]']
                              ...                        
1375        ['[1, 2]', '[2]', '[1]', '[2]', '[2]', '[1]']
1376                                              ['[2]']
1377                                           ['[1, 2]']
1378                                 ['[1, 2]', '[1, 2]']
1379    ['[2]', '[6]', '[6]', '[1, 2]', '[1, 2]', '[1,...
Name: Referanser, Length: 1380, dtype: object

In [41]:
# Cut the rows into consecutive segments of one sixth of the frame each and
# tally the reference patterns inside every segment.  `res` maps a segment's
# starting row position to its counts table, whose count column is renamed
# to "count-<start>" so the segments stay distinguishable once combined.
# NOTE: when the row count is not a multiple of six, range() produces one
# extra, shorter trailing segment.
patts = {}  # not used by any of the cells shown here
res = {}
tot_n = len(df['Referanser'])
step = tot_n // 6
for i in range(0, tot_n, step):
    res[i] = count_patterns(df["Referanser"].iloc[i:i + step])
    res[i].columns = ["pattern", f"count-{i}"]
    

In [46]:
# Turn every per-segment table into a single row (patterns become the column
# labels) and stack those rows into one segment-by-pattern frame.
df_tot = pd.concat(
    [segment_counts.set_index("pattern").T for segment_counts in res.values()]
)

In [61]:
# Show a hand-picked subset of patterns for each segment.  axis=1 computes the
# colour gradient within each row, i.e. across the patterns of one segment.
df_tot[['[1, 2]','[1]','[2]','[3]','[4]','[5]','[6]']].style.background_gradient(axis=1)

pattern,"[1, 2]",[1],[2],[3],[4],[5],[6]
count-0,22,124,78,167,41,7,19
count-230,7,199,69,121,115,14,5
count-460,11,225,55,82,9,108,54
count-690,25,191,114,20,21,32,52
count-920,5,88,252,82,18,5,28
count-1150,27,225,243,4,47,8,2


In [40]:
# Counts over the whole column, transposed so that each pattern is a column.
result.set_index("pattern").T

pattern,"[1, 2, 3]","[1, 2, 5]","[1, 2, 6]","[1, 2]","[1, 3, 5]","[1, 3]","[1, 4]","[1, 5]",[10],[11],...,"[4, 5]",[4],"[5, 2, 8]",[5],"[6, 2]",[6],[7],[8],[9],[Uidentificeret]
2,3,1,2,97,1,29,4,2,6,13,...,2,251,2,174,2,160,1,14,1,6


In [18]:
# Write the whole-column pattern counts to an Excel workbook (path is relative
# to the current working directory).
result.to_excel("Counts.xlsx")