In [1]:
import pandas as pd
from fuzzywuzzy import fuzz
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [12]:
def load_data():
    df1 = pd.read_csv("_ASSOC_Truco_01.csv")
    df2 = pd.read_csv("_ASSOC_Truco_02.csv")

    return pd.concat([df1, df2], ignore_index=True)

def data_preprocessing(df):
    df = remove_rows_with_nan(df)
    df = format_pairs(df)
    df = remove_jogadores_column(df)
    df = merge_similar_names(df)
    df = add_wins_columns(df)

    return df

def remove_rows_with_nan(dataframe):
    cleaned_df = dataframe.dropna(how='any')
    
    return cleaned_df

def remove_spaces_and_commas(name):
    return name.replace(' ', '').replace(',', '')

def format_pairs(dataframe):
    dataframe_copy = dataframe.copy()  # Create a copy of the DataFrame
    
    dataframe_copy['Player1'] = dataframe['Jogadore(a)s'].str.split(', ').str[0].apply(remove_spaces_and_commas)
    dataframe_copy['Player2'] = dataframe['Jogadore(a)s'].str.split(', ').str[1].apply(remove_spaces_and_commas)
    
    return dataframe_copy

def remove_jogadores_column(dataframe):
    dataframe = dataframe.drop(columns=['Jogadore(a)s'])

    return dataframe

def merge_similar_names(dataframe, threshold=85):
    unique_players = pd.concat([dataframe['Player1'], dataframe['Player2']]).unique()
    
    merged_names = {}
    
    for name1 in unique_players:
        if name1 not in merged_names:
            merged_names[name1] = name1
        else:
            continue
        
        for name2 in unique_players:
            if name1 != name2 and fuzz.ratio(name1, name2) >= threshold:
                merged_names[name2] = name1
    
    dataframe['Player1'] = dataframe['Player1'].replace(merged_names)
    dataframe['Player2'] = dataframe['Player2'].replace(merged_names)
    
    return dataframe

def add_wins_columns(dataframe):
    dataframe['Wins'] = (dataframe['Amigos'] > dataframe['Oponentes'])
    dataframe['GreatWins'] = (dataframe['Amigos'] > dataframe['Oponentes']) & (dataframe['Oponentes'] < 12)
    return dataframe

def create_association_rules(dataframe):
    frequent = apriori(dataframe, min_support=0.05, use_colnames=True)
    association_rules_df = association_rules(frequent, metric='lift', min_threshold=1.0)
    association_rules_df = association_rules_df.drop(columns=['antecedent support', 'consequent support', 'lift', 'leverage', 'conviction', 'zhangs_metric'])
    association_rules_df['support'] = association_rules_df['support'].round(3)
    association_rules_df['confidence'] = association_rules_df['confidence'].round(3)

    return association_rules_df

def create_table_for_apriori(df):
    players = set(df['Player1'].unique()).union(set(df['Player2'].unique()))

    player_columns = pd.DataFrame(False, columns=players, index=df.index)

    for i, row in df.iterrows():
        for player in players:
            if player == row['Player1'] or player == row['Player2']:
                player_columns.at[i, player] = True

    result_df = pd.concat([player_columns, df[['Wins', 'GreatWins']]], axis=1)

    return result_df

In [13]:
df = load_data()
print(df)

     Partida           Jogadore(a)s  Oponentes  Amigos
0        1.0        Gaudencio, Xiru         24       0
1        2.0             Xiru, Peao         14      24
2        3.0               Peao, Ze         24       4
3        4.0     Prenda, Estanciera         24       8
4        5.0       Estanciera, Xiru          9      24
..       ...                    ...        ...     ...
149    150.0  Prenda, Estancieira           24      22
150    151.0   Prenda, Estancieira           6      24
151    152.0        Ze, Estancieira          0      24
152    153.0             Prenda, Ze         24       1
153      NaN                    NaN          0      24

[154 rows x 4 columns]


In [14]:
df = data_preprocessing(df)
print(df)

     Partida  Oponentes  Amigos     Player1     Player2   Wins  GreatWins
0        1.0         24       0   Gaudencio        Xiru  False      False
1        2.0         14      24        Xiru        Peao   True      False
2        3.0         24       4        Peao          Ze  False      False
3        4.0         24       8      Prenda  Estanciera  False      False
4        5.0          9      24  Estanciera        Xiru   True       True
..       ...        ...     ...         ...         ...    ...        ...
148    149.0         24      15        Peao      Prenda  False      False
149    150.0         24      22      Prenda  Estanciera  False      False
150    151.0          6      24      Prenda  Estanciera   True       True
151    152.0          0      24          Ze  Estanciera   True       True
152    153.0         24       1      Prenda          Ze  False      False

[153 rows x 7 columns]


In [16]:
df_general = create_table_for_apriori(df)
print(df_general)



      Peao  Mafalda   Xiru  Prenda  Francois  Gaudencio     Ze  Estanciera  \
0    False    False   True   False     False       True  False       False   
1     True    False   True   False     False      False  False       False   
2     True    False  False   False     False      False   True       False   
3    False    False  False    True     False      False  False        True   
4    False    False   True   False     False      False  False        True   
..     ...      ...    ...     ...       ...        ...    ...         ...   
148   True    False  False    True     False      False  False       False   
149  False    False  False    True     False      False  False        True   
150  False    False  False    True     False      False  False        True   
151  False    False  False   False     False      False   True        True   
152  False    False  False    True     False      False   True       False   

      Wins  GreatWins  
0    False      False  
1     True     

In [19]:
assocGeneral = create_association_rules(df_general)

assoc1 = assocGeneral[
    assocGeneral['antecedents'].apply(lambda x: 'GreatWins' not in x and 'Wins' not in x) &
    assocGeneral['consequents'].apply(lambda x: len(x) == 1 and ('Wins' in x or 'GreatWins' in x))
]

assoc2 = assocGeneral[
    assocGeneral['antecedents'].apply(lambda x: 'GreatWins' not in x and 'Wins' not in x) &
    assocGeneral['consequents'].apply(lambda x: len(x) < 3 and ('Wins' in x and 'GreatWins' in x))
]

print("\nAssociation Rules:")

assoc = pd.concat([assoc1, assoc2], ignore_index=True)

print(assoc)


Association Rules:
          antecedents        consequents  support  confidence
0              (Peao)             (Wins)    0.222       0.557
1              (Peao)        (GreatWins)    0.170       0.426
2            (Prenda)             (Wins)    0.176       0.574
3         (Gaudencio)             (Wins)    0.144       0.710
4         (Gaudencio)        (GreatWins)    0.118       0.581
5      (Peao, Prenda)             (Wins)    0.105       0.696
6  (Xiru, Estanciera)        (GreatWins)    0.078       0.429
7              (Peao)  (GreatWins, Wins)    0.170       0.426
8         (Gaudencio)  (GreatWins, Wins)    0.118       0.581
9  (Xiru, Estanciera)  (GreatWins, Wins)    0.078       0.429


In [20]:
one_player_rules = assoc[assoc['antecedents'].apply(lambda x: ',' not in str(x))]
two_player_rules = assoc[assoc['antecedents'].apply(lambda x: ',' in str(x))]

print("One Player Rules:")
print(one_player_rules)

print("\nTwo Player Rules:")
print(two_player_rules)

One Player Rules:
   antecedents        consequents  support  confidence
0       (Peao)             (Wins)    0.222       0.557
1       (Peao)        (GreatWins)    0.170       0.426
2     (Prenda)             (Wins)    0.176       0.574
3  (Gaudencio)             (Wins)    0.144       0.710
4  (Gaudencio)        (GreatWins)    0.118       0.581
7       (Peao)  (GreatWins, Wins)    0.170       0.426
8  (Gaudencio)  (GreatWins, Wins)    0.118       0.581

Two Player Rules:
          antecedents        consequents  support  confidence
5      (Peao, Prenda)             (Wins)    0.105       0.696
6  (Xiru, Estanciera)        (GreatWins)    0.078       0.429
9  (Xiru, Estanciera)  (GreatWins, Wins)    0.078       0.429
