In [29]:
# Importing libraries
import pandas as pd

In [30]:
# Read Data
df = pd.read_csv("D:\hotel_bookings.csv", nrows=300)

# Exploring Data

In [31]:
# Check if there are any nan values 
df.isna().sum()

reservation_date      0
country_from          1
reserved_room_type    0
customer_type         0
dtype: int64

In [32]:
df.duplicated()

0      False
1       True
2      False
3       True
4       True
       ...  
295     True
296     True
297    False
298     True
299     True
Length: 300, dtype: bool

In [33]:
df.duplicated().sum()

138

In [34]:
df.drop_duplicates()

Unnamed: 0,reservation_date,country_from,reserved_room_type,customer_type
0,7/1/2015,PRT,C,Transient
2,7/1/2015,GBR,A,Transient
8,7/1/2015,PRT,A,Transient
9,7/1/2015,PRT,D,Transient
10,7/1/2015,PRT,E,Transient
...,...,...,...,...
290,7/11/2015,CZE,E,Transient
291,7/11/2015,GBR,A,Transient
292,7/11/2015,DEU,A,Transient
293,7/11/2015,ESP,G,Transient


In [35]:
# unique values of each column
df.nunique()

reservation_date      11
country_from          24
reserved_room_type     7
customer_type          3
dtype: int64

# Converting data into lists 

In [36]:
# Extract the relevant columns and create a list of transactions
transactions = df[["reservation_date", "country_from", "reserved_room_type", "customer_type"]].values.tolist()

# Building the model

In [37]:
# Implement Apriori algorithm from scratch to generate frequent item sets
def generate_frequent_item_sets(transactions, min_support):
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            if item in item_counts:
                item_counts[item] += 1
            else:
                item_counts[item] = 1
    frequent_items = {frozenset([item]): count for item, count in item_counts.items() if count >= min_support}
    current_frequent_items = frequent_items
    while current_frequent_items:
        next_frequent_items = {}
        for itemset in current_frequent_items.keys():
            for item in item_counts.keys():
                if item not in itemset:
                    new_itemset = itemset.union(frozenset([item]))
                    if new_itemset not in next_frequent_items:
                        next_frequent_items[new_itemset] = 0
                    for transaction in transactions:
                        if new_itemset.issubset(transaction):
                            next_frequent_items[new_itemset] += 1
        current_frequent_items = {itemset: count for itemset, count in next_frequent_items.items() if count >= min_support}
        frequent_items.update(current_frequent_items)
    return frequent_items

In [38]:
# Generate association rules from frequent item sets based on minimum confidence threshold provided by user
def generate_association_rules(frequent_item_sets, min_confidence):
    association_rules = []
    for itemset in frequent_item_sets.keys():
        if len(itemset) > 1:
            for item in itemset:
                antecedent = frozenset([item])
                consequent = itemset.difference(antecedent)
                if antecedent in frequent_item_sets and consequent in frequent_item_sets:
                    confidence = frequent_item_sets[itemset] / frequent_item_sets[antecedent]
                    if confidence >= min_confidence:
                        association_rules.append((antecedent, consequent, confidence))
    return association_rules

In [39]:
# Get input from user for minimum support and minimum confidence and generate frequent item sets and association rules
min_support = int(input("Enter minimum support: "))
frequent_item_sets = generate_frequent_item_sets(transactions, min_support)

min_confidence = float(input("Enter minimum confidence: "))
association_rules = generate_association_rules(frequent_item_sets, min_confidence)

# Display Results

In [40]:
# Print frequent item sets and association rules with their confidence
print("Frequent Item Sets:")
for itemset, count in frequent_item_sets.items():
    print(f"{set(itemset)}\t\t{count}")

print("\nAssociation Rules:")
for antecedent, consequent, confidence in association_rules:
    print(f"{set(antecedent)} => {set(consequent)}\t\t{confidence}")

Frequent Item Sets:
{'PRT'}		160
{'Transient'}		261
{'A'}		132
{'D'}		61
{'Transient', 'PRT'}		304
{'A', 'PRT'}		136
{'D', 'PRT'}		66
{'A', 'Transient'}		236
{'D', 'Transient'}		92
{'A', 'Transient', 'PRT'}		198
{'D', 'Transient', 'PRT'}		90

Association Rules:
{'Transient'} => {'PRT'}		1.1647509578544062
{'PRT'} => {'Transient'}		1.9
{'A'} => {'PRT'}		1.0303030303030303
{'PRT'} => {'A'}		0.85
{'D'} => {'PRT'}		1.0819672131147542
{'A'} => {'Transient'}		1.7878787878787878
{'Transient'} => {'A'}		0.9042145593869731
{'D'} => {'Transient'}		1.5081967213114753
{'A'} => {'Transient', 'PRT'}		1.5
{'Transient'} => {'A', 'PRT'}		0.7586206896551724
{'PRT'} => {'Transient', 'A'}		1.2375
{'D'} => {'Transient', 'PRT'}		1.4754098360655739
{'PRT'} => {'D', 'Transient'}		0.5625
