In [18]:
import os
import pandas as pd

# Define the paths
# NOTE(review): absolute, machine-specific paths; consider deriving them from one
# configurable base directory so the notebook can run elsewhere.
folder_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\02_filtered\items"
auction_location_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions\auctions.csv"
# Not read anywhere in this cell; kept in case another cell relies on the name.
location_info_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions_data\auctions_locations.csv"
pickupdates_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions_data\auctions_pickupdates.csv"

# Tunables
MAX_ITEM_FILES = 100   # only the first N item files (in sorted name order) are scanned
AUCTION_ID_FIELD = 0   # positional fields of a raw item row
ITEM_ID_FIELD = 1
USER_ID_FIELD = 12     # NOTE(review): assumed to be the column whose header is "user_id" - confirm
BUNDLING_COLUMNS = ["Auction_ID", "Item_ID", "User_ID", "Pickup_Date", "Bundling_Count"]

# Initialize dictionaries
location_dict = {}      # auction ID (str) -> location_ID
auctionsID_dict = {}    # pickup date -> list of auction IDs (str) with that date
pickupdates_dict = {}   # auction ID (str) -> list of its pickup dates


def parse_item_row(line):
    """Split one raw tab-separated item line into (auction_id, item_id, user_id).

    Only the line terminator is removed before splitting, so empty leading or
    trailing fields keep their positions. Returns None when the row has too
    few fields to contain the user-id column.
    """
    fields = line.rstrip("\r\n").split("\t")
    if len(fields) <= USER_ID_FIELD:
        return None
    return fields[AUCTION_ID_FIELD], fields[ITEM_ID_FIELD], fields[USER_ID_FIELD]


def same_known_location(auction_a, auction_b, locations):
    """Return True only if both auctions have a known location and the two are equal.

    A missing or NaN location never matches; otherwise two auctions that are
    both absent from `locations` would compare equal (None == None).
    """
    loc_a = locations.get(auction_a)
    loc_b = locations.get(auction_b)
    if loc_a is None or loc_b is None or pd.isna(loc_a) or pd.isna(loc_b):
        return False
    return bool(loc_a == loc_b)


def load_user_counts(auction_id, items_dir, cache):
    """Return {user_id: row count} for `<items_dir>/<auction_id>.csv`, or None.

    Each file is read at most once; the result (including None for a missing
    or unreadable file) is stored in `cache`. user_id is read as text so it
    compares equal to the raw strings taken from item rows.
    """
    if auction_id not in cache:
        counts = None
        items_path = os.path.join(items_dir, f"{auction_id}.csv")
        if os.path.isfile(items_path):
            try:
                users = pd.read_csv(items_path, delimiter='\t', usecols=["user_id"], dtype=str)["user_id"]
                counts = users.value_counts().to_dict()
            except Exception as e:
                print(f"Error processing items for auction {auction_id}: {e}")
        cache[auction_id] = counts
    return cache[auction_id]


# Load the auctions file to map auction_id to location_ID
try:
    # IDs are read as text: a numeric parse would drop leading zeros and, if the
    # column holds any blank, produce floats such as "66.0" that never match.
    auctions_df = pd.read_csv(auction_location_path, delimiter='\t', usecols=["ID", "location_ID"], dtype={"ID": str})
    location_dict = dict(zip(auctions_df['ID'], auctions_df['location_ID']))
except Exception as e:
    print(f"Error loading auctions file: {e}")

# Load pickup dates and create mappings
try:
    auctionsID_df = (
        pd.read_csv(pickupdates_path, delimiter='\t', usecols=["auction_ID", "date"], dtype={"auction_ID": str})
        .dropna(subset=["auction_ID", "date"])
        # Repeated (auction, date) pairs would only multiply identical output rows.
        .drop_duplicates()
    )
    auctionsID_dict = auctionsID_df.groupby('date')['auction_ID'].apply(list).to_dict()
    pickupdates_dict = auctionsID_df.groupby('auction_ID')['date'].apply(list).to_dict()
except Exception as e:
    print(f"Error loading pickup dates file: {e}")

# Process files in the items folder
# One output row is produced per (item row, pickup date, co-located auction sharing
# that date); Bundling_Count is how many rows of that auction's items file carry the
# item's user_id.
# NOTE(review): counts are per matching auction, not summed across auctions - confirm intent.
bundling_data = []
user_counts_cache = {}

try:
    # os.listdir order is arbitrary; sort so the same files are picked on every run.
    files = sorted(os.listdir(folder_path))[:MAX_ITEM_FILES]
except OSError as e:
    print(f"Error processing folder: {e}")
    files = []

for file_name in files:
    file_path = os.path.join(folder_path, file_name)
    if not os.path.isfile(file_path):
        continue
    try:
        # utf-8 matches the default encoding pd.read_csv uses for these same files.
        with open(file_path, 'r', encoding='utf-8') as file:
            next(file, None)  # skip the header line
            malformed_rows = 0
            for line in file:
                if not line.strip():
                    continue  # blank line
                parsed = parse_item_row(line)
                if parsed is None:
                    malformed_rows += 1
                    continue
                auction_id, item_id, user_id = parsed

                # Retrieve the pickup dates for the auction
                for pickup_date in pickupdates_dict.get(auction_id, []):
                    # Auctions sharing this pickup date
                    for same_auction_id in auctionsID_dict.get(pickup_date, []):
                        if not same_known_location(auction_id, same_auction_id, location_dict):
                            continue
                        counts = load_user_counts(same_auction_id, folder_path, user_counts_cache)
                        if counts is None:
                            continue  # no items file for that auction, or it failed to load
                        bundling_data.append({
                            "Auction_ID": auction_id,
                            "Item_ID": item_id,
                            "User_ID": user_id,
                            "Pickup_Date": pickup_date,
                            "Bundling_Count": counts.get(user_id, 0)
                        })
            if malformed_rows:
                print(f"Skipped {malformed_rows} malformed row(s) in {file_name}")
    except Exception as e:
        print(f"Error processing file {file_name}: {e}")

# Convert bundling data to a DataFrame (explicit columns keep the CSV header even when empty)
bundling_df = pd.DataFrame(bundling_data, columns=BUNDLING_COLUMNS)
if bundling_df.empty:
    print("Warning: no bundling rows were produced; check the load errors above.")

# Save results
output_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\02_filtered\bundling_results.csv"
try:
    bundling_df.to_csv(output_path, index=False)
    print(f"Bundling data successfully saved to {output_path}")
except Exception as e:
    print(f"Error saving bundling results CSV: {e}")


Bundling data successfully saved to C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\02_filtered\bundling_results.csv


In [14]:
# NOTE(review): `auctions_with_same_pickup_dates` is not assigned in the cell shown
# above, and this cell's execution count (14) is lower than that cell's (18).
# Presumably it was defined in a cell not visible here - confirm the notebook
# still works under Restart Kernel -> Run All.
auctions_with_same_pickup_dates 

['66',
 '67',
 '68',
 '69',
 '70',
 '71',
 '72',
 '73',
 '74',
 '75',
 '76',
 '77',
 '78',
 '79',
 '80',
 '81',
 '88',
 '89',
 '90',
 '91',
 '92',
 '93',
 '99',
 '101',
 '102',
 '104',
 '105',
 '106',
 '107',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '139',
 '140',
 '141',
 '142',
 '143',
 '144',
 '146',
 '147',
 '148',
 '149',
 '150',
 '152',
 '153',
 '155',
 '156',
 '160',
 '161',
 '162',
 '163',
 '169',
 '170',
 '171',
 '172',
 '174',
 '177',
 '181',
 '187']