In [None]:

import os 
import pandas as pd

# Input locations for this notebook.
# The r"..." prefix makes each literal a raw string, so the Windows backslashes
# are kept as-is instead of being read as escape sequences.
# NOTE(review): these are absolute paths on a single machine; the cell only runs
# where this exact directory layout exists.

# Folder whose files are read line by line as tab-separated text further down.
folder_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\02_filtered\items"
# Auctions table; the ID and location_ID columns are read from it further down.
auction_location_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions\auctions.csv"
# Location table; columns id, state, zip, tzoffset_utc and tzoffset_et are read from it.
location_info_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions_data\auctions_locations.csv"
# Pickup-date table; columns auction_ID and date are read from it.
pickupdates_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\auctions-dataset-filtered-auctions\auctions_data\auctions_pickupdates.csv"

# Empty list intended for item-detail rows.
# NOTE(review): nothing later in this cell appends to or reads `data`.
data = []

# Load the auctions table and build the auction_id -> location_ID lookup.
# The file is parsed once, as tab-separated text, and only the two columns
# needed for the mapping are read.
location_dict = {}
try:
    auctions_df = pd.read_csv(auction_location_path, delimiter='\t', usecols=["ID", "location_ID"])
    # Item files are split into text fields further down, so auction IDs are
    # compared as strings everywhere.
    auctions_df['ID'] = auctions_df['ID'].astype(str)

    # Keys: auction ID as str. Values: location_ID exactly as pandas parsed it.
    location_dict = dict(zip(auctions_df['ID'], auctions_df['location_ID']))
    print("Loaded auctions file with auction_id to location mapping.")
except Exception as e:
    # Best effort: report the problem and carry on with an empty mapping.
    print(f"An error occurred while loading the auctions file: {e}")



# Create a dictionary for location details using location_ID as the key.
# Each value is a dict with the keys 'state', 'zip', 'tzoffset_utc' and 'tzoffset_et'.
location_info_dict = {}
try:
    location_info_df = pd.read_csv(
        location_info_path,
        delimiter='\t',
        usecols=["id", "state", "zip", "tzoffset_utc", "tzoffset_et"],
        # Parse zip as text: a numeric parse drops leading zeros and turns the
        # column into floats ("12345.0") as soon as one value is missing.
        dtype={"zip": str},
    )
    location_info_df['id'] = location_info_df['id'].astype(str)  # Ensure location_ID (id) is a string
    location_info_df['zip'] = location_info_df['zip'].astype(str)  # Missing zips become the string 'nan'

    # Keep every loaded detail column, not only state and zip.
    detail_columns = ['state', 'zip', 'tzoffset_utc', 'tzoffset_et']
    location_info_dict = location_info_df.set_index('id')[detail_columns].to_dict(orient='index')
except Exception as e:
    # Best effort: report the problem and carry on with an empty mapping.
    print(f"An error occurred while loading location info file: {e}")



# Create a dictionary with the pickup date as key and the list of auction IDs
# that have that pickup date as value.
auctionsID_dict = {}
try:
    auctionsID_df = pd.read_csv(pickupdates_path, delimiter='\t', usecols=["auction_ID", "date"])
    auctionsID_df['date'] = auctionsID_df['date'].astype(str)
    # Store auction IDs as strings so they can be used directly as keys into
    # the other auction-ID-keyed dictionaries, which all use string keys.
    auctionsID_df['auction_ID'] = auctionsID_df['auction_ID'].astype(str)

    # One pass over the two columns; setdefault creates the list on first sight
    # of a date and appends to it afterwards, keeping file order.
    for pickupdate, auction_id in zip(auctionsID_df['date'], auctionsID_df['auction_ID']):
        auctionsID_dict.setdefault(pickupdate, []).append(auction_id)
    print("Created pickupdates dictionary for pickupdate to auction_ID mapping.")
except Exception as e:
    # Best effort: report the problem and carry on with an empty mapping.
    print(f"An error occurred while loading pickupdates file: {e}")


# Build the reverse lookup from the same pickup-date file:
# auction_ID (as str) -> list of its pickup dates, in file order.
pickupdates_dict = {}
try:
    pickupdates_df = pd.read_csv(pickupdates_path, delimiter='\t', usecols=["auction_ID", "date"])
    pickupdates_df['auction_ID'] = pickupdates_df['auction_ID'].astype(str)  # string keys

    # Walk both columns in step; the first date seen for an auction creates
    # its list, later ones are appended to it.
    for auction_id, pickupdate in zip(pickupdates_df['auction_ID'], pickupdates_df['date']):
        pickupdates_dict.setdefault(auction_id, []).append(pickupdate)
    print("Created pickupdates dictionary for auction_ID to pickupdate mapping.")
except Exception as e:
    print(f"An error occurred while loading pickupdates file: {e}")


# Combine the two lookups into auction_id -> {location_id: [pickup dates]}.
# Auctions without an entry in pickupdates_dict are left out.
auction_location_pickupdates = {}
try:
    for auction_key, location_key in location_dict.items():
        if auction_key not in pickupdates_dict:
            continue  # no pickup dates known for this auction
        per_location = auction_location_pickupdates.setdefault(auction_key, {})
        per_location[location_key] = pickupdates_dict[auction_key]
except Exception as e:
    print(f"An error occurred while building the nested dictionary: {e}")


# Initialize variables
MAX_FILES = 100         # how many directory entries are processed per run
bundling_data = []      # rows of [auction_id, item_id, user_id], appended on a user's first sighting
pickup_dates = []       # pickup dates of the auction on the most recently processed item row
filtered_auctions = []  # auctions sharing a pickup date and a location with that auction
user_id_counts = {}     # user_id -> number of item rows counted for that user so far
auction_items_cache = {}  # auction id -> parsed (user_id, item_id) pairs, so each file is read once


def collect_pickup_dates(auction_id, auction_location_pickupdates):
    """Return all pickup dates stored for `auction_id`, across its locations.

    Args:
        auction_id: key into `auction_location_pickupdates`.
        auction_location_pickupdates: mapping auction id -> {location id: [dates]}.

    Returns:
        A new list of dates; empty when the auction is not in the mapping.
    """
    dates = []
    for location_dates in auction_location_pickupdates.get(auction_id, {}).values():
        dates.extend(location_dates)
    return dates


def find_same_pickup_auctions(auction_id, pickup_dates, auctionsID_dict, auction_location_pickupdates):
    """Return auctions that share a pickup date and a location with `auction_id`.

    Args:
        auction_id: the auction being processed.
        pickup_dates: that auction's pickup dates.
        auctionsID_dict: mapping pickup date -> list of auction ids.
        auction_location_pickupdates: mapping auction id (str) -> {location id: [dates]}.

    Returns:
        A list of auction ids as strings. An auction is listed once per shared
        pickup date, so duplicates are possible; `auction_id` itself is included
        when it appears under its own dates.
    """
    own_locations = set(auction_location_pickupdates.get(auction_id, {}))
    matches = []
    for pickup_date in pickup_dates:
        # Look up the date of this iteration, not a variable left over from
        # an earlier loop.
        for same_auction_id in auctionsID_dict.get(pickup_date, []):
            # The nested dictionary is keyed by string ids; normalise the
            # candidate so numeric ids still match.
            same_key = str(same_auction_id)
            other_locations = auction_location_pickupdates.get(same_key, {})
            if own_locations & set(other_locations):
                matches.append(same_key)
    return matches


def load_auction_items(auction_file):
    """Read one tab-separated auction item file.

    Args:
        auction_file: path or file-like object accepted by `pd.read_csv`.

    Returns:
        A list of (user_id, item_id) tuples in file order, with user_id as str.

    Raises:
        KeyError: if the file has no 'user_id' or 'item_id' column.
    """
    items_df = pd.read_csv(auction_file, delimiter='\t')
    return list(zip(items_df['user_id'].astype(str), items_df['item_id']))


# MAIN
try:
    files = os.listdir(folder_path)[:MAX_FILES]
    for file_name in files:
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue

        print(f"\n--- Processing '{file_name}' ---")
        try:
            with open(file_path, 'r') as file:
                for line_number, line in enumerate(file, start=1):
                    row_data = line.strip().split('\t')
                    if line_number == 1 or len(row_data) < 13:  # Skip header and invalid rows
                        continue

                    auction_id = row_data[0]
                    item_id = row_data[1]
                    user_id = row_data[12]

                    if not user_id or user_id.lower() == 'none':
                        continue  # Skip invalid user IDs

                    # Pickup dates of this row's auction, then every auction
                    # that shares one of those dates and a location with it.
                    pickup_dates = collect_pickup_dates(auction_id, auction_location_pickupdates)
                    filtered_auctions = find_same_pickup_auctions(
                        auction_id, pickup_dates, auctionsID_dict, auction_location_pickupdates
                    )

                    # Process items in filtered auctions
                    for loc_auction_id in filtered_auctions:
                        # NOTE(review): the item files listed in the run output look
                        # zero-padded (e.g. '000002.csv'); confirm auction ids carry the
                        # same padding, otherwise no file is ever found here.
                        same_auction_file_path = os.path.join(folder_path, f"{loc_auction_id}.csv")
                        if not os.path.isfile(same_auction_file_path):
                            continue
                        try:
                            # Parse each auction file once; later rows reuse the cached pairs.
                            if loc_auction_id not in auction_items_cache:
                                auction_items_cache[loc_auction_id] = load_auction_items(same_auction_file_path)

                            # Count occurrences of user IDs
                            for current_user_id, current_item_id in auction_items_cache[loc_auction_id]:
                                if current_user_id in user_id_counts:
                                    user_id_counts[current_user_id] += 1
                                else:
                                    user_id_counts[current_user_id] = 1
                                    # NOTE(review): a row is recorded only the first time a
                                    # user is seen; confirm that later items of the same
                                    # user are meant to be counted but not recorded.
                                    bundling_data.append([loc_auction_id, current_item_id, current_user_id])
                        except Exception as e:
                            print(f"Error processing items for auction {loc_auction_id}: {e}")
        except Exception as e:
            print(f"An error occurred while reading '{file_name}': {e}")

except FileNotFoundError:
    print(f"The folder '{folder_path}' was not found.")
except Exception as e:
    print(f"An error occurred: {e}")

# Convert bundling data to DataFrames.
# `df` carries column labels; `bundling_df` holds the same rows with pandas'
# default integer column labels and is kept for code that indexes by position.
# NOTE(review): the first value of every row is the auction id (loc_auction_id),
# although the column is labelled "location_id" - confirm the intended name.
df = pd.DataFrame(bundling_data, columns=["location_id", "current_item_id", "current_user_id"])
bundling_df = pd.DataFrame(bundling_data)

# Define the path for saving the output as CSV
csv_path = r"C:\Users\mydoa\Desktop\BIDFTA DATASET\auctions-dataset\tools\nodejs-dataset-downloader\02_filtered\calculation5.csv"
try:
    # Write the labelled frame so the CSV header names the columns instead of
    # reading 0,1,2; the row index is left out.
    df.to_csv(csv_path, index=False)
    print(f"\nData successfully saved as CSV at '{csv_path}'")
except Exception as e:
    print(f"An error occurred while saving CSV: {e}")




Loaded auctions file with auction_id to location mapping.
Created pickupdates dictionary for pickupdate to auction_ID mapping.
Created pickupdates dictionary for auction_ID to pickupdate mapping.

--- Processing '000002.csv' ---

--- Processing '000003.csv' ---

--- Processing '000004.csv' ---

--- Processing '000005.csv' ---

--- Processing '000006.csv' ---

--- Processing '000007.csv' ---

--- Processing '000008.csv' ---

--- Processing '000009.csv' ---

--- Processing '000010.csv' ---

--- Processing '000011.csv' ---

--- Processing '000012.csv' ---

--- Processing '000016.csv' ---

--- Processing '000017.csv' ---

--- Processing '000018.csv' ---

--- Processing '000019.csv' ---

--- Processing '000020.csv' ---

--- Processing '000021.csv' ---

--- Processing '000022.csv' ---

--- Processing '000023.csv' ---

--- Processing '000032.csv' ---

--- Processing '000033.csv' ---

--- Processing '000034.csv' ---

--- Processing '000035.csv' ---

--- Processing '000042.csv' ---

--- Process

In [3]:
# Show the current value of the global `pickup_dates`. The processing loop
# reassigns it for every item row, so this is the list belonging to the row
# that was processed last, not an accumulated result.
print(pickup_dates)


['2018-09-27', '2018-09-28']
