In [1]:
import pandas as pd

def disambiguate_duplicate_descriptions(data):
    """Prefix tag_name onto tag_descriptions that repeat within a ships_idx.

    A row is rewritten when its (ships_idx, tag_description) pair occurs more
    than once in ``data``. Every occurrence of the pair is rewritten, not just
    the later ones, so each becomes ``"<tag_name> <tag_description>"``.

    Rows missing ships_idx, tag_name or tag_description are left untouched and
    are not counted: concatenating with a missing value would replace the
    description with NaN instead of disambiguating it.

    Args:
        data: DataFrame with 'ships_idx', 'tag_name' and 'tag_description'
            columns holding strings. Modified in place.

    Returns:
        dict mapping each ships_idx that had at least one rewritten row to the
        number of rows rewritten, ordered by ships_idx.
    """
    key_columns = ['ships_idx', 'tag_name', 'tag_description']
    has_all_values = data[key_columns].notna().all(axis=1)

    # One pass over the whole frame; keep=False flags every member of a
    # repeated (ships_idx, tag_description) pair rather than only the repeats.
    is_repeated = data.duplicated(subset=['ships_idx', 'tag_description'], keep=False)
    to_rewrite = has_all_values & is_repeated

    if to_rewrite.any():
        data.loc[to_rewrite, 'tag_description'] = (
            data.loc[to_rewrite, 'tag_name'] + ' ' + data.loc[to_rewrite, 'tag_description']
        )

    # groupby sorts its keys, so the report is ordered by ships_idx.
    rewritten_per_ship = data.loc[to_rewrite].groupby('ships_idx').size()
    return {ships_idx: int(count) for ships_idx, count in rewritten_per_ship.items()}


if __name__ == "__main__":
    # Load the preprocessed data CSV file. dtype=str keeps every column as
    # text; empty cells are read as missing values.
    file_path = '../../data_import/raw_data.csv'  # Adjust this path to your actual file location
    data = pd.read_csv(file_path, dtype=str)

    # Rewrite duplicated descriptions and collect the per-ships_idx counts
    ships_idx_changes = disambiguate_duplicate_descriptions(data)
    total_changes = sum(ships_idx_changes.values())

    # Output the changes per ships_idx
    for ships_idx, count in ships_idx_changes.items():
        print(f"Changes made in ships_idx {ships_idx}: {count}")

    # Output the total number of changes
    print(f"Total number of changes made: {total_changes}")

    # Save the updated DataFrame to a new CSV (the input file is not overwritten)
    output_file_path = 'raw_data_add_tag.csv'
    data.to_csv(output_file_path, index=False, encoding='utf-8-sig')

    print(f"Updated data saved to {output_file_path}")


Changes made in ships_idx 1000: 251
Changes made in ships_idx 1001: 54
Changes made in ships_idx 1002: 46
Changes made in ships_idx 1003: 162
Changes made in ships_idx 1004: 8
Changes made in ships_idx 1005: 18
Changes made in ships_idx 1008: 22
Changes made in ships_idx 1009: 5
Changes made in ships_idx 1010: 131
Changes made in ships_idx 1011: 46
Changes made in ships_idx 1012: 2
Changes made in ships_idx 1013: 130
Changes made in ships_idx 1014: 46
Changes made in ships_idx 1015: 145
Changes made in ships_idx 1016: 191
Changes made in ships_idx 1017: 111
Changes made in ships_idx 1018: 680
Changes made in ships_idx 1019: 2
Changes made in ships_idx 1020: 10
Changes made in ships_idx 1021: 2
Changes made in ships_idx 1022: 7
Changes made in ships_idx 1023: 7
Changes made in ships_idx 1024: 136
Changes made in ships_idx 1025: 10
Changes made in ships_idx 1026: 6
Changes made in ships_idx 1027: 6
Changes made in ships_idx 1028: 6
Changes made in ships_idx 1029: 132
Changes made in ship