# This notebook filters chips. Right now, it filters based on obsoleteness and also on issue date (after 2010-01-01) and on whether the chip has a valid image URL.

In [19]:
import ast
import os
import shutil
from datetime import datetime

import pandas as pd

In [20]:
def _image_names_from_urls(image_urls):
    """Return the image file names referenced by one 'image_urls' cell.

    The cell is expected to hold the text form of a Python list of URLs,
    e.g. "['http://host/a.jpg', 'http://host/b.jpg']". Each name is the part
    of a URL after its last '/'. Empty names (URL ending in '/') are dropped.
    """
    try:
        parsed = ast.literal_eval(image_urls)
    except (ValueError, SyntaxError, TypeError):
        parsed = None

    if isinstance(parsed, str):
        parsed = [parsed]

    if isinstance(parsed, (list, tuple)):
        names = [str(url).split('/')[-1] for url in parsed]
    else:
        # Not a parseable list literal: fall back to taking the last path
        # segment and stripping the trailing "']" of the list text.
        names = [image_urls.split('/')[-1][:-2]]

    return [name for name in names if name]


def process_subfolder(subfolder, dest_dir=None, min_issue_date='2010-01-01'):
    """Filter one chip subfolder and copy the surviving chips to the destination.

    Reads ``chipinfo.csv`` from ``subfolder`` and keeps the rows that
      * contain a YYYY-MM-DD date in 'chip_info' later than ``min_issue_date``,
      * do not contain the word 'Obsolete' in 'chip_info', and
      * have an 'image_urls' value starting with "['http".
    If any rows remain, a folder named like ``subfolder`` is created under the
    destination, the filtered rows are written there as ``chipinfo.csv``
    (including the helper columns 'issue_date', 'is_obsolete' and
    'has_valid_image'), and the image files named by the kept rows are copied
    when they exist in ``subfolder``.

    Parameters
    ----------
    subfolder : str
        Path of the folder holding ``chipinfo.csv`` and the image files.
    dest_dir : str, optional
        Root folder to copy into. When omitted, the notebook-level
        ``destination_directory`` variable is used, so existing
        single-argument calls keep working.
    min_issue_date : str or datetime-like, optional
        Exclusive lower bound for the issue date. Defaults to '2010-01-01'.

    Returns
    -------
    None
    """
    if dest_dir is None:
        # Backward compatibility: callers that pass only the subfolder rely
        # on the notebook-level variable being defined before the call.
        dest_dir = destination_directory

    csv_file_path = os.path.join(subfolder, 'chipinfo.csv')
    if not os.path.isfile(csv_file_path):
        # A folder without a CSV has nothing to filter; report and move on
        # instead of aborting the whole batch.
        print(f'Skipping {subfolder}: no chipinfo.csv found')
        return

    try:
        df = pd.read_csv(csv_file_path, encoding='utf-8')  # Trying with utf-8 first
    except UnicodeDecodeError:
        df = pd.read_csv(csv_file_path, encoding='ISO-8859-1')  # If utf-8 fails, try with ISO-8859-1

    # Derive the filter columns. Missing text counts as "no date", "not
    # obsolete" and "no valid image" (na=False keeps the masks strictly boolean).
    extracted_dates = df['chip_info'].str.extract(r'(\d{4}-\d{2}-\d{2})')[0]
    df['issue_date'] = pd.to_datetime(extracted_dates, errors='coerce')
    df['is_obsolete'] = df['chip_info'].str.contains('Obsolete', na=False)
    df['has_valid_image'] = df['image_urls'].str.startswith("['http", na=False)

    # Keep chips issued after the cutoff that are not obsolete and have an image URL.
    cutoff = pd.Timestamp(min_issue_date)
    filtered_df = df[(df['issue_date'] > cutoff) & (~df['is_obsolete']) & (df['has_valid_image'])]

    if filtered_df.empty:
        return

    # Mirror the subfolder name under the destination directory.
    new_subfolder = os.path.join(dest_dir, os.path.basename(subfolder))
    os.makedirs(new_subfolder, exist_ok=True)

    # Write the filtered rows next to the copied images.
    filtered_csv_path = os.path.join(new_subfolder, 'chipinfo.csv')
    filtered_df.to_csv(filtered_csv_path, index=False)

    # Copy every image referenced by a kept row that is present locally.
    for image_urls in filtered_df['image_urls']:
        for image_name in _image_names_from_urls(image_urls):
            source_path = os.path.join(subfolder, image_name)
            if os.path.isfile(source_path):
                shutil.copy2(source_path, os.path.join(new_subfolder, image_name))

In [21]:
def process_chips(main_directory, destination_directory):
    """Run ``process_subfolder`` on every directory directly inside ``main_directory``.

    Entries that are not directories are ignored. ``process_subfolder`` is
    called with the subfolder path only; the ``destination_directory``
    argument is used here solely in the summary message printed at the end.
    """
    candidate_paths = (
        os.path.join(main_directory, entry_name)
        for entry_name in os.listdir(main_directory)
    )
    for candidate in candidate_paths:
        # Plain files sitting next to the chip folders are not processed.
        if not os.path.isdir(candidate):
            continue
        process_subfolder(candidate)

    print(f'Processed all subfolders and copied filtered chips to {destination_directory}')

In [22]:
# Input and output roots, relative to the notebook's working directory.
# Built with os.path.join so the separators match the host OS; on Windows this
# yields the same '..\Chips\...' strings as before.
main_directory = os.path.join('..', 'Chips', 'UncroppedChips')
destination_directory = os.path.join('..', 'Chips', 'UncroppedPost2010')

In [23]:
# Run the filter over every subfolder of main_directory, using the paths set in the configuration cell.
process_chips(main_directory, destination_directory)

Processed all subfolders and copied filtered chips to ..\Chips\UncroppedPost2010


In [24]:
# Completion marker so the notebook output shows the run reached the last cell.
print("Done")

Done
