In [1]:
import os

import pandas as pd
from dotenv import load_dotenv
from google.generativeai import types
import json


  from .autonotebook import tqdm as notebook_tqdm


# Table conversion

In [2]:
def read_json(json_file_path: str):
    """Load and parse a UTF-8 encoded JSON file.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist
        or its content is not valid JSON. In both failure cases a message is
        printed rather than an exception being raised, so callers must check
        for ``None`` before using the result.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except FileNotFoundError:
        print(f"Error: JSON file not found at {json_file_path}")
        return None
    except json.JSONDecodeError:
        print(f"Error: Could not decode JSON from {json_file_path}")
        return None
    else:
        # Only reached when both the open and the parse succeeded.
        return parsed

In [3]:
# Locations of the raw API response and of the processed output, relative to the
# notebook's working directory.
INPUT_JSON_FILE_PATH = os.path.join("data", "01_raw", "octopart.json")
OUTPUT_JSON_FILE_PATH = os.path.join("data", "02_processed", "octopart.json")

raw_json = read_json(INPUT_JSON_FILE_PATH)

# read_json signals failure by printing a message and returning None. Stop here
# with an explicit error instead of failing on the subscript below with an opaque
# "'NoneType' object is not subscriptable".
if raw_json is None:
    raise RuntimeError(
        f"Could not load {INPUT_JSON_FILE_PATH}; see the error message printed above."
    )

# Only the first search result is used; an empty (or null) result list would
# otherwise surface as an IndexError/TypeError with no context.
search_results = raw_json['data']['supSearchMpn']['results']
if not search_results:
    raise ValueError(f"No search results found in {INPUT_JSON_FILE_PATH}.")

# A null `similarParts` is normalised to an empty list so that downstream code can
# always iterate over it.
result_json_list = search_results[0]['part']['similarParts'] or []

In [4]:
def _nested_name(record, key, default=''):
    """Return ``record[key]['name']``, tolerating a missing or null ``record[key]``."""
    # `dict.get(key, {})` only covers a *missing* key; JSON nulls arrive as None,
    # so fall back with `or` instead.
    return (record.get(key) or {}).get('name', default)


def _pick_unit_price(price_break):
    """Return ``(unit_price, currency)`` for a price break, or ``None`` if it has no numeric price."""
    # Prefer the converted price; otherwise use the native price, labelled with
    # its own currency so that amount and currency always belong together.
    for price_key, currency_key in (('convertedPrice', 'convertedCurrency'),
                                    ('price', 'currency')):
        amount = price_break.get(price_key)
        # bool is a subclass of int, but a JSON true/false is never a price.
        if isinstance(amount, (int, float)) and not isinstance(amount, bool):
            currency = price_break.get(currency_key) or price_break.get('currency') or 'USD'
            return amount, currency
    return None


def _find_best_offer(sellers):
    """Return the ``best_*`` columns describing the lowest unit price among all sellers."""
    best_seller_info = {
        'best_seller_company': 'No sellers available',
        'best_seller_country': '',
        'best_unit_price': '',
        'best_price_currency': '',
        'best_price_min_qty': ''
    }
    lowest_price = float('inf')

    for seller in sellers or []:
        company_name = _nested_name(seller, 'company', 'Unknown')
        country = seller.get('country', 'Unknown')

        for offer in seller.get('offers') or []:
            for price_break in offer.get('prices') or []:
                picked = _pick_unit_price(price_break)
                if picked is None:
                    continue
                unit_price, currency = picked

                # Strict comparison: on a tie the first offer encountered wins.
                if unit_price < lowest_price:
                    lowest_price = unit_price
                    quantity = price_break.get('quantity')
                    best_seller_info = {
                        'best_seller_company': company_name,
                        'best_seller_country': country,
                        'best_unit_price': f"{unit_price:.4f}",
                        'best_price_currency': currency,
                        'best_price_min_qty': 1 if quantity is None else quantity
                    }

    return best_seller_info


def _spec_columns(specs):
    """Map each spec to a ``{snake_case_attribute_name: "value units"}`` entry."""
    columns = {}
    for spec in specs or []:
        attribute_name = _nested_name(spec, 'attribute', None)
        if attribute_name is None:
            # Without an attribute name there is no column to put the value in.
            continue
        column_name = (
            attribute_name.replace(' ', '_').replace('(', '').replace(')', '').lower()
        )
        # Skip null parts so a missing unit does not render as the text "None".
        parts = (spec.get('value'), spec.get('units'))
        columns[column_name] = " ".join(str(p) for p in parts if p is not None).strip()
    return columns


def process_parts_list(result_json_list):
    """
    Process a list of part data dictionaries into a single DataFrame.

    Each part becomes one row holding its basic info (``name``, ``mpn``,
    ``manufacturer``, ``short_description``, ``category``), the ``best_*``
    columns of its cheapest offer, and one column per spec attribute.

    "Cheapest" means the lowest unit price over every price break of every
    offer of every seller, so the selected row is frequently a high-volume
    tier; ``best_price_min_qty`` records the quantity of that price break.

    Missing keys and explicit nulls in the nested data (``manufacturer``,
    ``category``, ``sellers``, ``company``, ``offers``, ``prices``, ``specs``,
    ``attribute``, prices and units) are tolerated instead of raising.

    Args:
        result_json_list (list): List of part data dictionaries. ``None`` is
            treated as an empty list.

    Returns:
        pd.DataFrame: Complete DataFrame with all parts and their best seller info
    """
    all_parts_data = []

    for part_data in result_json_list or []:
        # Basic part information first so these columns lead the frame.
        combined_info = {
            'name': part_data.get('name', ''),
            'mpn': part_data.get('mpn', ''),
            'manufacturer': _nested_name(part_data, 'manufacturer'),
            'short_description': part_data.get('shortDescription', ''),
            'category': _nested_name(part_data, 'category')
        }
        combined_info.update(_find_best_offer(part_data.get('sellers')))
        # Spec columns go last; parts lacking a given spec get NaN in that column.
        combined_info.update(_spec_columns(part_data.get('specs')))

        all_parts_data.append(combined_info)

    return pd.DataFrame(all_parts_data)

# Build one row per part: basic part info, its best (lowest unit price) seller,
# and one column per spec attribute.
final_df = process_parts_list(result_json_list)
# Bare last expression so the notebook renders the frame as this cell's output.
final_df

Unnamed: 0,name,mpn,manufacturer,short_description,category,best_seller_company,best_seller_country,best_unit_price,best_price_currency,best_price_min_qty,...,saturation_current,lifecycle_status,width,case_code_imperial,case_code_metric,current,depth,number_of_pins,resistance,series_resistance
0,Taiyo Yuden LHL08TB330K,LHL08TB330K,Taiyo Yuden,Fixed Ind 33UH 1.4A 100 Mohm Th,Fixed Inductors,Quest,US,0.0696,USD,7959,...,,,,,,,,,,
1,Bourns RLB9012-330KL,RLB9012-330KL,Bourns,Inductor Power Wirewound 33uH 10% 1KHz 30Q-Fac...,Fixed Inductors,Avnet,US,0.2379,USD,10000,...,,,,,,,,,,
2,Taiyo Yuden LHL10NB330K,LHL10NB330K,Taiyo Yuden,Ind Power 33uH 10% 2.52MHz 60Q-Factor Ferrite ...,Fixed Inductors,Ampacity Systems,MY,1.05,USD,1,...,,,,,,,,,,
3,Abracon AIUR-03-330K,AIUR-03-330K,Abracon,Fixed Ind 33Uh 1.1A 90 Mohm Th/Bulk Rohs Compl...,Fixed Inductors,Bettlink,HK,0.1885,USD,10000,...,1.6 A,,,,,,,,,
4,Abracon ASPI-0705-330K-T,ASPI-0705-330K-T,Abracon,Fixed Ind 33Uh 1.2A 130 Mohm Smd/Tape & Reel (...,Fixed Inductors,Onlinecomponents.com,US,0.1555,USD,100000,...,,Production,7.0104 mm,,,,,,,
5,Bourns CM322522-330KL,CM322522-330KL,Bourns,Inductor General Purpose Chip Wirewound 33uH 1...,Fixed Inductors,Ampacity Systems,MY,0.058,USD,1,...,,,2.4892 mm,1210.0,3225.0,70 mA,2.5 mm,2.0,5.6 Ω,5.6 Ω


In [6]:
# Share of missing values per column, expressed as a fraction between 0 and 1
# (the variable name says "percentage", but the values are not multiplied by 100).
nan_percentage = final_df.isna().mean()

# Retain only the columns in which strictly less than half of the rows are missing.
df_cleaned = final_df.loc[:, nan_percentage.lt(0.5)]


In [8]:
# Remove pandas' cap on displayed columns so the wide frame is rendered in full.
# This is a session-wide setting: it affects every later DataFrame display too.
pd.options.display.max_columns = None
df_cleaned

Unnamed: 0,name,mpn,manufacturer,short_description,category,best_seller_company,best_seller_country,best_unit_price,best_price_currency,best_price_min_qty,case/package,core_material,current_rating,dc_resistance_dcr,diameter,height,height_-_seated_max,inductance,lead/base_style,length,max_dc_current,max_operating_temperature,min_operating_temperature,mount,q_factor,radiation_hardening,rohs,self_resonant_frequency,shielding,termination,test_frequency,tolerance,composition,packaging
0,Taiyo Yuden LHL08TB330K,LHL08TB330K,Taiyo Yuden,Fixed Ind 33UH 1.4A 100 Mohm Th,Fixed Inductors,Quest,US,0.0696,USD,7959,Radial,Ferrite,1.4 A,100 mΩ,9 mm,9.5 mm,9.4996 mm,33 µH,Radial,9.5 mm,1.4 A,105 °C,-25 °C,Through Hole,40.0,No,Compliant,8.8 MHz,Unshielded,Radial,2.52 MHz,10 %,,
1,Bourns RLB9012-330KL,RLB9012-330KL,Bourns,Inductor Power Wirewound 33uH 10% 1KHz 30Q-Fac...,Fixed Inductors,Avnet,US,0.2379,USD,10000,Radial,Ferrite,2.05 A,90 mΩ,9 mm,12.2 mm,12.192 mm,33 µH,Radial,,2.05 A,125 °C,-55 °C,Through Hole,30.0,No,Compliant,7.8 MHz,Unshielded,Radial,1 kHz,10 %,Wirewound,Bulk
2,Taiyo Yuden LHL10NB330K,LHL10NB330K,Taiyo Yuden,Ind Power 33uH 10% 2.52MHz 60Q-Factor Ferrite ...,Fixed Inductors,Ampacity Systems,MY,1.05,USD,1,Radial,Ferrite,1.9 A,78 mΩ,11 mm,13.9954 mm,,33 µH,Radial,14 mm,1.9 A,105 °C,-25 °C,Through Hole,60.0,No,Compliant,6.8 MHz,Unshielded,Radial,2.52 MHz,10 %,,Bulk
3,Abracon AIUR-03-330K,AIUR-03-330K,Abracon,Fixed Ind 33Uh 1.1A 90 Mohm Th/Bulk Rohs Compl...,Fixed Inductors,Bettlink,HK,0.1885,USD,10000,Radial,Ferrite,1.1 A,90 mΩ,9 mm,12 mm,11.9888 mm,33 µH,Radial,12 mm,1.6 A,85 °C,-25 °C,Through Hole,30.0,No,Compliant,7.8 MHz,Unshielded,Radial,2.52 MHz,10 %,Wirewound,Box
4,Abracon ASPI-0705-330K-T,ASPI-0705-330K-T,Abracon,Fixed Ind 33Uh 1.2A 130 Mohm Smd/Tape & Reel (...,Fixed Inductors,Onlinecomponents.com,US,0.1555,USD,100000,Nonstandard,Ferrite,1.2 A,130 mΩ,,5 mm,5.5118 mm,33 µH,,7.8 mm,,85 °C,-40 °C,Surface Mount,,,Compliant,,Unshielded,,2.52 MHz,10 %,Wirewound,Tape & Reel (TR)
5,Bourns CM322522-330KL,CM322522-330KL,Bourns,Inductor General Purpose Chip Wirewound 33uH 1...,Fixed Inductors,Ampacity Systems,MY,0.058,USD,1,1210,Ferrite,85 mA,5.6 Ω,,2.2 mm,2.1844 mm,33 µH,,3.2 mm,85 mA,100 °C,-20 °C,Surface Mount,30.0,No,Compliant,20 MHz,Unshielded,SMD/SMT,2.52 MHz,10 %,Wirewound,Cut Tape


In [9]:
# Serialise the cleaned table as a Markdown-formatted string. As the last expression
# of the cell, the notebook shows the string's repr (quoted, with literal "\n"
# sequences) rather than a rendered table.
# NOTE(review): pandas delegates to_markdown to the optional `tabulate` package —
# confirm it is installed in this environment.
df_cleaned.to_markdown()

'|    | name                     | mpn              | manufacturer   | short_description                                                                                    | category        | best_seller_company   | best_seller_country   |   best_unit_price | best_price_currency   |   best_price_min_qty | case/package   | core_material   | current_rating   | dc_resistance_dcr   | diameter   | height     | height_-_seated_max   | inductance   | lead/base_style   | length   | max_dc_current   | max_operating_temperature   | min_operating_temperature   | mount         |   q_factor | radiation_hardening   | rohs      | self_resonant_frequency   | shielding   | termination   | test_frequency   | tolerance   | composition   | packaging        |\n|---:|:-------------------------|:-----------------|:---------------|:-----------------------------------------------------------------------------------------------------|:----------------|:----------------------|:----------------------|------------