In [127]:
import pandas as pd

In [128]:
# Raise pandas' display limits to 500 rows and 500 columns.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500

In [129]:
# Read the raw CSV into `df`, then print its column/dtype/non-null summary.
df = pd.read_csv(filepath_or_buffer="../Source/Raw/2025-08.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3206 entries, 0 to 3205
Data columns (total 54 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   manufacturer                      3206 non-null   object 
 1   name                              3206 non-null   object 
 2   gpu_name                          3206 non-null   object 
 3   generation                        3206 non-null   object 
 4   base_clock_mhz                    3206 non-null   float64
 5   boost_clock_mhz                   3206 non-null   float64
 6   architecture                      3178 non-null   object 
 7   foundry                           3081 non-null   object 
 8   process_size_nm                   3202 non-null   float64
 9   transistor_count_m                2963 non-null   float64
 10  transistor_density_k_mm2          2928 non-null   float64
 11  die_size_mm2                      2994 non-null   float64
 12  chip_p

In [130]:
# Labels of every column that holds at least one missing value.
nan_columns = df.columns[df.isna().any(axis=0)]

# Header line followed by the Index repr, each on its own line.
print("Columns with NaN values:", nan_columns, sep="\n")

Columns with NaN values:
Index(['architecture', 'foundry', 'process_size_nm', 'transistor_count_m',
       'transistor_density_k_mm2', 'die_size_mm2', 'chip_package',
       'release_date', 'bus_interface', 'memory_clock_mhz', 'memory_bus_bits',
       'memory_bandwidth_gb_s', 'thermal_design_power_w', 'board_length_mm',
       'board_width_mm', 'board_slot_width', 'suggested_psu_w',
       'power_connectors', 'display_connectors', 'directx_major_version',
       'directx_minor_version', 'opengl_major_version', 'opengl_minor_version',
       'vulkan_major_version', 'vulkan_minor_version', 'opencl_major_version',
       'opencl_minor_version', 'cuda_major_version', 'cuda_minor_version',
       'shader_model_major_version', 'shader_model_minor_version',
       'half_float_performance_gflop_s', 'single_float_performance_gflop_s',
       'double_float_performance_gflop_s'],
      dtype='object')


In [131]:
def get_association_df(df, parent, child):
    """Find `child` values that are associated with more than one `parent` value.

    Rows are grouped by `child`; for each group the distinct non-missing
    `parent` values are collected and counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    parent : column label
        Column whose distinct values are listed for each group.
    child : column label
        Column to group by. Rows whose `child` value is missing are ignored,
        which is pandas' default ``groupby`` behaviour.

    Returns
    -------
    pandas.DataFrame
        Indexed by `child`, with one column named after `parent` holding the
        array of distinct `parent` values and a ``'count'`` column holding how
        many there are. Only groups with ``count > 1`` are returned.
    """
    # Drop rows with a missing parent before grouping: ``nunique`` ignores NaN
    # while ``unique`` keeps it, so otherwise the listed values could contain
    # NaN and disagree with the reported count.
    grouped = df.dropna(subset=[parent]).groupby([child])[parent]

    association_df = pd.DataFrame(
        data={f'{parent}': grouped.unique(), 'count': grouped.nunique()}
    )
    # Keep only the ambiguous groups, i.e. more than one distinct parent.
    return association_df[association_df['count'] > 1]

In [132]:
#df['release_date'] = pd.to_datetime(df['release_date'])
#df.sort_values(by='release_date', ascending=False, inplace=True)
#df.drop_duplicates(subset=['name'], inplace=True)
#df

In [133]:
# Snapshot the architectures listed under more than one manufacturer
# before any rows are removed, so the conflicts can be shown below.
disp = get_association_df(df, 'manufacturer', 'architecture')

# Each filter keeps a row unless it matches the named manufacturer/architecture pair.
df = df.loc[df['manufacturer'].ne('AMD') | df['architecture'].ne('TeraScale')]  # drop AMD rows under TeraScale
df = df.loc[df['manufacturer'].ne('ATI') | df['architecture'].ne('TeraScale 2')]  # drop ATI rows under TeraScale 2
df = df.loc[df['manufacturer'].ne('AMD') | df['architecture'].ne('Ultra-Threaded SE')]  # drop AMD rows under Ultra-Threaded SE

disp

Unnamed: 0_level_0,manufacturer,count
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1
TeraScale,"[ATI, AMD]",2
TeraScale 2,"[AMD, ATI]",2
Ultra-Threaded SE,"[ATI, AMD]",2


In [134]:
# Show the generations whose rows span more than one architecture.
get_association_df(df, parent='architecture', child='generation')

Unnamed: 0_level_0,architecture,count
generation,Unnamed: 1_level_1,Unnamed: 2_level_1
All-In-One(Rx 200),"[TeraScale 2, GCN 1.0, GCN 3.0]",3
All-In-One(Rx 300),"[GCN 1.0, GCN 3.0]",2
All-In-Wonder(2006 Edition),"[R300, Ultra-Threaded SE]",2
All-In-Wonder(7000),"[Rage 6, Rage 7]",2
All-In-Wonder(9000),"[R300, Rage 7]",2
All-In-Wonder(X),"[R400, R300]",2
Arctic Islands(RX 400),"[GCN 2.0, GCN 4.0]",2
Console GPU(AMD),"[RDNA 3.0, RDNA 3.5, RDNA 2.0]",3
Console GPU(Microsoft),"[Kelvin, TeraScale, GCN 1.0, GCN 2.0, RDNA 2.0]",5
Console GPU(Nintendo),"[Rage 5, Ultra-Threaded SE, TeraScale 2, Maxwe...",5


In [135]:
# Snapshot the gpu_name values listed under more than one architecture
# before any rows are removed, so the conflicts can be shown below.
disp = get_association_df(df, 'architecture', 'gpu_name')

# The bare `df[df['gpu_name']=='Condor']` lookup that used to sit here was
# evaluated and thrown away (it was neither the cell's last expression nor
# assigned), so it has been removed.
df = df[~((df['gpu_name']=='Condor') & (df['architecture']=='G400'))] # remove all Condor chip products from G400 architecture
disp

Unnamed: 0_level_0,architecture,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Condor,"[G400, G500]",2


**Graphics**

In [136]:
# Snapshot the gpu_name values reporting more than one DirectX major version.
disp = get_association_df(df, 'directx_major_version', 'gpu_name')
# Keep every row except GT215 entries whose DirectX major version is 10.
df = df.loc[df['directx_major_version'].ne(10.0) | df['gpu_name'].ne('GT215')]
disp

Unnamed: 0_level_0,directx_major_version,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
GT215,"[10.0, 11.0]",2


In [137]:
# Snapshot the gpu_name values reporting more than one CUDA minor version.
disp = get_association_df(df, 'cuda_minor_version', 'gpu_name')
# Keep every row except G92B entries whose CUDA minor version is 1.
df = df.loc[df['cuda_minor_version'].ne(1.0) | df['gpu_name'].ne('G92B')]
disp

Unnamed: 0_level_0,cuda_minor_version,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
G92B,"[1.0, 3.0]",2


In [138]:
# Write the filtered frame to the Clean folder, leaving the row index out of the file.
df.to_csv(path_or_buf='../Source/Clean/2025-08.csv', index=False)