In [48]:
import pandas as pd

In [49]:
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)

In [50]:
df = pd.read_csv("../Source/Raw/2025-08.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3206 entries, 0 to 3205
Data columns (total 54 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   manufacturer                      3206 non-null   object 
 1   name                              3206 non-null   object 
 2   gpu_name                          3206 non-null   object 
 3   generation                        3206 non-null   object 
 4   base_clock_mhz                    3206 non-null   float64
 5   boost_clock_mhz                   3206 non-null   float64
 6   architecture                      3178 non-null   object 
 7   foundry                           3081 non-null   object 
 8   process_size_nm                   3202 non-null   float64
 9   transistor_count_m                2963 non-null   float64
 10  transistor_density_k_mm2          2928 non-null   float64
 11  die_size_mm2                      2994 non-null   float64
 12  chip_p

In [51]:
nan_columns = df.columns[df.isnull().any()]

print("Columns with NaN values:")
print(nan_columns)

Columns with NaN values:
Index(['architecture', 'foundry', 'process_size_nm', 'transistor_count_m',
       'transistor_density_k_mm2', 'die_size_mm2', 'chip_package',
       'release_date', 'bus_interface', 'memory_clock_mhz', 'memory_bus_bits',
       'memory_bandwidth_gb_s', 'thermal_design_power_w', 'board_length_mm',
       'board_width_mm', 'board_slot_width', 'suggested_psu_w',
       'power_connectors', 'display_connectors', 'directx_major_version',
       'directx_minor_version', 'opengl_major_version', 'opengl_minor_version',
       'vulkan_major_version', 'vulkan_minor_version', 'opencl_major_version',
       'opencl_minor_version', 'cuda_major_version', 'cuda_minor_version',
       'shader_model_major_version', 'shader_model_minor_version',
       'half_float_performance_gflop_s', 'single_float_performance_gflop_s',
       'double_float_performance_gflop_s'],
      dtype='object')


In [52]:
def get_association_df(df, parent, child):
    options = df.groupby([child])[parent].unique()
    count = df.groupby([child])[parent].nunique()
    
    association_df = pd.DataFrame(data={f'{parent}':options, 'count':count})
    association_df = association_df[association_df['count']>1]
    return association_df

In [53]:
#df['release_date'] = pd.to_datetime(df['release_date'])
#df.sort_values(by='release_date', ascending=False, inplace=True)
#df.drop_duplicates(subset=['name'], inplace=True)
#df

In [54]:
disp = get_association_df(df, 'manufacturer', 'architecture')

df = df[~((df['manufacturer']=='AMD') & (df['architecture']=='TeraScale'))] # remove all AMD manufactured chips from terascale architecture
df = df[~((df['manufacturer']=='ATI') & (df['architecture']=='TeraScale 2'))] # remove all ATI manufactured chips from terascale 2 architecture
df = df[~((df['manufacturer']=='AMD') & (df['architecture']=='Ultra-Threaded SE'))] # remove all AMD manufactured chips from UT SE architecture

disp

Unnamed: 0_level_0,manufacturer,count
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1
TeraScale,"[ATI, AMD]",2
TeraScale 2,"[AMD, ATI]",2
Ultra-Threaded SE,"[ATI, AMD]",2


In [55]:
get_association_df(df, 'architecture', 'generation')

Unnamed: 0_level_0,architecture,count
generation,Unnamed: 1_level_1,Unnamed: 2_level_1
All-In-One(Rx 200),"[TeraScale 2, GCN 1.0, GCN 3.0]",3
All-In-One(Rx 300),"[GCN 1.0, GCN 3.0]",2
All-In-Wonder(2006 Edition),"[R300, Ultra-Threaded SE]",2
All-In-Wonder(7000),"[Rage 6, Rage 7]",2
All-In-Wonder(9000),"[R300, Rage 7]",2
...,...,...
Tesla Kepler(Kxx),"[Kepler, Kepler 2.0]",2
Tesla Maxwell(Mxx),"[Maxwell 2.0, Maxwell]",2
Volcanic Islands(R5 200),"[TeraScale 2, GCN 1.0]",2
Volcanic Islands(R7 200),"[GCN 1.0, GCN 2.0]",2


In [56]:
disp = get_association_df(df, 'architecture', 'gpu_name')

df[df['gpu_name']=='Condor']
df = df[~((df['gpu_name']=='Condor') & (df['architecture']=='G400'))] # remove all Condor chip products from G400 architecture
disp

Unnamed: 0_level_0,architecture,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Condor,"[G400, G500]",2


**Graphics**

In [57]:
disp = get_association_df(df, 'directx_major_version', 'gpu_name')
df = df[~((df['directx_major_version']==10.0) & (df['gpu_name']=='GT215'))] # remove the older GT215 chip product
disp

Unnamed: 0_level_0,directx_major_version,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
GT215,"[10.0, 11.0]",2


In [58]:
disp = get_association_df(df, 'cuda_minor_version', 'gpu_name')
df = df[~((df['cuda_minor_version']==1.0) & (df['gpu_name']=='G92B'))] # remove the older G92B chip product
disp

Unnamed: 0_level_0,cuda_minor_version,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
G92B,"[1.0, 3.0]",2


In [59]:
disp = get_association_df(df, 'generation', 'gpu_name')
disp

Unnamed: 0_level_0,generation,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
18800-1,"[EGA, VGA]",2
AD102,"[GeForce 40, Server Ada(Lxx), Workstation Ada(...",3
AD103,"[GeForce 40, GeForce 40 Mobile, Workstation Ad...",4
AD104,"[GeForce 40, GeForce 40 Mobile, Server Ada(Lxx...",5
AD106,"[GeForce 40, GeForce 40 Mobile, Ada-MW(x000A)]",3
...,...,...
Vega 10,"[Radeon Instinct(MIx), Radeon Pro Vega(Vega Se...",7
Vega 20,"[Radeon Instinct(MIx), Radeon Pro Vega(Vega II...",4
Venus,"[FirePro Mobile(Mx100), Solar System(HD 8800M)...",6
Wani,"[GCN 3.0 IGP(Carrizo Mobile), GCN 3.0 IGP(Bris...",2


In [60]:
disp = get_association_df(df, 'name', 'generation')
disp

Unnamed: 0_level_0,name,count
generation,Unnamed: 1_level_1,Unnamed: 2_level_1
Ada-MW(x000A),"[RTX 2000 Embedded Ada Generation, RTX 2000 Ma...",13
Alchemist(Arc 3 Mobile),"[Arc A350M, Arc A370M, Arc A380M]",3
Alchemist(Arc 3),"[Arc A310, Arc A350, Arc A380]",3
Alchemist(Arc 5 Mobile),"[Arc A550M, Arc A530M, Arc A570M]",3
Alchemist(Arc 7 Mobile),"[Arc A730M, Arc A770M]",2
...,...,...
XG40(Volari 8000),"[Volari 8300, Volari 8600 XT]",2
XG40(Volari V3),"[Volari V3, Volari V3 XT]",2
XG40(Volari V5),"[Volari Duo V5 Ultra, Volari V5, Volari V5 Ultra]",3
XG40(Volari V8),"[Volari Duo V8 Ultra, Volari V8, Volari V8 Ultra]",3


In [61]:
disp = get_association_df(df, 'architecture', 'name')
disp

Unnamed: 0_level_0,architecture,count
name,Unnamed: 1_level_1,Unnamed: 2_level_1
GMA 3000,"[Generation 4.0, Generation 3.5]",2
GeForce 610M,"[Fermi 2.0, Fermi]",2
GeForce 6200 AGP,"[Celsius, Curie]",2
GeForce 710A,"[Fermi 2.0, Kepler 2.0]",2
GeForce 710M,"[Kepler 2.0, Fermi 2.0]",2
...,...,...
Radeon X1050,"[R300, R400]",2
UHD Graphics 16EU Mobile,"[Generation 11.0, Generation 12.1, Generation ...",3
UHD Graphics 24EU Mobile,"[Generation 9.5, Generation 11.0, Generation 1...",3
UHD Graphics 32EU Mobile,"[Generation 11.0, Generation 12.1]",2


In [62]:
disp = get_association_df(df, 'manufacturer', 'name')
disp

Unnamed: 0_level_0,manufacturer,count
name,Unnamed: 1_level_1,Unnamed: 2_level_1


In [63]:
disp = get_association_df(df, 'architecture', 'gpu_name')
disp

Unnamed: 0_level_0,architecture,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1


In [64]:
disp = get_association_df(df, 'manufacturer', 'gpu_name')
disp

Unnamed: 0_level_0,manufacturer,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1


In [65]:
disp = get_association_df(df, 'manufacturer', 'generation')
disp

Unnamed: 0_level_0,manufacturer,count
generation,Unnamed: 1_level_1,Unnamed: 2_level_1
Console GPU(Microsoft),"[NVIDIA, ATI, AMD]",3
Console GPU(Nintendo),"[ATI, AMD, NVIDIA]",3
Console GPU(Sony),"[Sony, NVIDIA, AMD]",3


In [66]:
disp = get_association_df(df, 'generation', 'gpu_name')
disp

Unnamed: 0_level_0,generation,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
18800-1,"[EGA, VGA]",2
AD102,"[GeForce 40, Server Ada(Lxx), Workstation Ada(...",3
AD103,"[GeForce 40, GeForce 40 Mobile, Workstation Ad...",4
AD104,"[GeForce 40, GeForce 40 Mobile, Server Ada(Lxx...",5
AD106,"[GeForce 40, GeForce 40 Mobile, Ada-MW(x000A)]",3
...,...,...
Vega 10,"[Radeon Instinct(MIx), Radeon Pro Vega(Vega Se...",7
Vega 20,"[Radeon Instinct(MIx), Radeon Pro Vega(Vega II...",4
Venus,"[FirePro Mobile(Mx100), Solar System(HD 8800M)...",6
Wani,"[GCN 3.0 IGP(Carrizo Mobile), GCN 3.0 IGP(Bris...",2


In [67]:
df = df[df['architecture'].isna() == False] # drop all null architectures

In [68]:
disp = get_association_df(df, 'foundry', 'gpu_name')
df = df[~((df['foundry']=='UMC') & (df['gpu_name']=='M9'))] # remove the older M9 chip variant
df = df[~((df['foundry']=='SGS Microelettronica') & (df['gpu_name']=='Mach64 GT-B'))] # remove SGS Microelettronica foundry chip version
disp

Unnamed: 0_level_0,foundry,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
M9,"[TSMC, UMC]",2
Mach64 GT-B,"[SGS Microelettronica, UMC]",2


In [69]:
disp = get_association_df(df, 'foundry', 'gpu_name')
disp

Unnamed: 0_level_0,foundry,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1


In [70]:
disp = get_association_df(df, 'transistor_density_k_mm2', 'gpu_name')
df = df[~((df['foundry']==16900.0) & (df['gpu_name']=='GM20B'))] # remove the older M9 chip variant
disp

Unnamed: 0_level_0,transistor_density_k_mm2,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
GM20B,"[16900.0, 20000.0]",2


In [71]:
disp = get_association_df(df, 'die_size_mm2', 'gpu_name')
df = df[~((df['die_size_mm2']==118.0) & (df['gpu_name']=='GM20B'))] # remove the older M9 chip variant
disp

Unnamed: 0_level_0,die_size_mm2,count
gpu_name,Unnamed: 1_level_1,Unnamed: 2_level_1
GM20B,"[118.0, 100.0]",2


In [72]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3087 entries, 0 to 3205
Data columns (total 54 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   manufacturer                      3087 non-null   object 
 1   name                              3087 non-null   object 
 2   gpu_name                          3087 non-null   object 
 3   generation                        3087 non-null   object 
 4   base_clock_mhz                    3087 non-null   float64
 5   boost_clock_mhz                   3087 non-null   float64
 6   architecture                      3087 non-null   object 
 7   foundry                           2962 non-null   object 
 8   process_size_nm                   3083 non-null   float64
 9   transistor_count_m                2846 non-null   float64
 10  transistor_density_k_mm2          2817 non-null   float64
 11  die_size_mm2                      2882 non-null   float64
 12  chip_packag

In [73]:
df.to_csv('../Source/Clean/2025-08.csv', index=False)