In [2]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [3]:
# Build the PostgreSQL connection used by every query cell below.
# Credentials come from environment variables (load_dotenv() is called first so
# values kept in a .env file are available) and are never hard-coded here.
load_dotenv()

# Fail fast on missing settings: otherwise os.getenv() returns None, which the
# f-string below would silently render as the literal text "None".
_required_pg_vars = ('PG_USER', 'PG_PASSWORD', 'PG_HOST', 'PG_PORT', 'PG_GPU_DB')
_missing_pg_vars = [name for name in _required_pg_vars if os.getenv(name) is None]
if _missing_pg_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_pg_vars)
    )

pg_user = os.getenv('PG_USER')
pg_password = os.getenv('PG_PASSWORD')
pg_host = os.getenv('PG_HOST')
pg_port = os.getenv('PG_PORT')
pg_db = os.getenv('PG_GPU_DB')

# Percent-encode the user and password so characters that are URL delimiters
# ('@', ':', '/', '#', ...) cannot corrupt the connection string.
# safe='' forces '/' to be encoded as well.
pg_conn_str = (
    f"postgresql+psycopg2://{quote(pg_user, safe='')}:{quote(pg_password, safe='')}"
    f"@{pg_host}:{pg_port}/{pg_db}"
)
pg_engine = create_engine(pg_conn_str)

Descriptive Analytics

Business Question: What are the average performance metrics for each GPU Chip?

In [9]:
# Average FPS per GPU chip at each benchmark setting.
#
# Each benchmark column is text; the pattern captures the number written inside
# parentheses. SUBSTRING(text FROM pattern) is a scalar function that returns
# the first parenthesised group, or NULL when the text does not match, so a
# missing value in one column can never change how many rows the CTE produces
# (AVG simply skips NULLs). Only rows with a parseable "4K Ultra" value are
# kept, and the result is ordered by the 4K Ultra average, best first.
# The string is raw (r''') so the regex backslashes reach PostgreSQL unchanged.
sql_query = r'''
WITH performance_data AS (
    SELECT
        s."gpuChip",
        CAST(SUBSTRING(w."1080p Medium" FROM '\(([\d\.]+)\)') AS NUMERIC) AS fps_1080p_medium,
        CAST(SUBSTRING(w."1080p Ultra" FROM '\(([\d\.]+)\)') AS NUMERIC) AS fps_1080p_ultra,
        CAST(SUBSTRING(w."1440p Ultra" FROM '\(([\d\.]+)\)') AS NUMERIC) AS fps_1440p_ultra,
        CAST(SUBSTRING(w."4K Ultra" FROM '\(([\d\.]+)\)') AS NUMERIC) AS fps_4k_ultra
    FROM sql_project.gpu_specs_raw s
    JOIN sql_project.gpu_hierarchy_web_raw w
        ON s."productName" = w."Graphics Card"
    WHERE w."4K Ultra" ~ '\([\d\.]+\)'
)
SELECT
    p."gpuChip",
    ROUND(AVG(p.fps_1080p_medium), 2) AS avg_fps_1080p_medium,
    ROUND(AVG(p.fps_1080p_ultra), 2) AS avg_fps_1080p_ultra,
    ROUND(AVG(p.fps_1440p_ultra), 2) AS avg_fps_1440p_ultra,
    ROUND(AVG(p.fps_4k_ultra), 2) AS avg_fps_4k_ultra,
    COUNT(*) AS num_models
FROM performance_data p
GROUP BY p."gpuChip"
ORDER BY avg_fps_4k_ultra DESC;
'''
df = pd.read_sql_query(sql_query, con=pg_engine)
print(df)

        gpuChip  avg_fps_1080p_medium  avg_fps_1080p_ultra  \
0  Arctic Sound                200.30               154.60   
1         GA102                184.10               142.70   
2         GA103                140.20               102.60   
3         GA100                121.40                83.80   
4         GV100                123.13                85.63   
5         GA104                107.10                71.00   
6       Navi 21                101.40                69.40   
7       Vega 20                 82.30                51.60   
8        Thames                 48.60                27.50   

   avg_fps_1440p_ultra  avg_fps_4k_ultra  num_models  
0               137.70             89.60           1  
1               117.70             70.60           1  
2                74.30             40.20           1  
3                60.30             33.17           3  
4                59.13             30.53           3  
5                49.20             27.00         

Insights:
-Arctic Sound appears to be the chip with the best performance at 100% all around
-About 200 fps at 1080p Medium for the Arctic Sound is the top performance
-Thames is the chip with the worst performance
-7 fps at 4K Ultra for Thames is the lowest performance

Recommendations:
-Prioritize chips such as Arctic Sound and the GA chips to manufacture products with the best performance
-Define the business model: stay away from Thames, but consider using Navi for budget-friendly products despite the lower performance

Predictions:
-prioritizing higher quality chips will make better products and lead to more purchases
-using budget friendly chips would increase purchases for those who are on a budget

Diagnostic Analytics

Business Question: What are the highest ranked GPUs based on the performance metrics of the GPU chip?

In [10]:
# Top 15 graphics cards ranked by their 4K Ultra FPS.
#
# joined_data: matches spec rows to benchmark rows on the product name and
#   extracts the number written inside parentheses in "4K Ultra".
#   SUBSTRING(text FROM pattern) is scalar and returns the first parenthesised
#   group; the WHERE clause keeps only rows where that value is present.
# ranked_gpus: RANK() gives tied cards the same rank.
# The final ORDER BY adds "productName" as a tie-breaker so that the rows
#   returned under LIMIT 15 are deterministic even when ranks tie.
# The string is raw (r''') so the regex backslashes reach PostgreSQL unchanged.
sql_query = r'''
WITH joined_data AS (
    SELECT
        s."productName",
        s."gpuChip",
        s."releaseYear",
        CAST(SUBSTRING(w."4K Ultra" FROM '\(([\d\.]+)\)') AS NUMERIC) AS fps_4k_ultra
    FROM sql_project.gpu_specs_raw s
    JOIN sql_project.gpu_hierarchy_web_raw w
        ON s."productName" = w."Graphics Card"
    WHERE w."4K Ultra" ~ '\([\d\.]+\)'
),
ranked_gpus AS (
    SELECT *,
           RANK() OVER (ORDER BY fps_4k_ultra DESC) AS fps_rank
    FROM joined_data
)
SELECT
    r.fps_rank,
    r."productName",
    r."releaseYear",
    r."gpuChip",
    r.fps_4k_ultra
FROM ranked_gpus r
ORDER BY r.fps_rank, r."productName"
LIMIT 15;
'''
# Reuse the shared engine (same as the previous query cell) instead of handing
# pandas the raw connection string, which would build a new connection from the
# URL on every call.
df = pd.read_sql_query(sql_query, con=pg_engine)
print(df)

    fps_rank          productName  releaseYear       gpuChip  fps_4k_ultra
0          1     GeForce RTX 5090       2025.0  Arctic Sound          89.6
1          2     GeForce RTX 4090       2022.0         GA102          70.6
2          3     GeForce RTX 5080       2025.0         GA100          57.0
3          4   Radeon RX 7900 XTX       2022.0         GA103          40.2
4          5  GeForce RTX 4070 Ti       2023.0         GV100          40.0
5          6     GeForce RTX 5070       2025.0       Navi 21          36.4
6          7    Radeon RX 7900 XT       2022.0         GA100          34.4
7          8     GeForce RTX 4070       2023.0         GV100          30.3
8          9    Radeon RX 7800 XT       2023.0         GA104          27.0
9         10    Radeon RX 7700 XT       2023.0         GV100          21.3
10        11     GeForce RTX 4060       2023.0       Vega 20          15.1
11        12    Radeon RX 7600 XT       2024.0       Navi 21          13.3
12        13       Radeon

Insights:
-NVIDIA holds the top 3 GPUs based on chips and performance
-Bottom 3 spots based on performance are held by AMD 
-The top NVIDIA GPU gets more than double the fps of the top AMD performer

Recommendations:
-continue product manufacturing to stay ahead of AMD based on performance
-Look into creating more products with the Arctic Sound chip to further expand the range of high-performing products

Predictions:
-Staying ahead of AMD will build more loyal customers who value our products' performance
-Using more of the higher-end chips offers more variety for premium customers, leading to more purchases